boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

aarch64.py (10525B)


      1 from common import (
      2     AddI,
      3     ArchDef,
      4     BranchReg,
      5     CondB,
      6     CondBZ,
      7     Enter,
      8     La,
      9     LaBr,
     10     LdArg,
     11     Li,
     12     LogI,
     13     Mem,
     14     Mov,
     15     Nullary,
     16     Rrr,
     17     ShiftI,
     18     le32,
     19     round_up,
     20 )
     21 
     22 
# Maps the register names used by this backend to AArch64 register numbers.
# 'sp' and 'xzr' both map to 31; which one an encoding means depends on the
# instruction, which is why aa_mov_rr special-cases 'sp'.
NAT = {
    # a0-a3 are x0-x3.
    'a0': 0,
    'a1': 1,
    'a2': 2,
    'a3': 3,
    # x4/x5 are only written by the SYSCALL sequence in encode_nullary.
    'x4': 4,
    'x5': 5,
    't0': 9,
    't1': 10,
    't2': 11,
    's0': 19,
    's1': 20,
    's2': 21,
    's3': 22,
    'sp': 31,
    'xzr': 31,
    'lr': 30,
    # Branch-target register loaded by LA_BR and consumed by B/CALL/TAIL
    # and the conditional branches.
    'br': 17,
    # Backend-private temporary (address fallback in aa_mem, LogI
    # immediates, REM quotient, LDARG base).
    'scratch': 16,
    # Holds the syscall number for svc and serves as the epilogue/ENTER temp.
    'x8': 8,
    # Used by the SYSCALL sequence to stash a1..a3 across the svc.
    'save0': 23,
    'save1': 24,
    'save2': 25,
}
     47 
     48 
# Base instruction words for the three-register ops handled by aa_rrr.
# The register fields are zero here and are OR'd in by aa_rrr. MUL and REM
# are not in this table; encode_rrr builds them itself.
RRR_BASE = {
    'ADD': 0x8B000000,
    'SUB': 0xCB000000,
    'AND': 0x8A000000,
    'OR': 0xAA000000,   # ORR
    'XOR': 0xCA000000,  # EOR
    'SHL': 0x9AC02000,  # LSLV
    'SHR': 0x9AC02400,  # LSRV
    'SAR': 0x9AC02800,  # ASRV
    'DIV': 0x9AC00C00,  # SDIV (signed); same word encode_rrr uses for REM
}
     60 
     61 
# Syscall name -> number table handed to ArchDef as `syscall_numbers`.
# The values are the Linux arm64 (asm-generic) syscall numbers.
SYSCALL_NUMBERS = {
    'SYS_READ': 63,
    'SYS_WRITE': 64,
    'SYS_CLOSE': 57,
    'SYS_OPENAT': 56,
    'SYS_EXIT': 93,
    'SYS_CLONE': 220,
    'SYS_EXECVE': 221,
    'SYS_WAITID': 95,
}
     72 
     73 
     74 def aa_rrr(base, rd, ra, rb):
     75     d = NAT[rd]
     76     a = NAT[ra]
     77     b = NAT[rb]
     78     return le32(base | (b << 16) | (a << 5) | d)
     79 
     80 
     81 def aa_add_imm(rd, ra, imm12, sub=False):
     82     d = NAT[rd]
     83     a = NAT[ra]
     84     base = 0xD1000000 if sub else 0x91000000
     85     return le32(base | ((imm12 & 0xFFF) << 10) | (a << 5) | d)
     86 
     87 
     88 def aa_mov_rr(dst, src):
     89     if dst == 'sp':
     90         return aa_add_imm('sp', src, 0, sub=False)
     91     if src == 'sp':
     92         return aa_add_imm(dst, 'sp', 0, sub=False)
     93     d = NAT[dst]
     94     s = NAT[src]
     95     return le32(0xAA000000 | (s << 16) | (31 << 5) | d)
     96 
     97 
     98 def aa_ubfm(rd, ra, immr, imms):
     99     d = NAT[rd]
    100     a = NAT[ra]
    101     return le32(0xD3400000 | (immr << 16) | (imms << 10) | (a << 5) | d)
    102 
    103 
    104 def aa_sbfm(rd, ra, immr, imms):
    105     d = NAT[rd]
    106     a = NAT[ra]
    107     return le32(0x93400000 | (immr << 16) | (imms << 10) | (a << 5) | d)
    108 
    109 
    110 def aa_movz(rd, imm16):
    111     d = NAT[rd]
    112     return le32(0xD2800000 | ((imm16 & 0xFFFF) << 5) | d)
    113 
    114 
    115 def aa_movn(rd, imm16):
    116     d = NAT[rd]
    117     return le32(0x92800000 | ((imm16 & 0xFFFF) << 5) | d)
    118 
    119 
    120 def aa_materialize_small_imm(rd, imm):
    121     if imm >= 0:
    122         return aa_movz(rd, imm)
    123     return aa_movn(rd, (~imm) & 0xFFFF)
    124 
    125 
    126 def aa_ldst_uimm12(base, rt, rn, off_bytes, size_log2):
    127     imm12 = off_bytes >> size_log2
    128     t = NAT[rt]
    129     n = NAT[rn]
    130     return le32(base | (imm12 << 10) | (n << 5) | t)
    131 
    132 
    133 def aa_ldst_unscaled(base, rt, rn, off):
    134     imm9 = off & 0x1FF
    135     t = NAT[rt]
    136     n = NAT[rn]
    137     return le32(base | (imm9 << 12) | (n << 5) | t)
    138 
    139 
    140 def aa_mem(op, rt, rn, off):
    141     bases = {
    142         'LD': (0xF9400000, 3, 0xF8400000),
    143         'ST': (0xF9000000, 3, 0xF8000000),
    144         'LB': (0x39400000, 0, 0x38400000),
    145         'SB': (0x39000000, 0, 0x38000000),
    146     }
    147     uimm_base, size_log2, unscaled_base = bases[op]
    148     scale = 1 << size_log2
    149     if off >= 0 and off % scale == 0 and off < (4096 << size_log2):
    150         return aa_ldst_uimm12(uimm_base, rt, rn, off, size_log2)
    151     if -256 <= off <= 255:
    152         return aa_ldst_unscaled(unscaled_base, rt, rn, off)
    153     if -2048 <= off <= 2047:
    154         if off >= 0:
    155             addr = aa_add_imm('scratch', rn, off, sub=False)
    156         else:
    157             addr = aa_add_imm('scratch', rn, -off, sub=True)
    158         return addr + aa_ldst_uimm12(uimm_base, rt, 'scratch', 0, size_log2)
    159     raise ValueError(f'aarch64 offset out of range for {op}: {off}')
    160 
    161 
    162 def aa_cmp_skip(op, ra, rb):
    163     a = NAT[ra]
    164     b = NAT[rb]
    165     cmp_hex = le32(0xEB000000 | (b << 16) | (a << 5) | 31)
    166     skip_cond = {
    167         'BEQ': 1,
    168         'BNE': 0,
    169         'BLT': 10,
    170         'BLTU': 2,
    171     }[op]
    172     return cmp_hex + le32(0x54000040 | skip_cond)
    173 
    174 
    175 def aa_br(reg):
    176     return le32(0xD61F0000 | (NAT[reg] << 5))
    177 
    178 
    179 def aa_blr(reg):
    180     return le32(0xD63F0000 | (NAT[reg] << 5))
    181 
    182 
    183 def aa_ret():
    184     return le32(0xD65F03C0)
    185 
    186 
    187 def aa_epilogue():
    188     # Frame teardown, shared by ERET, TAIL, TAILR. Loads lr and the
    189     # saved caller sp from the hidden header at native_sp+0/+8, then
    190     # unwinds sp. Does NOT transfer control; the caller appends an
    191     # aa_ret / aa_br as appropriate.
    192     return (
    193         aa_mem('LD', 'lr', 'sp', 0)
    194         + aa_mem('LD', 'x8', 'sp', 8)
    195         + aa_mov_rr('sp', 'x8')
    196     )
    197 
    198 
    199 def aa_lit64_prefix(rd):
    200     ## 64-bit literal-pool prefix for LI: ldr xN, [pc,#8]; b PC+12.
    201     ## The 8 bytes that follow in source become the literal; b skips them.
    202     d = NAT[rd]
    203     ldr_lit = 0x58000040 | d
    204     b_plus12 = 0x14000003
    205     return le32(ldr_lit) + le32(b_plus12)
    206 
    207 
    208 def aa_lit32_prefix(rd):
    209     ## 32-bit literal-pool prefix for LA / LA_BR: ldr wN, [pc,#8]; b PC+8.
    210     ## ldr w zero-extends into the full 64-bit register, so a 4-byte literal
    211     ## is enough for any address in the stage0 layout (base 0x00600000,
    212     ## programs well under 4 GB). This lets source use `&label` directly
    213     ## without padding to 8 bytes.
    214     d = NAT[rd]
    215     ldr_lit = 0x18000040 | d
    216     b_plus8 = 0x14000002
    217     return le32(ldr_lit) + le32(b_plus8)
    218 
    219 
    220 def encode_li(_arch, row):
    221     return aa_lit64_prefix(row.rd)
    222 
    223 
    224 def encode_la(_arch, row):
    225     return aa_lit32_prefix(row.rd)
    226 
    227 
    228 def encode_labr(_arch, _row):
    229     return aa_lit32_prefix('br')
    230 
    231 
    232 def encode_mov(_arch, row):
    233     # Portable `sp` is the frame-local base, which is 16 bytes above
    234     # native sp (the backend's 2-word hidden header sits at the low end
    235     # of each frame allocation). So reading sp into a register yields
    236     # native_sp + 16, not native_sp itself.
    237     if row.rs == 'sp':
    238         return aa_add_imm(row.rd, 'sp', 16, sub=False)
    239     return aa_mov_rr(row.rd, row.rs)
    240 
    241 
    242 def encode_rrr(_arch, row):
    243     if row.op == 'MUL':
    244         d = NAT[row.rd]
    245         a = NAT[row.ra]
    246         b = NAT[row.rb]
    247         return le32(0x9B000000 | (b << 16) | (31 << 10) | (a << 5) | d)
    248     if row.op == 'REM':
    249         d = NAT[row.rd]
    250         a = NAT[row.ra]
    251         b = NAT[row.rb]
    252         sc = NAT['scratch']
    253         sdiv = 0x9AC00C00 | (b << 16) | (a << 5) | sc
    254         msub = 0x9B008000 | (b << 16) | (a << 10) | (sc << 5) | d
    255         return le32(sdiv) + le32(msub)
    256     return aa_rrr(RRR_BASE[row.op], row.rd, row.ra, row.rb)
    257 
    258 
    259 def encode_addi(_arch, row):
    260     if row.imm >= 0:
    261         return aa_add_imm(row.rd, row.ra, row.imm, sub=False)
    262     return aa_add_imm(row.rd, row.ra, -row.imm, sub=True)
    263 
    264 
    265 def encode_logi(_arch, row):
    266     seq = aa_materialize_small_imm('scratch', row.imm)
    267     base = {
    268         'ANDI': 0x8A000000,
    269         'ORI': 0xAA000000,
    270     }[row.op]
    271     return seq + aa_rrr(base, row.rd, row.ra, 'scratch')
    272 
    273 
    274 def encode_shifti(_arch, row):
    275     if row.op == 'SHLI':
    276         return aa_ubfm(row.rd, row.ra, (-row.imm) & 63, 63 - row.imm)
    277     if row.op == 'SHRI':
    278         return aa_ubfm(row.rd, row.ra, row.imm, 63)
    279     return aa_sbfm(row.rd, row.ra, row.imm, 63)
    280 
    281 
    282 def encode_mem(_arch, row):
    283     # Portable sp points to the frame-local base; the 2-word hidden
    284     # header sits at native_sp+0/+8 and is not portable-addressable.
    285     # Shift sp-relative offsets past the header.
    286     off = row.off + 16 if row.rn == 'sp' else row.off
    287     return aa_mem(row.op, row.rt, row.rn, off)
    288 
    289 
    290 def encode_ldarg(_arch, row):
    291     return aa_mem('LD', 'scratch', 'sp', 8) + aa_mem('LD', row.rd, 'scratch', 16 + 8 * row.slot)
    292 
    293 
    294 def encode_branch_reg(_arch, row):
    295     if row.kind == 'BR':
    296         return aa_br(row.rs)
    297     if row.kind == 'CALLR':
    298         return aa_blr(row.rs)
    299     if row.kind == 'TAILR':
    300         return aa_epilogue() + aa_br(row.rs)
    301     raise ValueError(f'unknown branch-reg kind: {row.kind}')
    302 
    303 
    304 def encode_condb(_arch, row):
    305     return aa_cmp_skip(row.op, row.ra, row.rb) + aa_br('br')
    306 
    307 
    308 def encode_condbz(_arch, row):
    309     a = NAT[row.ra]
    310     br_hex = aa_br('br')
    311     if row.op == 'BEQZ':
    312         return le32(0xB5000000 | (2 << 5) | a) + br_hex
    313     if row.op == 'BNEZ':
    314         return le32(0xB4000000 | (2 << 5) | a) + br_hex
    315     cmp_zero = le32(0xEB1F001F | (a << 5))
    316     bge = le32(0x54000040 | 10)
    317     return cmp_zero + bge + br_hex
    318 
    319 
    320 def encode_enter(arch, row):
    321     frame_bytes = round_up(arch.stack_align, 2 * arch.word_bytes + row.size)
    322     return (
    323         aa_add_imm('sp', 'sp', frame_bytes, sub=True)
    324         + aa_mem('ST', 'lr', 'sp', 0)
    325         + aa_add_imm('x8', 'sp', frame_bytes, sub=False)
    326         + aa_mem('ST', 'x8', 'sp', 8)
    327     )
    328 
    329 
    330 def encode_nullary(_arch, row):
    331     if row.kind == 'B':
    332         return aa_br('br')
    333     if row.kind == 'CALL':
    334         return aa_blr('br')
    335     if row.kind == 'RET':
    336         return aa_ret()
    337     if row.kind == 'ERET':
    338         return aa_epilogue() + aa_ret()
    339     if row.kind == 'TAIL':
    340         return aa_epilogue() + aa_br('br')
    341     if row.kind == 'SYSCALL':
    342         return ''.join([
    343             aa_mov_rr('x8', 'a0'),
    344             aa_mov_rr('save0', 'a1'),
    345             aa_mov_rr('save1', 'a2'),
    346             aa_mov_rr('save2', 'a3'),
    347             aa_mov_rr('a0', 'save0'),
    348             aa_mov_rr('a1', 'save1'),
    349             aa_mov_rr('a2', 'save2'),
    350             aa_mov_rr('a3', 't0'),
    351             aa_mov_rr('x4', 's0'),
    352             aa_mov_rr('x5', 's1'),
    353             le32(0xD4000001),
    354             aa_mov_rr('a1', 'save0'),
    355             aa_mov_rr('a2', 'save1'),
    356             aa_mov_rr('a3', 'save2'),
    357         ])
    358     raise ValueError(f'unknown nullary kind: {row.kind}')
    359 
    360 
    361 def aa_start_stub():
    362     # Backend-owned :_start stub per docs/P1.md §Program Entry. Captures
    363     # argc from [sp] and argv pointer from sp+8, calls p1_main under the
    364     # one-word direct-result convention (a0=argc, a1=argv), then issues a
    365     # native Linux sys_exit with p1_main's return value. Mirrors the
    366     # m1pp-path stub in p1/P1-aarch64.M1pp (`%p1_entry`).
    367     #
    368     # Raw hex outside `DEFINE` bodies must be single-quoted so bootstrap
    369     # M0 treats it as a literal byte run rather than a token.
    370     def q(hex_bytes):
    371         return f"'{hex_bytes}'"
    372     return [
    373         ':_start',
    374         q(aa_mem('LD', 'a0', 'sp', 0)),
    375         q(aa_add_imm('a1', 'sp', 8, sub=False)),
    376         q(aa_lit32_prefix('br')),
    377         '&p1_main',
    378         q(aa_blr('br')),
    379         q(aa_movz('x8', 93)),
    380         q(le32(0xD4000001)),
    381     ]
    382 
    383 
# Dispatch table handed to ArchDef: row class (imported from common) ->
# the function in this file that encodes it. Every encoder is called as
# encoder(arch, row).
ENCODERS = {
    Li: encode_li,
    La: encode_la,
    LaBr: encode_labr,
    Mov: encode_mov,
    Rrr: encode_rrr,
    AddI: encode_addi,
    LogI: encode_logi,
    ShiftI: encode_shifti,
    Mem: encode_mem,
    LdArg: encode_ldarg,
    Nullary: encode_nullary,
    BranchReg: encode_branch_reg,
    CondB: encode_condb,
    CondBZ: encode_condbz,
    Enter: encode_enter,
}
    401 
    402 
# The aarch64 backend description exported by this module. word_bytes and
# stack_align are read by encode_enter when it sizes a frame.
ARCH = ArchDef(
    name='aarch64',
    word_bytes=8,
    stack_align=16,
    syscall_numbers=SYSCALL_NUMBERS,
    encoders=ENCODERS,
    start_stub=aa_start_stub,
)