boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

aarch64.py (10586B)


      1 from common import (
      2     AddI,
      3     ArchDef,
      4     BranchReg,
      5     CondB,
      6     CondBZ,
      7     Enter,
      8     La,
      9     LaBr,
     10     LdArg,
     11     Li,
     12     LogI,
     13     Mem,
     14     Mov,
     15     Nullary,
     16     Rrr,
     17     ShiftI,
     18     le32,
     19     register_arch,
     20     round_up,
     21 )
     22 
     23 
# Portable register name -> native AArch64 register number, as placed in
# the Rd/Rn/Rm/Rt fields by the encoders in this file.
NAT = {
    # Argument / result registers (x0-x3).
    'a0': 0,
    'a1': 1,
    'a2': 2,
    'a3': 3,
    # Extra native argument registers, filled only by the SYSCALL sequence.
    'x4': 4,
    'x5': 5,
    # Temporaries (x9-x11).
    't0': 9,
    't1': 10,
    't2': 11,
    # Saved registers (x19-x22).
    's0': 19,
    's1': 20,
    's2': 21,
    's3': 22,
    # Register number 31 is shared: the instruction form decides whether
    # it means the stack pointer or the zero register, which is why
    # aa_mov_rr special-cases 'sp'.
    'sp': 31,
    'xzr': 31,
    'lr': 30,
    # Branch-target register (x17): loaded by the LA_BR prefix and used by
    # the B/CALL/TAIL and conditional-branch encodings.
    'br': 17,
    # Backend-private temporary (x16) used by aa_mem, REM, ANDI/ORI, LDARG.
    'scratch': 16,
    # x8 carries the syscall number in the SYSCALL sequence and doubles as
    # a temporary in the ENTER / epilogue sequences.
    'x8': 8,
    # Holding registers (x23-x25) used by the SYSCALL sequence to shuffle
    # and then restore a1-a3.
    'save0': 23,
    'save1': 24,
    'save2': 25,
}
     48 
     49 
# Base opcode words for the three-register operations that aa_rrr can
# encode directly (register fields are OR-ed in by aa_rrr). MUL and REM
# are not listed here; encode_rrr builds them separately.
RRR_BASE = {
    'ADD': 0x8B000000,  # ADD  Xd, Xn, Xm
    'SUB': 0xCB000000,  # SUB  Xd, Xn, Xm
    'AND': 0x8A000000,  # AND  Xd, Xn, Xm
    'OR': 0xAA000000,   # ORR  Xd, Xn, Xm
    'XOR': 0xCA000000,  # EOR  Xd, Xn, Xm
    'SHL': 0x9AC02000,  # LSLV Xd, Xn, Xm
    'SHR': 0x9AC02400,  # LSRV Xd, Xn, Xm
    'SAR': 0x9AC02800,  # ASRV Xd, Xn, Xm
    'DIV': 0x9AC00C00,  # SDIV Xd, Xn, Xm (same word encode_rrr uses for REM)
}
     61 
     62 
# Symbolic syscall name -> syscall number for this architecture, handed to
# ArchDef as `syscall_numbers`. SYS_EXIT (93) is the same value that
# aa_start_stub loads into x8 before its final svc.
SYSCALL_NUMBERS = {
    'SYS_READ': 63,
    'SYS_WRITE': 64,
    'SYS_CLOSE': 57,
    'SYS_OPENAT': 56,
    'SYS_EXIT': 93,
    'SYS_CLONE': 220,
    'SYS_EXECVE': 221,
    'SYS_WAITID': 95,
}
     73 
     74 
     75 def aa_rrr(base, rd, ra, rb):
     76     d = NAT[rd]
     77     a = NAT[ra]
     78     b = NAT[rb]
     79     return le32(base | (b << 16) | (a << 5) | d)
     80 
     81 
     82 def aa_add_imm(rd, ra, imm12, sub=False):
     83     d = NAT[rd]
     84     a = NAT[ra]
     85     base = 0xD1000000 if sub else 0x91000000
     86     return le32(base | ((imm12 & 0xFFF) << 10) | (a << 5) | d)
     87 
     88 
     89 def aa_mov_rr(dst, src):
     90     if dst == 'sp':
     91         return aa_add_imm('sp', src, 0, sub=False)
     92     if src == 'sp':
     93         return aa_add_imm(dst, 'sp', 0, sub=False)
     94     d = NAT[dst]
     95     s = NAT[src]
     96     return le32(0xAA000000 | (s << 16) | (31 << 5) | d)
     97 
     98 
     99 def aa_ubfm(rd, ra, immr, imms):
    100     d = NAT[rd]
    101     a = NAT[ra]
    102     return le32(0xD3400000 | (immr << 16) | (imms << 10) | (a << 5) | d)
    103 
    104 
    105 def aa_sbfm(rd, ra, immr, imms):
    106     d = NAT[rd]
    107     a = NAT[ra]
    108     return le32(0x93400000 | (immr << 16) | (imms << 10) | (a << 5) | d)
    109 
    110 
    111 def aa_movz(rd, imm16):
    112     d = NAT[rd]
    113     return le32(0xD2800000 | ((imm16 & 0xFFFF) << 5) | d)
    114 
    115 
    116 def aa_movn(rd, imm16):
    117     d = NAT[rd]
    118     return le32(0x92800000 | ((imm16 & 0xFFFF) << 5) | d)
    119 
    120 
    121 def aa_materialize_small_imm(rd, imm):
    122     if imm >= 0:
    123         return aa_movz(rd, imm)
    124     return aa_movn(rd, (~imm) & 0xFFFF)
    125 
    126 
    127 def aa_ldst_uimm12(base, rt, rn, off_bytes, size_log2):
    128     imm12 = off_bytes >> size_log2
    129     t = NAT[rt]
    130     n = NAT[rn]
    131     return le32(base | (imm12 << 10) | (n << 5) | t)
    132 
    133 
    134 def aa_ldst_unscaled(base, rt, rn, off):
    135     imm9 = off & 0x1FF
    136     t = NAT[rt]
    137     n = NAT[rn]
    138     return le32(base | (imm9 << 12) | (n << 5) | t)
    139 
    140 
    141 def aa_mem(op, rt, rn, off):
    142     bases = {
    143         'LD': (0xF9400000, 3, 0xF8400000),
    144         'ST': (0xF9000000, 3, 0xF8000000),
    145         'LB': (0x39400000, 0, 0x38400000),
    146         'SB': (0x39000000, 0, 0x38000000),
    147     }
    148     uimm_base, size_log2, unscaled_base = bases[op]
    149     scale = 1 << size_log2
    150     if off >= 0 and off % scale == 0 and off < (4096 << size_log2):
    151         return aa_ldst_uimm12(uimm_base, rt, rn, off, size_log2)
    152     if -256 <= off <= 255:
    153         return aa_ldst_unscaled(unscaled_base, rt, rn, off)
    154     if -2048 <= off <= 2047:
    155         if off >= 0:
    156             addr = aa_add_imm('scratch', rn, off, sub=False)
    157         else:
    158             addr = aa_add_imm('scratch', rn, -off, sub=True)
    159         return addr + aa_ldst_uimm12(uimm_base, rt, 'scratch', 0, size_log2)
    160     raise ValueError(f'aarch64 offset out of range for {op}: {off}')
    161 
    162 
    163 def aa_cmp_skip(op, ra, rb):
    164     a = NAT[ra]
    165     b = NAT[rb]
    166     cmp_hex = le32(0xEB000000 | (b << 16) | (a << 5) | 31)
    167     skip_cond = {
    168         'BEQ': 1,
    169         'BNE': 0,
    170         'BLT': 10,
    171         'BLTU': 2,
    172     }[op]
    173     return cmp_hex + le32(0x54000040 | skip_cond)
    174 
    175 
    176 def aa_br(reg):
    177     return le32(0xD61F0000 | (NAT[reg] << 5))
    178 
    179 
    180 def aa_blr(reg):
    181     return le32(0xD63F0000 | (NAT[reg] << 5))
    182 
    183 
    184 def aa_ret():
    185     return le32(0xD65F03C0)
    186 
    187 
    188 def aa_epilogue():
    189     # Frame teardown, shared by ERET, TAIL, TAILR. Loads lr and the
    190     # saved caller sp from the hidden header at native_sp+0/+8, then
    191     # unwinds sp. Does NOT transfer control; the caller appends an
    192     # aa_ret / aa_br as appropriate.
    193     return (
    194         aa_mem('LD', 'lr', 'sp', 0)
    195         + aa_mem('LD', 'x8', 'sp', 8)
    196         + aa_mov_rr('sp', 'x8')
    197     )
    198 
    199 
    200 def aa_lit64_prefix(rd):
    201     ## 64-bit literal-pool prefix for LI: ldr xN, [pc,#8]; b PC+12.
    202     ## The 8 bytes that follow in source become the literal; b skips them.
    203     d = NAT[rd]
    204     ldr_lit = 0x58000040 | d
    205     b_plus12 = 0x14000003
    206     return le32(ldr_lit) + le32(b_plus12)
    207 
    208 
    209 def aa_lit32_prefix(rd):
    210     ## 32-bit literal-pool prefix for LA / LA_BR: ldr wN, [pc,#8]; b PC+8.
    211     ## ldr w zero-extends into the full 64-bit register, so a 4-byte literal
    212     ## is enough for any address in the stage0 layout (base 0x00600000,
    213     ## programs well under 4 GB). This lets source use `&label` directly
    214     ## without padding to 8 bytes.
    215     d = NAT[rd]
    216     ldr_lit = 0x18000040 | d
    217     b_plus8 = 0x14000002
    218     return le32(ldr_lit) + le32(b_plus8)
    219 
    220 
    221 def encode_li(_arch, row):
    222     return aa_lit64_prefix(row.rd)
    223 
    224 
    225 def encode_la(_arch, row):
    226     return aa_lit32_prefix(row.rd)
    227 
    228 
    229 def encode_labr(_arch, _row):
    230     return aa_lit32_prefix('br')
    231 
    232 
    233 def encode_mov(_arch, row):
    234     # Portable `sp` is the frame-local base, which is 16 bytes above
    235     # native sp (the backend's 2-word hidden header sits at the low end
    236     # of each frame allocation). So reading sp into a register yields
    237     # native_sp + 16, not native_sp itself.
    238     if row.rs == 'sp':
    239         return aa_add_imm(row.rd, 'sp', 16, sub=False)
    240     return aa_mov_rr(row.rd, row.rs)
    241 
    242 
    243 def encode_rrr(_arch, row):
    244     if row.op == 'MUL':
    245         d = NAT[row.rd]
    246         a = NAT[row.ra]
    247         b = NAT[row.rb]
    248         return le32(0x9B000000 | (b << 16) | (31 << 10) | (a << 5) | d)
    249     if row.op == 'REM':
    250         d = NAT[row.rd]
    251         a = NAT[row.ra]
    252         b = NAT[row.rb]
    253         sc = NAT['scratch']
    254         sdiv = 0x9AC00C00 | (b << 16) | (a << 5) | sc
    255         msub = 0x9B008000 | (b << 16) | (a << 10) | (sc << 5) | d
    256         return le32(sdiv) + le32(msub)
    257     return aa_rrr(RRR_BASE[row.op], row.rd, row.ra, row.rb)
    258 
    259 
    260 def encode_addi(_arch, row):
    261     if row.imm >= 0:
    262         return aa_add_imm(row.rd, row.ra, row.imm, sub=False)
    263     return aa_add_imm(row.rd, row.ra, -row.imm, sub=True)
    264 
    265 
    266 def encode_logi(_arch, row):
    267     seq = aa_materialize_small_imm('scratch', row.imm)
    268     base = {
    269         'ANDI': 0x8A000000,
    270         'ORI': 0xAA000000,
    271     }[row.op]
    272     return seq + aa_rrr(base, row.rd, row.ra, 'scratch')
    273 
    274 
    275 def encode_shifti(_arch, row):
    276     if row.op == 'SHLI':
    277         return aa_ubfm(row.rd, row.ra, (-row.imm) & 63, 63 - row.imm)
    278     if row.op == 'SHRI':
    279         return aa_ubfm(row.rd, row.ra, row.imm, 63)
    280     return aa_sbfm(row.rd, row.ra, row.imm, 63)
    281 
    282 
    283 def encode_mem(_arch, row):
    284     # Portable sp points to the frame-local base; the 2-word hidden
    285     # header sits at native_sp+0/+8 and is not portable-addressable.
    286     # Shift sp-relative offsets past the header.
    287     off = row.off + 16 if row.rn == 'sp' else row.off
    288     return aa_mem(row.op, row.rt, row.rn, off)
    289 
    290 
    291 def encode_ldarg(_arch, row):
    292     return aa_mem('LD', 'scratch', 'sp', 8) + aa_mem('LD', row.rd, 'scratch', 16 + 8 * row.slot)
    293 
    294 
    295 def encode_branch_reg(_arch, row):
    296     if row.kind == 'BR':
    297         return aa_br(row.rs)
    298     if row.kind == 'CALLR':
    299         return aa_blr(row.rs)
    300     if row.kind == 'TAILR':
    301         return aa_epilogue() + aa_br(row.rs)
    302     raise ValueError(f'unknown branch-reg kind: {row.kind}')
    303 
    304 
    305 def encode_condb(_arch, row):
    306     return aa_cmp_skip(row.op, row.ra, row.rb) + aa_br('br')
    307 
    308 
    309 def encode_condbz(_arch, row):
    310     a = NAT[row.ra]
    311     br_hex = aa_br('br')
    312     if row.op == 'BEQZ':
    313         return le32(0xB5000000 | (2 << 5) | a) + br_hex
    314     if row.op == 'BNEZ':
    315         return le32(0xB4000000 | (2 << 5) | a) + br_hex
    316     cmp_zero = le32(0xEB1F001F | (a << 5))
    317     bge = le32(0x54000040 | 10)
    318     return cmp_zero + bge + br_hex
    319 
    320 
    321 def encode_enter(arch, row):
    322     frame_bytes = round_up(arch.stack_align, 2 * arch.word_bytes + row.size)
    323     return (
    324         aa_add_imm('sp', 'sp', frame_bytes, sub=True)
    325         + aa_mem('ST', 'lr', 'sp', 0)
    326         + aa_add_imm('x8', 'sp', frame_bytes, sub=False)
    327         + aa_mem('ST', 'x8', 'sp', 8)
    328     )
    329 
    330 
    331 def encode_nullary(_arch, row):
    332     if row.kind == 'B':
    333         return aa_br('br')
    334     if row.kind == 'CALL':
    335         return aa_blr('br')
    336     if row.kind == 'RET':
    337         return aa_ret()
    338     if row.kind == 'ERET':
    339         return aa_epilogue() + aa_ret()
    340     if row.kind == 'TAIL':
    341         return aa_epilogue() + aa_br('br')
    342     if row.kind == 'SYSCALL':
    343         return ''.join([
    344             aa_mov_rr('x8', 'a0'),
    345             aa_mov_rr('save0', 'a1'),
    346             aa_mov_rr('save1', 'a2'),
    347             aa_mov_rr('save2', 'a3'),
    348             aa_mov_rr('a0', 'save0'),
    349             aa_mov_rr('a1', 'save1'),
    350             aa_mov_rr('a2', 'save2'),
    351             aa_mov_rr('a3', 't0'),
    352             aa_mov_rr('x4', 's0'),
    353             aa_mov_rr('x5', 's1'),
    354             le32(0xD4000001),
    355             aa_mov_rr('a1', 'save0'),
    356             aa_mov_rr('a2', 'save1'),
    357             aa_mov_rr('a3', 'save2'),
    358         ])
    359     raise ValueError(f'unknown nullary kind: {row.kind}')
    360 
    361 
    362 def aa_start_stub():
    363     # Backend-owned :_start stub per docs/P1.md §Program Entry. Captures
    364     # argc from [sp] and argv pointer from sp+8, calls p1_main under the
    365     # one-word direct-result convention (a0=argc, a1=argv), then issues a
    366     # native Linux sys_exit with p1_main's return value. Mirrors the
    367     # m1pp-path stub in p1/P1-aarch64.M1pp (`%p1_entry`).
    368     #
    369     # Raw hex outside `DEFINE` bodies must be single-quoted so bootstrap
    370     # M0 treats it as a literal byte run rather than a token.
    371     def q(hex_bytes):
    372         return f"'{hex_bytes}'"
    373     return [
    374         ':_start',
    375         q(aa_mem('LD', 'a0', 'sp', 0)),
    376         q(aa_add_imm('a1', 'sp', 8, sub=False)),
    377         q(aa_lit32_prefix('br')),
    378         '&p1_main',
    379         q(aa_blr('br')),
    380         q(aa_movz('x8', 93)),
    381         q(le32(0xD4000001)),
    382     ]
    383 
    384 
# Dispatch table: row class (imported from common) -> the function in this
# file that encodes rows of that class for AArch64. Every encoder takes
# (arch, row) and returns the encoded output; passed to ArchDef as
# `encoders`.
ENCODERS = {
    Li: encode_li,
    La: encode_la,
    LaBr: encode_labr,
    Mov: encode_mov,
    Rrr: encode_rrr,
    AddI: encode_addi,
    LogI: encode_logi,
    ShiftI: encode_shifti,
    Mem: encode_mem,
    LdArg: encode_ldarg,
    Nullary: encode_nullary,
    BranchReg: encode_branch_reg,
    CondB: encode_condb,
    CondBZ: encode_condbz,
    Enter: encode_enter,
}
    402 
    403 
# Import-time side effect: hand this backend's definition to register_arch
# (from common). word_bytes and stack_align are the values encode_enter
# reads back from `arch` when sizing frames.
register_arch(
    ArchDef(
        name='aarch64',
        word_bytes=8,
        stack_align=16,
        syscall_numbers=SYSCALL_NUMBERS,
        encoders=ENCODERS,
        start_stub=aa_start_stub,
    )
)