llvm_disasm_aarch64.py (4332B)
#!/usr/bin/env python3
"""Annotate p1_aarch64.M1 DEFINE rows with llvm-mc disassembly.

Reads generated DEFINE lines from p1_aarch64.M1, disassembles code-bearing rows
with llvm-mc, and prints the DEFINE name beside the native aarch64 mnemonic
sequence. Literal data rows such as syscall-number constants are labeled as data
instead of being treated as instructions.
"""

import argparse
import os
import re
import subprocess
import sys
from pathlib import Path


# One DEFINE row: a name followed by a run of hex digits, nothing else.
DEFINE_RE = re.compile(r'^DEFINE\s+(\S+)\s+([0-9A-Fa-f]+)\s*$')


def repo_root():
    """Return the directory two levels above this script."""
    return Path(__file__).resolve().parent.parent


def default_input_path():
    """Return the default location of p1_aarch64.M1 under the repo root."""
    return repo_root() / 'build' / 'p1' / 'aarch64' / 'p1_aarch64.M1'


def _run_tool(cmd, **kwargs):
    """Run *cmd* with captured text output and forward its stderr.

    The command's stderr is written to this process's stderr whether the
    command succeeds or fails, so diagnostics are not lost when
    ``check=True`` raises.

    Raises:
        subprocess.CalledProcessError: if the command exits non-zero.
    """
    try:
        proc = subprocess.run(
            cmd,
            check=True,
            capture_output=True,
            text=True,
            **kwargs,
        )
    except subprocess.CalledProcessError as exc:
        # CalledProcessError's message does not include the captured stderr,
        # so surface it before propagating the failure.
        if exc.stderr:
            sys.stderr.write(exc.stderr)
        raise
    if proc.stderr:
        sys.stderr.write(proc.stderr)
    return proc


def _instruction_lines(stdout):
    """Return the stripped instruction lines from llvm-mc's stdout text.

    Blank lines are dropped. Lines beginning with ``.`` are dropped as well:
    llvm-mc's assembly output can include assembler directives (for example a
    leading section directive such as ``.text``), and counting those as
    instructions would shift every row's slice.
    """
    lines = []
    for raw in stdout.splitlines():
        text = raw.strip()
        if not text or text.startswith('.'):
            continue
        lines.append(text)
    return lines


def ensure_generated(path: Path):
    """Run p1/p1_gen.py for aarch64 when *path* does not exist yet.

    Does nothing if *path* already exists. Otherwise the generator is invoked
    from the repo root with the grandparent directory of *path* as its output
    argument.

    Raises:
        subprocess.CalledProcessError: if the generator exits non-zero.
    """
    if path.exists():
        return
    gen = repo_root() / 'p1' / 'p1_gen.py'
    _run_tool(
        [sys.executable, str(gen), '--arch', 'aarch64', str(path.parent.parent)],
        cwd=repo_root(),
    )


def parse_rows(path: Path):
    """Return ``(name, HEX)`` tuples for every DEFINE line in *path*.

    Lines that do not match ``DEFINE_RE`` are skipped. Hex digits are
    upper-cased; file order is preserved.
    """
    rows = []
    for line in path.read_text().splitlines():
        match = DEFINE_RE.match(line)
        if not match:
            continue
        name, hex_bytes = match.groups()
        rows.append((name, hex_bytes.upper()))
    return rows


def is_data_row(name: str):
    """Return True for rows treated as literal data (names starting ``sys_``)."""
    return name.startswith('sys_')


def disassemble_code_rows(rows, llvm_mc):
    """Disassemble the non-data rows with llvm-mc in a single invocation.

    Args:
        rows: iterable of ``(name, hex_bytes)`` tuples.
        llvm_mc: path or command name of the llvm-mc executable.

    Returns:
        dict mapping each code row's name to its list of disassembled
        instruction lines. Empty when there are no code rows (llvm-mc is not
        invoked in that case).

    Raises:
        subprocess.CalledProcessError: if llvm-mc exits non-zero.
        RuntimeError: if the number of instruction lines produced does not
            match the number of 4-byte words fed in.
    """
    code_rows = [(name, hex_bytes) for name, hex_bytes in rows if not is_data_row(name)]
    if not code_rows:
        return {}

    payload = '\n'.join(hex_bytes for _, hex_bytes in code_rows) + '\n'
    proc = _run_tool(
        [llvm_mc, '--disassemble', '--hex', '--arch=aarch64'],
        input=payload,
    )
    inst_lines = _instruction_lines(proc.stdout)

    out = {}
    index = 0
    for name, hex_bytes in code_rows:
        # Each instruction word is 4 bytes, i.e. 8 hex digits.
        words = len(hex_bytes) // 8
        out[name] = inst_lines[index:index + words]
        index += words

    if index != len(inst_lines):
        raise RuntimeError(
            f'llvm output row split mismatch: consumed {index}, got {len(inst_lines)}'
        )
    return out


def format_rows(rows, disasm_by_name, show_bytes):
    """Render rows as aligned text, one line per instruction.

    Data rows are printed as ``data 0x<HEX>``. Code rows print their first
    instruction next to the name (and the raw bytes when *show_bytes* is
    true); further instructions go on continuation lines padded to the same
    column. Code rows with no disassembly are marked ``<no disassembly>``.

    Returns:
        The rendered lines joined with newlines ('' for no rows).
    """
    name_width = max(len(name) for name, _ in rows) if rows else 0
    out = []
    for name, hex_bytes in rows:
        if is_data_row(name):
            rhs = f'data 0x{hex_bytes}'
            out.append(f'{name:<{name_width}} {rhs}')
            continue

        insns = disasm_by_name.get(name, [])
        if not insns:
            out.append(f'{name:<{name_width}} <no disassembly>')
            continue

        prefix = name.ljust(name_width)
        byte_col = f' {hex_bytes}' if show_bytes else ''
        out.append(f'{prefix}{byte_col} {insns[0]}')

        # Continuation lines are blank under the name (and byte) columns.
        spacer = ' ' * name_width
        if show_bytes:
            spacer += ' ' + ' ' * len(hex_bytes)
        for insn in insns[1:]:
            out.append(f'{spacer} {insn}')
    return '\n'.join(out)


def main():
    """Parse command-line options, disassemble, and print the annotated rows."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'input',
        nargs='?',
        default=str(default_input_path()),
        help='path to p1_aarch64.M1',
    )
    parser.add_argument(
        '--llvm-mc',
        default=os.environ.get('LLVM_MC', 'llvm-mc'),
        help='path to llvm-mc',
    )
    parser.add_argument(
        '--grep',
        default='',
        help='only include DEFINE names containing this substring',
    )
    parser.add_argument(
        '--limit',
        type=int,
        default=0,
        help='maximum number of DEFINE rows to print (0 = all)',
    )
    parser.add_argument(
        '--show-bytes',
        action='store_true',
        help='include raw DEFINE bytes next to the name',
    )
    args = parser.parse_args()

    path = Path(args.input)
    ensure_generated(path)
    rows = parse_rows(path)
    if args.grep:
        rows = [(name, hex_bytes) for name, hex_bytes in rows if args.grep in name]
    if args.limit:
        rows = rows[:args.limit]

    disasm_by_name = disassemble_code_rows(rows, args.llvm_mc)
    print(format_rows(rows, disasm_by_name, args.show_bytes))


if __name__ == '__main__':
    main()