gen.py (7765B)
#!/usr/bin/env python3
"""Generate the test/elf/bad/ corpus.

Each entry produces a (.elf, .expect) pair that test/elf/run.sh's
Layer C consumes: kit-roundtrip is run on the .elf and must exit
nonzero with a stderr containing the .expect substring.

Strategy: compile a tiny aarch64 .o with clang as a baseline, then
mutate specific bytes for each malformation. Run once and commit
the artifacts; the generator is checked in for auditability.
"""

import os
import struct
import subprocess
import sys
import tempfile

HERE = os.path.dirname(os.path.abspath(__file__))


def build_baseline():
    """Build the pristine aarch64 relocatable object that every case mutates.

    A one-function C file is written into a temporary directory and compiled
    with clang (``--target=aarch64-linux-gnu -c -O0``).  The object file's
    contents are returned as a ``bytearray``.  ``subprocess.check_call``
    raises if clang cannot be started or exits nonzero.
    """
    with tempfile.TemporaryDirectory() as workdir:
        c_path = os.path.join(workdir, "x.c")
        obj_path = os.path.join(workdir, "x.o")
        with open(c_path, "wb") as fh:
            fh.write(b"int answer(void){return 42;}\n")
        clang_cmd = [
            "clang", "--target=aarch64-linux-gnu", "-c", "-O0",
            c_path, "-o", obj_path,
        ]
        subprocess.check_call(clang_cmd)
        with open(obj_path, "rb") as fh:
            return bytearray(fh.read())


def parse_shdrs(buf):
    """Decode the section header table of a little-endian ELF64 image.

    Returns ``(e_shoff, e_shentsize, e_shnum, e_shstrndx, shdrs)``.  ``shdrs``
    holds one dict per section header: ``"off"`` is the byte offset of the
    header itself inside *buf*, and each ``sh_*`` key holds the decoded
    header field of the same name.
    """
    (e_shoff,) = struct.unpack_from("<Q", buf, 40)
    # e_shentsize, e_shnum and e_shstrndx are three consecutive u16 fields
    # starting at byte 58 of the ELF header.
    e_shentsize, e_shnum, e_shstrndx = struct.unpack_from("<HHH", buf, 58)

    shdrs = []
    for hdr_off in (e_shoff + n * e_shentsize for n in range(e_shnum)):
        # One 64-byte read per header; the fields at +16 and +48 are decoded
        # but not kept, matching the set of keys callers rely on.
        (name, kind, flags, _at16, offset, size,
         link, info, _at48, entsize) = struct.unpack_from(
            "<IIQQQQIIQQ", buf, hdr_off)
        shdrs.append({
            "off": hdr_off,
            "sh_name": name,
            "sh_type": kind,
            "sh_flags": flags,
            "sh_offset": offset,
            "sh_size": size,
            "sh_link": link,
            "sh_info": info,
            "sh_entsize": entsize,
        })
    return e_shoff, e_shentsize, e_shnum, e_shstrndx, shdrs


def find_shdr(shdrs, sh_type):
    """Return ``(index, header)`` for the first header whose ``sh_type``
    equals *sh_type*, or ``(None, None)`` when there is none."""
    hits = (
        (pos, hdr) for pos, hdr in enumerate(shdrs)
        if hdr["sh_type"] == sh_type
    )
    return next(hits, (None, None))


SHT_SYMTAB = 2
SHT_RELA = 4


# Registry of (basename, expect_substring, mutator(bytes) -> bytes-or-None).
CASES = []


def case(name, expect):
    """Decorator factory: append ``(name, expect, fn)`` to CASES and hand the
    decorated mutator back unchanged."""
    def register(mutator):
        CASES.append((name, expect, mutator))
        return mutator
    return register


@case("truncated_ehdr", "input shorter than ELF header")
def m_truncated_ehdr(buf):
    """Keep only the first 32 bytes of the image."""
    head = buf[:32]
    return head


@case("bad_magic", "not a recognized object file")
def m_bad_magic(buf):
    """Zero the second magic byte.

    bad_magic and wrong_endian are caught by kit_detect_target inside the
    kit-roundtrip harness *before* read_elf runs, so the .expect substring
    matches the harness's rejection text, not a read_elf diagnostic.
    """
    image = bytearray(buf)
    image[1] = 0x00  # corrupt EI_MAG1
    return image


@case("wrong_class", "not a recognized object file")
def m_wrong_class(buf):
    """Tag the 64-bit baseline as ELFCLASS32.

    The base object is 64-bit (e.g. aarch64).  ELFCLASS32 is a valid class
    on its own now (riscv32 uses it), so the reader no longer blanket-rejects
    it; but kit_detect_target requires EI_CLASS to match the arch's pointer
    width, so a 64-bit machine tagged ELFCLASS32 is a mismatch caught before
    read_elf, with the same "not a recognized object file" text that
    bad_magic/e_machine_unknown use.
    """
    image = bytearray(buf)
    # ELFCLASS32 on a 64-bit-machine object -> class/arch mismatch
    image[4] = 1
    return image


@case("wrong_endian", "not a recognized object file")
def m_wrong_endian(buf):
    """Flip the data-encoding byte to big-endian."""
    image = bytearray(buf)
    image[5] = 2  # ELFDATA2MSB
    return image


@case("e_machine_unknown", "not a recognized object file")
def m_e_machine_unknown(buf):
    """Overwrite e_machine with a value outside the supported set.

    ELF reader supports aarch64 / x86_64 / riscv64; kit_detect_target
    rejects anything else before read_elf sees it, with the same
    "not a recognized object file" diagnostic bad_magic produces.
    """
    image = bytearray(buf)
    struct.pack_into("<H", image, 18, 0x00FF)
    return image


@case("shentsize_bad", "unexpected e_shentsize")
def m_shentsize_bad(buf):
    """Store 32 in e_shentsize where 64 is expected."""
    image = bytearray(buf)
    struct.pack_into("<H", image, 58, 32)  # not 64
    return image


@case("shoff_oob", "section header table out of range")
def m_shoff_oob(buf):
    """Point e_shoff 64 bytes past the end of the image.

    Past EOF but small enough that adding `e_shnum * sizeof(Elf64_Shdr)`
    to it doesn't wrap u64; otherwise the bounds check passes via overflow
    and a later "shstrtab out of range" diagnostic fires first.
    """
    image = bytearray(buf)
    past_eof = len(image) + 64
    struct.pack_into("<Q", image, 40, past_eof)
    return image


@case("shstrndx_oob", "e_shstrndx")
def m_shstrndx_oob(buf):
    """Set e_shstrndx to five more than the section count."""
    image = bytearray(buf)
    (section_count,) = struct.unpack_from("<H", image, 60)
    struct.pack_into("<H", image, 62, section_count + 5)
    return image


def _require_symtab(image):
    """Return ``(e_shnum, header)`` for the first SHT_SYMTAB section of
    *image*; asserts that such a section exists."""
    _, _, e_shnum, _, shdrs = parse_shdrs(image)
    _, symtab = find_shdr(shdrs, SHT_SYMTAB)
    assert symtab, "baseline missing SYMTAB"
    return e_shnum, symtab


@case("symtab_entsize_bad", ".symtab entsize")
def m_symtab_entsize_bad(buf):
    """Store 32 in the symbol table's sh_entsize where 24 is expected."""
    image = bytearray(buf)
    _, symtab = _require_symtab(image)
    struct.pack_into("<Q", image, symtab["off"] + 56, 32)  # not 24
    return image


@case("symtab_size_bad", ".symtab size")
def m_symtab_size_bad(buf):
    """Grow the symbol table's sh_size by a single byte."""
    image = bytearray(buf)
    _, symtab = _require_symtab(image)
    struct.pack_into("<Q", image, symtab["off"] + 32, symtab["sh_size"] + 1)
    return image


@case("symtab_link_oob", ".symtab sh_link")
def m_symtab_link_oob(buf):
    """Set the symbol table's sh_link to five more than the section count."""
    image = bytearray(buf)
    section_count, symtab = _require_symtab(image)
    struct.pack_into("<I", image, symtab["off"] + 40, section_count + 5)
    return image


@case("rela_entsize_bad", "rela entsize")
def m_rela_entsize_bad(buf):
    """Store 16 in the first SHT_RELA header's sh_entsize where 24 is expected.

    Returns the mutated copy of *buf*, or None when the baseline has no RELA
    section (main() reports a None result as a skipped case).
    """
    b = bytearray(buf)
    _, _, _, _, shdrs = parse_shdrs(b)
    _, sh = find_shdr(shdrs, SHT_RELA)
    if sh is None:
        return None  # skip if no RELA in baseline
    struct.pack_into("<Q", b, sh["off"] + 56, 16)  # not 24
    return b


@case("rela_info_oob", "rela sh_info")
def m_rela_info_oob(buf):
    """Set the first SHT_RELA header's sh_info to five more than e_shnum.

    Returns the mutated copy of *buf*, or None when the baseline has no RELA
    section.
    """
    b = bytearray(buf)
    _, _, e_shnum, _, shdrs = parse_shdrs(b)
    _, sh = find_shdr(shdrs, SHT_RELA)
    if sh is None:
        return None
    struct.pack_into("<I", b, sh["off"] + 44, e_shnum + 5)
    return b


@case("reloc_type_unsupported", "unsupported reloc type")
def m_reloc_type_unsupported(buf):
    """Flip the r_info type to something we don't decode.

    Only the first entry of the first SHT_RELA section is touched.  Returns
    the mutated copy of *buf*, or None when there is no RELA section or when
    that section does not hold one complete entry inside the image.
    """
    b = bytearray(buf)
    _, _, _, _, shdrs = parse_shdrs(b)
    _, sh = find_shdr(shdrs, SHT_RELA)
    if sh is None:
        return None
    # An Elf64_Rela entry is 24 bytes (r_offset, r_info, r_addend).  Without
    # one whole entry to edit, the write below would land on bytes that are
    # not relocation data (or run off the end of the image), so skip instead.
    rela_entsize = 24
    if sh["sh_size"] < rela_entsize or sh["sh_offset"] + rela_entsize > len(b):
        return None
    # Pick the first rela entry; r_info is at offset+8, length 8.
    rel_off = sh["sh_offset"] + 8
    r_info = struct.unpack_from("<Q", b, rel_off)[0]
    # Replace type (low 32) with 9999 -- unmapped; keep the symbol index
    # held in the high 32 bits.
    sym = r_info >> 32
    new_info = (sym << 32) | 9999
    struct.pack_into("<Q", b, rel_off, new_info)
    return b


def main():
    """Regenerate every (.elf, .expect) pair next to this script.

    Compiles the baseline once, runs each registered case on it, and writes
    ``<name>.elf`` / ``<name>.expect`` into HERE.  A mutator returning None
    is reported as skipped and produces no files.  Exits with an error
    message when no case was written at all.
    """
    baseline = build_baseline()
    written = 0
    skipped = 0
    for name, expect, fn in CASES:
        out = fn(baseline)
        if out is None:
            print(f"SKIP {name} (baseline has no eligible section)")
            skipped += 1
            continue
        elf_path = os.path.join(HERE, name + ".elf")
        expect_path = os.path.join(HERE, name + ".expect")
        with open(elf_path, "wb") as f:
            f.write(bytes(out))
        # Pin the encoding so the committed artifact does not depend on the
        # locale of whoever regenerates the corpus.
        with open(expect_path, "w", encoding="utf-8") as f:
            f.write(expect)
        print(f"wrote {name}.elf ({len(out)} bytes) expect=\"{expect}\"")
        written += 1
    print(f"---\n{written} written, {skipped} skipped")
    if written == 0:
        sys.exit("no cases written")


if __name__ == "__main__":
    main()