kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

gen.py (7765B)


      1 #!/usr/bin/env python3
      2 """Generate the test/elf/bad/ corpus.
      3 
      4 Each entry produces a (.elf, .expect) pair that test/elf/run.sh's
      5 Layer C consumes: kit-roundtrip is run on the .elf and must exit
      6 nonzero with a stderr containing the .expect substring.
      7 
      8 Strategy: compile a tiny aarch64 .o with clang as a baseline, then
      9 mutate specific bytes for each malformation.  Run once and commit
     10 the artifacts; the generator is checked in for auditability.
     11 """
     12 
     13 import os
     14 import struct
     15 import subprocess
     16 import sys
     17 import tempfile
     18 
     19 HERE = os.path.dirname(os.path.abspath(__file__))
     20 
     21 
     22 def build_baseline():
     23     """Compile a minimal aarch64 .o and return its bytes."""
     24     src = b"int answer(void){return 42;}\n"
     25     with tempfile.TemporaryDirectory() as td:
     26         c = os.path.join(td, "x.c")
     27         o = os.path.join(td, "x.o")
     28         with open(c, "wb") as f:
     29             f.write(src)
     30         subprocess.check_call([
     31             "clang", "--target=aarch64-linux-gnu", "-c", "-O0", c, "-o", o,
     32         ])
     33         with open(o, "rb") as f:
     34             return bytearray(f.read())
     35 
     36 
     37 def parse_shdrs(buf):
     38     """Return (e_shoff, e_shentsize, e_shnum, e_shstrndx, [shdrs])."""
     39     e_shoff     = struct.unpack_from("<Q", buf, 40)[0]
     40     e_shentsize = struct.unpack_from("<H", buf, 58)[0]
     41     e_shnum     = struct.unpack_from("<H", buf, 60)[0]
     42     e_shstrndx  = struct.unpack_from("<H", buf, 62)[0]
     43     shdrs = []
     44     for i in range(e_shnum):
     45         off = e_shoff + i * e_shentsize
     46         shdrs.append({
     47             "off": off,
     48             "sh_name":      struct.unpack_from("<I", buf, off + 0)[0],
     49             "sh_type":      struct.unpack_from("<I", buf, off + 4)[0],
     50             "sh_flags":     struct.unpack_from("<Q", buf, off + 8)[0],
     51             "sh_offset":    struct.unpack_from("<Q", buf, off + 24)[0],
     52             "sh_size":      struct.unpack_from("<Q", buf, off + 32)[0],
     53             "sh_link":      struct.unpack_from("<I", buf, off + 40)[0],
     54             "sh_info":      struct.unpack_from("<I", buf, off + 44)[0],
     55             "sh_entsize":   struct.unpack_from("<Q", buf, off + 56)[0],
     56         })
     57     return e_shoff, e_shentsize, e_shnum, e_shstrndx, shdrs
     58 
     59 
     60 def find_shdr(shdrs, sh_type):
     61     for i, s in enumerate(shdrs):
     62         if s["sh_type"] == sh_type:
     63             return i, s
     64     return None, None
     65 
     66 
# Section types (sh_type values) that the mutators look up with find_shdr().
SHT_SYMTAB = 2  # symbol table section
SHT_RELA   = 4  # relocation section; entries carry r_info at byte offset 8
     69 
     70 
     71 # (basename, expect_substring, mutator(bytes) -> bytes-or-None)
     72 CASES = []
     73 
     74 
     75 def case(name, expect):
     76     def deco(fn):
     77         CASES.append((name, expect, fn))
     78         return fn
     79     return deco
     80 
     81 
     82 @case("truncated_ehdr", "input shorter than ELF header")
     83 def m_truncated_ehdr(buf):
     84     return buf[:32]
     85 
     86 
     87 # bad_magic and wrong_endian are caught by kit_detect_target inside the
     88 # kit-roundtrip harness *before* read_elf runs.  The .expect substring
     89 # matches the harness's rejection text, not a read_elf diagnostic.
     90 @case("bad_magic", "not a recognized object file")
     91 def m_bad_magic(buf):
     92     b = bytearray(buf)
     93     b[1] = 0x00  # corrupt EI_MAG1
     94     return b
     95 
     96 
     97 # The base object is 64-bit (e.g. aarch64). ELFCLASS32 is a valid class on its
     98 # own now (riscv32 uses it), so the reader no longer blanket-rejects it — but
     99 # kit_detect_target requires EI_CLASS to match the arch's pointer width, so a
    100 # 64-bit machine tagged ELFCLASS32 is a mismatch caught before read_elf, with
    101 # the same "not a recognized object file" text bad_magic/e_machine_unknown use.
    102 @case("wrong_class", "not a recognized object file")
    103 def m_wrong_class(buf):
    104     b = bytearray(buf)
    105     b[4] = 1  # ELFCLASS32 on a 64-bit-machine object -> class/arch mismatch
    106     return b
    107 
    108 
    109 @case("wrong_endian", "not a recognized object file")
    110 def m_wrong_endian(buf):
    111     b = bytearray(buf)
    112     b[5] = 2  # ELFDATA2MSB
    113     return b
    114 
    115 
    116 @case("e_machine_unknown", "not a recognized object file")
    117 def m_e_machine_unknown(buf):
    118     """ELF reader supports aarch64 / x86_64 / riscv64. Use an e_machine
    119     outside that set; kit_detect_target rejects it before read_elf
    120     sees it, with the same "not a recognized object file" diagnostic
    121     bad_magic produces."""
    122     b = bytearray(buf)
    123     struct.pack_into("<H", b, 18, 0x00FF)
    124     return b
    125 
    126 
    127 @case("shentsize_bad", "unexpected e_shentsize")
    128 def m_shentsize_bad(buf):
    129     b = bytearray(buf)
    130     struct.pack_into("<H", b, 58, 32)  # not 64
    131     return b
    132 
    133 
    134 @case("shoff_oob", "section header table out of range")
    135 def m_shoff_oob(buf):
    136     # Past EOF but small enough that adding `e_shnum * sizeof(Elf64_Shdr)`
    137     # to it doesn't wrap u64 — otherwise the bounds check passes via
    138     # overflow and a later "shstrtab out of range" diagnostic fires first.
    139     b = bytearray(buf)
    140     struct.pack_into("<Q", b, 40, len(b) + 64)
    141     return b
    142 
    143 
    144 @case("shstrndx_oob", "e_shstrndx")
    145 def m_shstrndx_oob(buf):
    146     b = bytearray(buf)
    147     e_shnum = struct.unpack_from("<H", b, 60)[0]
    148     struct.pack_into("<H", b, 62, e_shnum + 5)
    149     return b
    150 
    151 
    152 @case("symtab_entsize_bad", ".symtab entsize")
    153 def m_symtab_entsize_bad(buf):
    154     b = bytearray(buf)
    155     _, _, _, _, shdrs = parse_shdrs(b)
    156     idx, sh = find_shdr(shdrs, SHT_SYMTAB)
    157     assert sh, "baseline missing SYMTAB"
    158     struct.pack_into("<Q", b, sh["off"] + 56, 32)  # not 24
    159     return b
    160 
    161 
    162 @case("symtab_size_bad", ".symtab size")
    163 def m_symtab_size_bad(buf):
    164     b = bytearray(buf)
    165     _, _, _, _, shdrs = parse_shdrs(b)
    166     idx, sh = find_shdr(shdrs, SHT_SYMTAB)
    167     assert sh, "baseline missing SYMTAB"
    168     struct.pack_into("<Q", b, sh["off"] + 32, sh["sh_size"] + 1)
    169     return b
    170 
    171 
    172 @case("symtab_link_oob", ".symtab sh_link")
    173 def m_symtab_link_oob(buf):
    174     b = bytearray(buf)
    175     _, _, e_shnum, _, shdrs = parse_shdrs(b)
    176     idx, sh = find_shdr(shdrs, SHT_SYMTAB)
    177     assert sh, "baseline missing SYMTAB"
    178     struct.pack_into("<I", b, sh["off"] + 40, e_shnum + 5)
    179     return b
    180 
    181 
    182 @case("rela_entsize_bad", "rela entsize")
    183 def m_rela_entsize_bad(buf):
    184     b = bytearray(buf)
    185     _, _, _, _, shdrs = parse_shdrs(b)
    186     idx, sh = find_shdr(shdrs, SHT_RELA)
    187     if not sh:
    188         return None  # skip if no RELA in baseline
    189     struct.pack_into("<Q", b, sh["off"] + 56, 16)  # not 24
    190     return b
    191 
    192 
    193 @case("rela_info_oob", "rela sh_info")
    194 def m_rela_info_oob(buf):
    195     b = bytearray(buf)
    196     _, _, e_shnum, _, shdrs = parse_shdrs(b)
    197     idx, sh = find_shdr(shdrs, SHT_RELA)
    198     if not sh:
    199         return None
    200     struct.pack_into("<I", b, sh["off"] + 44, e_shnum + 5)
    201     return b
    202 
    203 
    204 @case("reloc_type_unsupported", "unsupported reloc type")
    205 def m_reloc_type_unsupported(buf):
    206     """Flip the r_info type to something we don't decode."""
    207     b = bytearray(buf)
    208     _, _, _, _, shdrs = parse_shdrs(b)
    209     idx, sh = find_shdr(shdrs, SHT_RELA)
    210     if not sh:
    211         return None
    212     # Pick the first rela entry; r_info is at offset+8, length 8.
    213     rel_off = sh["sh_offset"] + 8
    214     r_info = struct.unpack_from("<Q", b, rel_off)[0]
    215     # Replace type (low 32) with 9999 — unmapped.
    216     sym = r_info >> 32
    217     new_info = (sym << 32) | 9999
    218     struct.pack_into("<Q", b, rel_off, new_info)
    219     return b
    220 
    221 
    222 def main():
    223     baseline = build_baseline()
    224     written = 0
    225     skipped = 0
    226     for name, expect, fn in CASES:
    227         out = fn(baseline)
    228         if out is None:
    229             print(f"SKIP {name} (baseline has no eligible section)")
    230             skipped += 1
    231             continue
    232         elf_path    = os.path.join(HERE, name + ".elf")
    233         expect_path = os.path.join(HERE, name + ".expect")
    234         with open(elf_path, "wb") as f:
    235             f.write(bytes(out))
    236         with open(expect_path, "w") as f:
    237             f.write(expect)
    238         print(f"wrote {name}.elf  ({len(out)} bytes)  expect=\"{expect}\"")
    239         written += 1
    240     print(f"---\n{written} written, {skipped} skipped")
    241     if written == 0:
    242         sys.exit("no cases written")
    243 
    244 
    245 if __name__ == "__main__":
    246     main()