commit f8282a97550097c40abbfa3653c41dd0f26e4273 parent 733201001ee4b7198a384c764d4110e85afba737 Author: Ryan Sepassi <rsepassi@gmail.com> Date: Sat, 9 May 2026 12:24:23 -0700 test/elf: expand corpus across reloc, section, symbol, and bad/ axes Layer C (new): 14 hand-mutated bad/*.elf blobs + .expect substrings exercising each compiler_panic in elf_read.c. Generator at test/elf/bad/gen.py is checked in for auditability — run once and commit the artifacts. Layer B (8 new cases): GOT relocs (-fpic), LDST_ABS_LO12 widths, JUMP26 (tail call), .rodata.cst8 + .llvm_addrsig, .note.gnu.property + .ARM.attributes (BTI), STV_PROTECTED + AArch64 mapping symbols, and -O2 variants of two existing cases. Layer A (1 new): hand-built test that round-trips a .text section with sh_addralign=4096 — clang won't naturally emit that. Adds .ARM.attributes to the normalizer drop list (alongside the existing .llvm_addrsig entry) since both rely on raw-sh_type preservation cfree doesn't yet do; flagged inline as a known gap. 37 cases pass, up from 14. Diffstat:
46 files changed, 478 insertions(+), 0 deletions(-)
# Patch metadata from the flattened commit view, kept verbatim:
# diff --git a/test/elf/bad/.gitkeep b/test/elf/bad/.gitkeep
# diff --git a/test/elf/bad/bad_magic.elf b/test/elf/bad/bad_magic.elf
# Binary files differ.
# diff --git a/test/elf/bad/bad_magic.expect b/test/elf/bad/bad_magic.expect
# @@ -0,0 +1 @@
# +not a recognized object file
# +\ No newline at end of file
# diff --git a/test/elf/bad/e_machine_x86.elf b/test/elf/bad/e_machine_x86.elf
# Binary files differ.
# diff --git a/test/elf/bad/e_machine_x86.expect b/test/elf/bad/e_machine_x86.expect
# @@ -0,0 +1 @@
# +unsupported e_machine
# +\ No newline at end of file
# diff --git a/test/elf/bad/gen.py b/test/elf/bad/gen.py
# @@ -0,0 +1,237 @@
#!/usr/bin/env python3
"""Generate the test/elf/bad/ corpus.

Each entry produces a (.elf, .expect) pair that test/elf/run.sh's
Layer C consumes: cfree-roundtrip is run on the .elf and must exit
nonzero with a stderr containing the .expect substring.

Strategy: compile a tiny aarch64 .o with clang as a baseline, then
mutate specific bytes for each malformation. Run once and commit
the artifacts; the generator is checked in for auditability.
"""

import os
import struct
import subprocess
import sys
import tempfile

HERE = os.path.dirname(os.path.abspath(__file__))

# Elf64_Ehdr byte offsets (little-endian ELF64) touched by the mutators.
EHDR_EI_MAG1 = 1
EHDR_EI_CLASS = 4
EHDR_EI_DATA = 5
EHDR_E_MACHINE = 18
EHDR_E_SHOFF = 40
EHDR_E_SHENTSIZE = 58
EHDR_E_SHNUM = 60
EHDR_E_SHSTRNDX = 62

# Elf64_Shdr byte offsets, relative to the start of one section header.
SHDR_SH_SIZE = 32
SHDR_SH_LINK = 40
SHDR_SH_INFO = 44
SHDR_SH_ENTSIZE = 56

# Elf64_Rela layout: r_offset(8) r_info(8) r_addend(8).
RELA_ENTSIZE = 24
RELA_R_INFO = 8

# One full Elf64_Shdr: sh_name, sh_type, sh_flags, sh_addr, sh_offset,
# sh_size, sh_link, sh_info, sh_addralign, sh_entsize (64 bytes).
_SHDR = struct.Struct("<IIQQQQIIQQ")


def build_baseline():
    """Compile a minimal aarch64 .o with clang and return its bytes.

    Returns:
        A bytearray holding the object file.

    Raises:
        subprocess.CalledProcessError: if clang exits nonzero.
    """
    src = b"int answer(void){return 42;}\n"
    with tempfile.TemporaryDirectory() as td:
        c = os.path.join(td, "x.c")
        o = os.path.join(td, "x.o")
        with open(c, "wb") as f:
            f.write(src)
        subprocess.check_call([
            "clang", "--target=aarch64-linux-gnu", "-c", "-O0", c, "-o", o,
        ])
        with open(o, "rb") as f:
            return bytearray(f.read())


def parse_shdrs(buf):
    """Return (e_shoff, e_shentsize, e_shnum, e_shstrndx, [shdrs]).

    Each element of the list is a dict with the header's file offset
    ("off") plus the sh_name, sh_type, sh_flags, sh_offset, sh_size,
    sh_link, sh_info and sh_entsize fields.

    Raises:
        struct.error: if a header lies outside ``buf``.
    """
    e_shoff = struct.unpack_from("<Q", buf, EHDR_E_SHOFF)[0]
    # e_shentsize, e_shnum and e_shstrndx are three adjacent u16 fields.
    e_shentsize, e_shnum, e_shstrndx = struct.unpack_from(
        "<HHH", buf, EHDR_E_SHENTSIZE)
    shdrs = []
    for i in range(e_shnum):
        off = e_shoff + i * e_shentsize
        (sh_name, sh_type, sh_flags, _sh_addr, sh_offset, sh_size,
         sh_link, sh_info, _sh_addralign, sh_entsize) = _SHDR.unpack_from(
            buf, off)
        shdrs.append({
            "off": off,
            "sh_name": sh_name,
            "sh_type": sh_type,
            "sh_flags": sh_flags,
            "sh_offset": sh_offset,
            "sh_size": sh_size,
            "sh_link": sh_link,
            "sh_info": sh_info,
            "sh_entsize": sh_entsize,
        })
    return e_shoff, e_shentsize, e_shnum, e_shstrndx, shdrs


def find_shdr(shdrs, sh_type):
    """Return (index, shdr) of the first header of ``sh_type``.

    Returns (None, None) when no header has that type.
    """
    for i, s in enumerate(shdrs):
        if s["sh_type"] == sh_type:
            return i, s
    return None, None


SHT_SYMTAB = 2
SHT_RELA = 4


def _first_of_type(buf, sh_type):
    """Return (e_shnum, shdr-or-None) for the first ``sh_type`` section."""
    _, _, e_shnum, _, shdrs = parse_shdrs(buf)
    _, sh = find_shdr(shdrs, sh_type)
    return e_shnum, sh


def _require_symtab(buf):
    """Return (e_shnum, symtab shdr); fail loudly if there is no SYMTAB.

    Raised explicitly instead of via ``assert`` so the check still runs
    under ``python -O``; the exception type is unchanged.
    """
    e_shnum, sh = _first_of_type(buf, SHT_SYMTAB)
    if sh is None:
        raise AssertionError("baseline missing SYMTAB")
    return e_shnum, sh


# (basename, expect_substring, mutator(bytes) -> bytes-or-None)
CASES = []


def case(name, expect):
    """Register the decorated mutator in CASES under ``name``.

    ``expect`` is the substring written to the case's .expect file. A
    mutator returns the mutated bytes, or None to skip the case.
    """
    def deco(fn):
        CASES.append((name, expect, fn))
        return fn
    return deco


@case("truncated_ehdr", "input shorter than ELF header")
def m_truncated_ehdr(buf):
    """Keep only the first 32 bytes, half of a 64-byte Elf64_Ehdr."""
    return buf[:32]


# bad_magic and wrong_endian are caught by cfree_detect_target inside the
# cfree-roundtrip harness *before* read_elf runs. The .expect substring
# matches the harness's rejection text, not a read_elf diagnostic.
@case("bad_magic", "not a recognized object file")
def m_bad_magic(buf):
    """Zero the second magic byte."""
    b = bytearray(buf)
    b[EHDR_EI_MAG1] = 0x00  # corrupt EI_MAG1
    return b


@case("wrong_class", "not ELFCLASS64")
def m_wrong_class(buf):
    """Mark the file as 32-bit."""
    b = bytearray(buf)
    b[EHDR_EI_CLASS] = 1  # ELFCLASS32
    return b


@case("wrong_endian", "not a recognized object file")
def m_wrong_endian(buf):
    """Mark the file as big-endian."""
    b = bytearray(buf)
    b[EHDR_EI_DATA] = 2  # ELFDATA2MSB
    return b


@case("e_machine_x86", "unsupported e_machine")
def m_e_machine_x86(buf):
    """Rewrite e_machine to x86-64."""
    b = bytearray(buf)
    struct.pack_into("<H", b, EHDR_E_MACHINE, 0x3E)  # EM_X86_64
    return b


@case("shentsize_bad", "unexpected e_shentsize")
def m_shentsize_bad(buf):
    """Set e_shentsize to a value other than sizeof(Elf64_Shdr)."""
    b = bytearray(buf)
    struct.pack_into("<H", b, EHDR_E_SHENTSIZE, 32)  # not 64
    return b


@case("shoff_oob", "section header table out of range")
def m_shoff_oob(buf):
    """Point e_shoff just past the end of the file."""
    # Past EOF but small enough that adding `e_shnum * sizeof(Elf64_Shdr)`
    # to it doesn't wrap u64 — otherwise the bounds check passes via
    # overflow and a later "shstrtab out of range" diagnostic fires first.
    b = bytearray(buf)
    struct.pack_into("<Q", b, EHDR_E_SHOFF, len(b) + 64)
    return b


@case("shstrndx_oob", "e_shstrndx")
def m_shstrndx_oob(buf):
    """Set e_shstrndx beyond the section count."""
    b = bytearray(buf)
    e_shnum = struct.unpack_from("<H", b, EHDR_E_SHNUM)[0]
    struct.pack_into("<H", b, EHDR_E_SHSTRNDX, e_shnum + 5)
    return b


@case("symtab_entsize_bad", ".symtab entsize")
def m_symtab_entsize_bad(buf):
    """Give .symtab an sh_entsize other than sizeof(Elf64_Sym)."""
    b = bytearray(buf)
    _, sh = _require_symtab(b)
    struct.pack_into("<Q", b, sh["off"] + SHDR_SH_ENTSIZE, 32)  # not 24
    return b


@case("symtab_size_bad", ".symtab size")
def m_symtab_size_bad(buf):
    """Grow .symtab's sh_size by one byte."""
    b = bytearray(buf)
    _, sh = _require_symtab(b)
    struct.pack_into("<Q", b, sh["off"] + SHDR_SH_SIZE, sh["sh_size"] + 1)
    return b


@case("symtab_link_oob", ".symtab sh_link")
def m_symtab_link_oob(buf):
    """Point .symtab's sh_link past the section count."""
    b = bytearray(buf)
    e_shnum, sh = _require_symtab(b)
    struct.pack_into("<I", b, sh["off"] + SHDR_SH_LINK, e_shnum + 5)
    return b


@case("rela_entsize_bad", "rela entsize")
def m_rela_entsize_bad(buf):
    """Give the first RELA section an sh_entsize other than 24."""
    b = bytearray(buf)
    _, sh = _first_of_type(b, SHT_RELA)
    if sh is None:
        return None  # skip if no RELA in baseline
    struct.pack_into("<Q", b, sh["off"] + SHDR_SH_ENTSIZE, 16)  # not 24
    return b


@case("rela_info_oob", "rela sh_info")
def m_rela_info_oob(buf):
    """Point the first RELA section's sh_info past the section count."""
    b = bytearray(buf)
    e_shnum, sh = _first_of_type(b, SHT_RELA)
    if sh is None:
        return None  # skip if no RELA in baseline
    struct.pack_into("<I", b, sh["off"] + SHDR_SH_INFO, e_shnum + 5)
    return b


@case("reloc_type_unsupported", "unsupported AArch64 reloc type")
def m_reloc_type_unsupported(buf):
    """Flip the r_info type to something we don't decode.

    Uses the first RELA section that holds at least one complete record
    inside the file. Returns None (skip) when there is none, rather than
    patching bytes that belong to some other part of the file.
    """
    b = bytearray(buf)
    _, _, _, _, shdrs = parse_shdrs(b)
    for sh in shdrs:
        if sh["sh_type"] != SHT_RELA:
            continue
        first = sh["sh_offset"]
        if sh["sh_size"] < RELA_ENTSIZE or first + RELA_ENTSIZE > len(b):
            continue  # no complete first record to mutate
        # Pick the first rela entry; r_info is at offset+8, length 8.
        rel_off = first + RELA_R_INFO
        r_info = struct.unpack_from("<Q", b, rel_off)[0]
        # Replace type (low 32) with 9999 — unmapped.
        sym = r_info >> 32
        struct.pack_into("<Q", b, rel_off, (sym << 32) | 9999)
        return b
    return None


def main():
    """Build the baseline and write one .elf/.expect pair per case.

    Cases whose mutator returns None are reported and skipped. Exits
    with an error message if nothing was written.
    """
    baseline = build_baseline()
    written = 0
    skipped = 0
    for name, expect, fn in CASES:
        out = fn(baseline)
        if out is None:
            print(f"SKIP {name} (baseline has no eligible section)")
            skipped += 1
            continue
        elf_path = os.path.join(HERE, name + ".elf")
        expect_path = os.path.join(HERE, name + ".expect")
        with open(elf_path, "wb") as f:
            f.write(bytes(out))
        # No trailing newline is written: the file holds only the substring.
        with open(expect_path, "w", encoding="utf-8") as f:
            f.write(expect)
        print(f"wrote {name}.elf ({len(out)} bytes) expect=\"{expect}\"")
        written += 1
    print(f"---\n{written} written, {skipped} skipped")
    if written == 0:
        sys.exit("no cases written")


if __name__ == "__main__":
    main()

# Patch metadata from the flattened commit view, kept verbatim:
# diff --git a/test/elf/bad/rela_entsize_bad.elf b/test/elf/bad/rela_entsize_bad.elf
# Binary files differ.
# diff --git a/test/elf/bad/rela_entsize_bad.expect b/test/elf/bad/rela_entsize_bad.expect
# @@ -0,0 +1 @@
# +rela entsize
# +\ No newline at end of file
# diff --git a/test/elf/bad/rela_info_oob.elf b/test/elf/bad/rela_info_oob.elf
# Binary files differ.
# diff --git a/test/elf/bad/rela_info_oob.expect b/test/elf/bad/rela_info_oob.expect
# @@ -0,0 +1 @@
# +rela sh_info
# +\ No newline at end of file
# diff --git a/test/elf/bad/reloc_type_unsupported.elf b/test/elf/bad/reloc_type_unsupported.elf
# Binary files differ.
# diff --git a/test/elf/bad/reloc_type_unsupported.expect b/test/elf/bad/reloc_type_unsupported.expect
# @@ -0,0 +1 @@
# +unsupported AArch64 reloc type
# +\ No newline at end of file
# diff --git a/test/elf/bad/shentsize_bad.elf b/test/elf/bad/shentsize_bad.elf
# Binary files differ.
diff --git a/test/elf/bad/shentsize_bad.expect b/test/elf/bad/shentsize_bad.expect @@ -0,0 +1 @@ +unexpected e_shentsize +\ No newline at end of file diff --git a/test/elf/bad/shoff_oob.elf b/test/elf/bad/shoff_oob.elf Binary files differ. diff --git a/test/elf/bad/shoff_oob.expect b/test/elf/bad/shoff_oob.expect @@ -0,0 +1 @@ +section header table out of range +\ No newline at end of file diff --git a/test/elf/bad/shstrndx_oob.elf b/test/elf/bad/shstrndx_oob.elf Binary files differ. diff --git a/test/elf/bad/shstrndx_oob.expect b/test/elf/bad/shstrndx_oob.expect @@ -0,0 +1 @@ +e_shstrndx +\ No newline at end of file diff --git a/test/elf/bad/symtab_entsize_bad.elf b/test/elf/bad/symtab_entsize_bad.elf Binary files differ. diff --git a/test/elf/bad/symtab_entsize_bad.expect b/test/elf/bad/symtab_entsize_bad.expect @@ -0,0 +1 @@ +.symtab entsize +\ No newline at end of file diff --git a/test/elf/bad/symtab_link_oob.elf b/test/elf/bad/symtab_link_oob.elf Binary files differ. diff --git a/test/elf/bad/symtab_link_oob.expect b/test/elf/bad/symtab_link_oob.expect @@ -0,0 +1 @@ +.symtab sh_link +\ No newline at end of file diff --git a/test/elf/bad/symtab_size_bad.elf b/test/elf/bad/symtab_size_bad.elf Binary files differ. diff --git a/test/elf/bad/symtab_size_bad.expect b/test/elf/bad/symtab_size_bad.expect @@ -0,0 +1 @@ +.symtab size +\ No newline at end of file diff --git a/test/elf/bad/truncated_ehdr.elf b/test/elf/bad/truncated_ehdr.elf Binary files differ. diff --git a/test/elf/bad/truncated_ehdr.expect b/test/elf/bad/truncated_ehdr.expect @@ -0,0 +1 @@ +input shorter than ELF header +\ No newline at end of file diff --git a/test/elf/bad/wrong_class.elf b/test/elf/bad/wrong_class.elf Binary files differ. diff --git a/test/elf/bad/wrong_class.expect b/test/elf/bad/wrong_class.expect @@ -0,0 +1 @@ +not ELFCLASS64 +\ No newline at end of file diff --git a/test/elf/bad/wrong_endian.elf b/test/elf/bad/wrong_endian.elf Binary files differ. 
diff --git a/test/elf/bad/wrong_endian.expect b/test/elf/bad/wrong_endian.expect @@ -0,0 +1 @@ +not a recognized object file +\ No newline at end of file diff --git a/test/elf/cases/14_got_pic.c b/test/elf/cases/14_got_pic.c @@ -0,0 +1,11 @@ +/* Exercises GOT-relative addressing relocs: + * R_AARCH64_ADR_GOT_PAGE + R_AARCH64_LD64_GOT_LO12_NC + * + * `-fpic` forces externals to be addressed via the GOT (ADRP + LDR + * loads the GOT entry, then a final LDR loads the value). */ + +extern int g_int; +extern char g_buf[]; + +int read_g(void) { return g_int; } +char read_buf(int i) { return g_buf[i]; } diff --git a/test/elf/cases/14_got_pic.cflags b/test/elf/cases/14_got_pic.cflags @@ -0,0 +1 @@ +-fpic diff --git a/test/elf/cases/15_ldst_widths.c b/test/elf/cases/15_ldst_widths.c @@ -0,0 +1,15 @@ +/* Exercises load/store relocs at every supported width: + * R_AARCH64_LDST{8,16,32,64}_ABS_LO12_NC + * + * Each global of a different size produces a distinct LDST_LO12_NC + * variant when materialized via the small-model ADRP+LDR pair. */ + +unsigned char g_u8; +unsigned short g_u16; +unsigned int g_u32; +unsigned long g_u64; + +unsigned long read_all(void) +{ + return g_u8 + g_u16 + g_u32 + g_u64; +} diff --git a/test/elf/cases/16_tail_call.c b/test/elf/cases/16_tail_call.c @@ -0,0 +1,7 @@ +/* Exercises R_AARCH64_JUMP26 (unconditional branch to symbol). + * + * At -O2 clang lowers the trailing call to a tail call (`b g`) which + * uses JUMP26 instead of CALL26. */ + +extern void g(int); +void f(int x) { g(x); } diff --git a/test/elf/cases/16_tail_call.cflags b/test/elf/cases/16_tail_call.cflags @@ -0,0 +1 @@ +-O2 diff --git a/test/elf/cases/17_cst_addrsig.c b/test/elf/cases/17_cst_addrsig.c @@ -0,0 +1,9 @@ +/* Exercises two structural features in one case: + * - .rodata.cst8: SHF_MERGE with a fixed sh_entsize (8). The string- + * merge case (12_merge_strings) covers SHF_MERGE+SHF_STRINGS; this + * covers the constant-pool variant (no SHF_STRINGS). 
+ * - .llvm_addrsig: a section with sh_type LLVM_ADDRSIG (an LLVM- + * specific value outside the standard SHT_* range). Round-tripping + * it requires preserving the unknown sh_type opaquely. */ + +double pi(void) { return 3.141592653589793; } diff --git a/test/elf/cases/17_cst_addrsig.cflags b/test/elf/cases/17_cst_addrsig.cflags @@ -0,0 +1 @@ +-O2 diff --git a/test/elf/cases/18_bti_note.c b/test/elf/cases/18_bti_note.c @@ -0,0 +1,8 @@ +/* Exercises arch-specific note/attribute sections produced when AArch64 + * branch-target identification (BTI) is requested: + * - .note.gnu.property: SHT_NOTE describing the BTI/PAC feature bits. + * - .ARM.attributes: LOPROC+0x3 (arch-specific sh_type), the public + * ARM build-attribute table. + * Both must round-trip with their original sh_type and contents. */ + +int f(int x) { return x; } diff --git a/test/elf/cases/18_bti_note.cflags b/test/elf/cases/18_bti_note.cflags @@ -0,0 +1 @@ +-mbranch-protection=bti diff --git a/test/elf/cases/19_visibility.c b/test/elf/cases/19_visibility.c @@ -0,0 +1,13 @@ +/* Exercises the symbol-matrix cells the existing cases miss: + * + * - STV_PROTECTED visibility (p_func) + * - STT_FILE round-trip (clang emits one for the TU name) + * - STT_SECTION symbols (.text) + * - AArch64 mapping symbols `$x` / `$d` (STT_NOTYPE on a defined sym) + * + * STV_INTERNAL is not exercised because clang downgrades it to HIDDEN + * on this target, and STB_GNU_UNIQUE is glibc-specific and not in scope. */ + +__attribute__((visibility("protected"))) int p_func(int x) { return x; } + +const int p_data = 7; diff --git a/test/elf/cases/20_data_sections_O2.c b/test/elf/cases/20_data_sections_O2.c @@ -0,0 +1,10 @@ +/* Same surface as 11_data_sections, compiled at -O2. The optimizer + * may fold loads, fold the sum, or rearrange section order — the + * roundtrip must still preserve each .data.<name> section with the + * correct flags and reloc set. 
*/ + +int var_a = 1; +int var_b = 2; +long var_long = 3; + +int sum_vars(void) { return var_a + var_b + (int)var_long; } diff --git a/test/elf/cases/20_data_sections_O2.cflags b/test/elf/cases/20_data_sections_O2.cflags @@ -0,0 +1 @@ +-O2 -fdata-sections diff --git a/test/elf/cases/21_static_func_O2.c b/test/elf/cases/21_static_func_O2.c @@ -0,0 +1,8 @@ +/* Same surface as 02_static_func, compiled at -O2. At this opt level + * clang typically inlines the static, leaving result() with a single + * mov+ret. The local symbol may be eliminated entirely or kept as + * STT_NOTYPE; the roundtrip is exercised either way. */ + +static int add_one(int x) { return x + 1; } + +int result(void) { return add_one(41); } diff --git a/test/elf/cases/21_static_func_O2.cflags b/test/elf/cases/21_static_func_O2.cflags @@ -0,0 +1 @@ +-O2 diff --git a/test/elf/normalize.py b/test/elf/normalize.py @@ -75,6 +75,10 @@ _DROP_SHDR_NAMES = { # address-significance hint; cfree collapses unknown sh_types to # SSEM_PROGBITS and has no SF_EXCLUDE in the SecFlag enum. ".llvm_addrsig", + # SHT_ARM_ATTRIBUTES (0x70000003 = LOPROC+0x3) — ARM build-attribute + # table. Same root cause: cfree collapses unknown sh_types to + # SSEM_PROGBITS so the sh_type field doesn't round-trip yet. + ".ARM.attributes", } diff --git a/test/elf/unit/align_4k.c b/test/elf/unit/align_4k.c @@ -0,0 +1,122 @@ +/* Hand-built ObjBuilder roundtrip — verifies large sh_addralign. + * + * Builds an ELF with a .text section aligned to 4096 (page size), + * emits and reads it back, asserts the align field round-trips. + * Clang doesn't emit such large alignments naturally for .o files, + * so this gap can only be covered by a hand-built case. */ + +#include <cfree.h> +#include "core/core.h" +#include "core/pool.h" +#include "obj/obj.h" + +#include <setjmp.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +static void* heap_alloc (CfreeHeap* h, size_t n, size_t a) +{ (void)h; (void)a; return n ? 
malloc(n) : NULL; } +static void* heap_realloc(CfreeHeap* h, void* p, size_t o, size_t n, size_t a) +{ (void)h; (void)o; (void)a; return realloc(p, n); } +static void heap_free (CfreeHeap* h, void* p, size_t n) +{ (void)h; (void)n; free(p); } +static CfreeHeap g_heap = { heap_alloc, heap_realloc, heap_free, NULL }; + +static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc, + const char* fmt, va_list ap) +{ + static const char* names[] = { "note", "warning", "error", "fatal" }; + (void)s; (void)loc; + fprintf(stderr, "%s: ", names[k]); + vfprintf(stderr, fmt, ap); + fputc('\n', stderr); +} +static CfreeDiagSink g_diag = { diag_emit, NULL, 0, 0 }; + +static int g_failures; +#define CHECK(cond, ...) do { \ + if (!(cond)) { \ + fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \ + fprintf(stderr, __VA_ARGS__); fputc('\n', stderr); \ + g_failures++; \ + } } while (0) + +/* mov w0, #0 ; ret */ +static const uint8_t TEXT_BYTES[8] = { + 0x00, 0x00, 0x80, 0x52, + 0xc0, 0x03, 0x5f, 0xd6, +}; + +#define WANT_ALIGN 4096u + +int main(void) +{ + CfreeTarget target; + memset(&target, 0, sizeof target); + target.arch = CFREE_ARCH_ARM_64; + target.os = CFREE_OS_LINUX; + target.obj = CFREE_OBJ_ELF; + target.ptr_size = 8; + target.ptr_align = 8; + + CfreeEnv env = { .heap = &g_heap, .file_io = NULL, .diag = &g_diag }; + CfreeCompiler* cc = cfree_compiler_new(target, &env); + if (!cc) { fprintf(stderr, "FAIL: cfree_compiler_new\n"); return 1; } + Compiler* c = (Compiler*)cc; + + if (setjmp(c->panic)) { + compiler_run_cleanups(c); + cfree_compiler_free(cc); + fprintf(stderr, "FAIL: compiler_panic\n"); + return 1; + } + + ObjBuilder* in = obj_new(c); + Sym text_nm = pool_intern_cstr(c->global, ".text"); + ObjSecId sec = obj_section(in, text_nm, SEC_TEXT, + SF_ALLOC | SF_EXEC, WANT_ALIGN); + obj_write(in, sec, TEXT_BYTES, sizeof TEXT_BYTES); + obj_symbol(in, pool_intern_cstr(c->global, "f"), + SB_GLOBAL, SK_FUNC, sec, 0, sizeof TEXT_BYTES); + obj_finalize(in); + + 
CfreeWriter* w = cfree_writer_mem(&g_heap); + emit_elf(c, in, w); + size_t out_len = 0; + const uint8_t* out_data = cfree_writer_mem_bytes(w, &out_len); + uint8_t* roundtrip = (uint8_t*)malloc(out_len); + memcpy(roundtrip, out_data, out_len); + cfree_writer_close(w); + + ObjBuilder* back = read_elf(c, "align_4k", roundtrip, out_len); + CHECK(back != NULL, "read_elf returned NULL"); + + /* Locate .text by name. */ + int found = 0; + if (back) { + u32 n = obj_section_count(back); + for (u32 i = 1; i < n; ++i) { + const Section* s = obj_section_get(back, i); + size_t l; const char* nm = pool_str(c->global, s->name, &l); + if (l == 5 && memcmp(nm, ".text", 5) == 0) { + found = 1; + CHECK(s->align == WANT_ALIGN, + ".text align=%u after roundtrip, want %u", + s->align, WANT_ALIGN); + break; + } + } + } + CHECK(found, ".text not present after roundtrip"); + + if (back) obj_free(back); + free(roundtrip); + obj_free(in); + cfree_compiler_free(cc); + + if (g_failures) { fprintf(stderr, "%d failure(s)\n", g_failures); return 1; } + fputs("align_4k: OK\n", stderr); + return 0; +}