commit 8fb3cda83aeb177c1c42186f0a2a9dae7847795a parent 71eb435e5b2049c2a305ae269c521e45aba33da3 Author: Ryan Sepassi <rsepassi@gmail.com> Date: Sat, 9 May 2026 05:00:25 -0700 test/elf: extend corpus with symbol, section, and behavioral cases cases/ (Layer B — structural roundtrip): 02_static_func STB_LOCAL binding 03_weak_def STB_WEAK on function and object 04_hidden_sym STV_HIDDEN visibility 05_common_sym SHN_COMMON / STT_COMMON tentative definitions (-fcommon) 06_tls SHF_TLS / STT_TLS (.tdata and .tbss) 07_init_array SHT_INIT_ARRAY / SHT_FINI_ARRAY via constructor/destructor 08_alias aliased symbols (two names at one address) 09_ifunc STT_GNU_IFUNC with resolver 10_func_sections per-function sections (-ffunction-sections) 11_data_sections per-variable sections (-fdata-sections) 12_merge_strings SHF_MERGE+SHF_STRINGS in .rodata.str1.1 13_comdat SHT_GROUP COMDAT via C++ inline (-x c++) exec/ (Layer D — behavioral oracle against lld): 07_two_tus split monolithic file into _a/_b (no logic change) 08_weak_sym strong definition overrides weak across TUs; exits 42 09_common_sym COMMON symbols coalesce; write visible across TUs (-fcommon) 10_init_array constructor runs before _start reads its side-effect 11_init_array_order prioritized ctors (101, 102) run ascending; exits 42 Harness: - run.sh Layer B: reads NN_name.cflags and appends to clang invocation - run.sh Layer D: reads <case_name>.cflags and applies to all TUs in group - normalize.py: add "group" block type for COMDAT SHT_GROUP sections (scrubs section indices, preserves group symbol and member names, sorts) Diffstat:
29 files changed, 624 insertions(+), 36 deletions(-)
diff --git a/test/elf/CORPUS.md b/test/elf/CORPUS.md @@ -0,0 +1,237 @@ +# ELF Test Corpus — Target Coverage + +What the `test/elf/` corpus should cover for full ELF object-file support, +independent of cfree's current implementation state. Each row is a +distinct case worth a discrete test (unit/, cases/, exec/, or bad/); +groups starred (★) are highest-leverage and should land first. + +Conventions: +- **U** = `unit/` (hand-built ObjBuilder roundtrip) +- **C** = `cases/` (clang `.o` → cfree-roundtrip → structural+behavioral diff) +- **E** = `exec/` (cfree-link → run, behavioral oracle) +- **B** = `bad/` (negative read_elf input) + +--- + +## 1. ELF header / target identification ★ + +| Case | Layer | Notes | +|---|---|---| +| `e_machine` per supported arch | C, U | aarch64, x86_64, riscv64, riscv32, arm32 (when supported) | +| `ELFCLASS32` 32-bit ELF | C, U | independent matrix from class | +| `ELFDATA2MSB` big-endian | C, U | aarch64-be, mips, etc. | +| `ELFOSABI_*` variations | U | NONE / LINUX / FREEBSD — emitter must round-trip whatever it reads | +| `e_flags` per-arch | C | RISC-V `RVE`/`RVC`, ARM EABI version | + +## 2. Section types ★ + +| `sh_type` | Layer | Specific case | +|---|---|---| +| `PROGBITS` (text/data/rodata) | C, U | covered by 01_return42 etc. 
| +| `NOBITS` | C, U | `.bss` of various sizes; `sh_addralign` 1/8/64/4096 | +| `SYMTAB` / `STRTAB` | U | round-trip preserves logical content (not byte layout) | +| `RELA` / `REL` | C | both encodings; `sh_flags & SHF_INFO_LINK` | +| `NOTE` | C | `.note.gnu.build-id`, `.note.ABI-tag`, `.note.gnu.property` | +| `INIT_ARRAY`/`FINI_ARRAY`/`PREINIT_ARRAY` | C, E | constructor ordering across TUs | +| `GROUP` (COMDAT) | C | C++-style inline funcs across two TUs (`09_comdat_inline.c`) | +| `LLVM_ADDRSIG` and other custom | C | unknown `sh_type` must round-trip via raw-type preservation | +| `GNU_HASH` / `HASH` | C | dynamic objects (post-shared-lib support) | +| `DYNSYM` / `DYNAMIC` | C | shared objects | + +## 3. Section flags + +| Flag | Coverage | +|---|---| +| `SHF_ALLOC` / `WRITE` / `EXECINSTR` | implicit in every case | +| `SHF_TLS` | `.tdata` / `.tbss` | +| `SHF_MERGE` + `SHF_STRINGS` | `.rodata.str1.1` / `.debug_str` | +| `SHF_MERGE` + fixed `sh_entsize` | `.rodata.cst{4,8,16}` constant pools | +| `SHF_GROUP` | every section inside a COMDAT | +| `SHF_LINK_ORDER` | `-flto` outputs, `.gcc_except_table` | +| `SHF_INFO_LINK` | every `.rela.*` | +| `SHF_EXCLUDE` | `.llvm_addrsig`; linker drop hint | +| `SHF_COMPRESSED` | zlib/zstd-compressed `.debug_*` | + +## 4. Symbol coverage ★ + +**Bindings:** `STB_LOCAL`, `STB_GLOBAL`, `STB_WEAK`. (`STB_GNU_UNIQUE` if cfree +ever needs it.) + +**Types:** `STT_NOTYPE`, `STT_FUNC`, `STT_OBJECT`, `STT_SECTION`, `STT_FILE`, +`STT_COMMON`, `STT_TLS`, `STT_GNU_IFUNC`. + +**Visibility:** `STV_DEFAULT`, `STV_HIDDEN`, `STV_PROTECTED`, `STV_INTERNAL`. + +**`shndx` values:** ordinary index, `SHN_UNDEF`, `SHN_ABS`, `SHN_COMMON`, +`SHN_XINDEX` (extended for >65279 sections). 
+ +**Cases:** + +| Case | Layer | +|---|---| +| Plain global function definition | C | +| Static (file-local) function | C | +| Tentative definition (common) | C | +| `__attribute__((weak))` defined and undefined | C | +| `__attribute__((visibility("hidden")))` | C | +| TLS variable (`__thread`) | C, E | +| IFUNC (`__attribute__((ifunc("resolver")))`) | C, E | +| Aliased symbols (multiple names, same address) | C | +| Section symbols as relocation targets | C | +| File symbol (`STT_FILE`) round-trip | C | +| AArch64 mapping symbols `$x` / `$d` (STT_NOTYPE on defined sym) | C | + +## 5. Relocation coverage ★ + +For each supported arch, every reloc kind cfree's `RelocKind` enum maps must +have a unit test (round-trip) AND a behavioral test (linked + run gives +the right value). + +### AArch64 + +| Reloc | Test | Notes | +|---|---|---| +| `R_AARCH64_NONE` | U | sentinel | +| `R_AARCH64_ABS64` / `ABS32` | C, E | data pointers, absolute jump tables | +| `R_AARCH64_PREL64` / `PREL32` | C, E | `.eh_frame` FDE pointers | +| `R_AARCH64_CALL26` / `JUMP26` | E | direct calls, tail calls | +| `R_AARCH64_ADR_PREL_PG_HI21` + `ADD_ABS_LO12_NC` | E | small-model PIC addressing | +| `R_AARCH64_LDST{8,16,32,64,128}_ABS_LO12_NC` | E | LDR/STR offset materialization | +| `R_AARCH64_GOT_*` family | C, E | shared-lib path | +| `R_AARCH64_TLSGD_*` / `TLSIE_*` / `TLSLE_*` / `TLSDESC_*` | C, E | TLS access models | +| `R_AARCH64_PLT32` | C, E | PIE/shared call through PLT | + +### x86_64 (when added) + +`R_X86_64_64`, `_32`, `_PC32`, `_PC64`, `_PLT32`, `_GOTPCREL`, `_GOTPCRELX`, +`_REX_GOTPCRELX`, `_TLSGD`, `_GOTTPOFF`, `_TPOFF32/64`, `_DTPOFF32/64`. + +### RISC-V (when added) + +`R_RISCV_HI20`/`_LO12_I`/`_LO12_S`, `_BRANCH`, `_JAL`, `_CALL_PLT`, +`_PCREL_HI20`/`_PCREL_LO12_*`, `_RELAX`, `_TLS_GD_HI20`, etc. 
+ +### Reloc edge cases (any arch) + +- Zero addend, positive, negative, near-overflow +- `r->sym == OBJ_SYM_NONE` (rare but legal — section-relative) +- `RELA` vs `REL` encodings on archs that distinguish (x86) +- Pair relocations (`R_AARCH64_LD_PREL_LO19` paired with prefetch hint) +- Relocations targeting weak undef (resolves to 0) +- Relocations targeting common symbols +- Relocations across COMDAT-merged content + +## 6. Special sections + +| Section | Coverage | +|---|---| +| `.text.<fnname>` (function sections) | C — `-ffunction-sections` | +| `.data.<varname>` | C — `-fdata-sections` | +| `.data.rel.ro` | C — relocatable read-only data | +| `.init_array.NNN` / `.fini_array.NNN` | E — priority ctors/dtors | +| `.tdata` / `.tbss` | C, E — TLS | +| `.gcc_except_table` + `.eh_frame` | C — exception tables | +| `.note.gnu.build-id` | C — reproducible-build identity | +| `.note.gnu.property` | C — CET/BTI/PAC markers (`AArch64-BTI`) | +| `.ARM.attributes` / `.riscv.attributes` / `.note.ABI-tag` | C | +| `.gnu.linkonce.t.<sym>` (legacy COMDAT) | C | +| `.debug_*` (DWARF) | C — opaque preservation; semantic equivalence later | +| `.eh_frame_hdr` | C — when shared/exe path emits it | +| `.got` / `.got.plt` / `.plt` | E — shared-lib link path | +| `.dynamic` / `.dynstr` / `.dynsym` | E — shared-object output | + +## 7. Layout / structure edge cases + +- Empty `.o` (NULL section only) +- Section count = 1, 65279, > 65279 (extended indexing via SHN_XINDEX in section 0) +- Symbol count > 65535 (extended via Elf64_Sym overflow path) +- Very large `.strtab` (> 1 MB) +- Sections with `sh_addralign` of 1, 4, 8, 16, 64, 4096 +- Per-section `sh_entsize` (mergeable, symtab, rela) +- Self-referential relocations (`X = &X + 8`) +- Multiple sections with the same name (legal pre-merge, common with `-ffunction-sections`) + +## 8. 
Archive (.a) ★ + +| Case | Layer | +|---|---| +| Empty archive | B | +| Single `.o` member | C-like (separate ar harness) | +| Multiple members, dependency on later member | E | +| BSD vs SysV format | C | +| Symbol index (`/`/`__.SYMDEF`) present and absent | C, E | +| Long filenames (`//` extended name table) | C | + +## 9. Negative inputs (`bad/`) + +Each blob has a `.expect` substring; harness asserts `compiler_panic` +exits cleanly (no segfault). + +| Blob | Trigger | +|---|---| +| `truncated_ehdr.elf` | < 64 bytes | +| `bad_magic.elf` | first 4 bytes wrong | +| `e_machine_x86.elf` | machine mismatch (when arch-validated) | +| `wrong_class.elf` | ELFCLASS32 in a 64-bit pipeline | +| `wrong_endian.elf` | ELFDATA2MSB in an LSB pipeline | +| `sh_offset_oob.elf` | `sh_offset + sh_size > file_size` | +| `sh_link_oob.elf` | `sh_link >= e_shnum` | +| `e_shstrndx_oob.elf` | bogus shstrndx | +| `symtab_entsize_bad.elf` | `sh_entsize != sizeof(Elf64_Sym)` | +| `rela_entsize_bad.elf` | `sh_entsize != 24` | +| `r_info_sym_oob.elf` | reloc sym index past symtab | +| `group_cycle.elf` | SHT_GROUP referencing itself | +| `nobits_with_data.elf` | `SHT_NOBITS` with non-zero `sh_offset` body | +| `huge_size.elf` | `sh_size = u64::max` | +| `string_no_nul.elf` | strtab without trailing `\0` | +| `unknown_machine.elf` | accepted as opaque or rejected by policy | + +## 10. Cross-tool agreement + +For every `cases/*.c`, the structural diff oracle should pass against: + +- `clang -O0` and `clang -O2` +- `gcc -O0` and `gcc -O2` (when available) +- `binutils-as` output (hand-written `.s`) +- `lld` and `ld.bfd` linker outputs (for shared/exe variants) + +Any case that diverges across these is a bug in either cfree or the +normalizer — not allowed to silently `.xfail`. + +## 11. Behavioral / runtime + +`exec/` already covers: exit code, in-section call, ADRP+ADD load, +`.rodata` load, `.data` load, BSS, two-TU link. 
Extend with: + +| Case | Exercises | +|---|---| +| Static initializer order across TUs | INIT_ARRAY priority | +| Weak symbol replaced by strong | resolution rule | +| Common symbol coalescing | tentative-def merging | +| Inline function shared via COMDAT | group dedup | +| TLS variable read/written from two TUs | `.tdata` + TLS relocs end-to-end | +| `dlopen`-style runtime relocation (when shared lands) | dynamic relocs | +| `setjmp`/`longjmp` across compilation unit | unwind interaction | + +--- + +## Stratification + +When picking what to land next, the prioritization is: + +1. **★ Reloc-kind matrix per arch** — every kind cfree claims to support + needs unit + behavioral coverage. This is the single highest-leverage + gap. +2. **★ Symbol kind/visibility matrix** — every `STT_*` × `STB_*` × `STV_*` + combo we emit must round-trip. +3. **★ Section type matrix** — every `sh_type` we admit, especially + `NOBITS`, `GROUP`, `INIT_ARRAY`. +4. Special sections with semantic flags (`SHF_TLS`, `SHF_MERGE`, etc.). +5. Negative inputs (`bad/`). +6. Layout edge cases (large/extended). +7. Cross-tool agreement (clang vs gcc, lld vs ld.bfd). +8. Archive support. +9. DWARF semantic equivalence (deferred until the consumer side cares). + +A "complete" corpus has a row for every cell in groups 1–4 and at least +one representative for every cell in 5–7. diff --git a/test/elf/cases/02_static_func.c b/test/elf/cases/02_static_func.c @@ -0,0 +1,7 @@ +/* Exercises STB_LOCAL: a file-static function and a non-static caller. + * The static should appear in .symtab with STB_LOCAL binding and a + * section-relative value; the global should be STB_GLOBAL. */ + +static int add_one(int x) { return x + 1; } + +int result(void) { return add_one(41); } diff --git a/test/elf/cases/03_weak_def.c b/test/elf/cases/03_weak_def.c @@ -0,0 +1,8 @@ +/* Exercises STB_WEAK: a weak function definition and a weak object. + * Both must round-trip with STB_WEAK binding preserved. 
*/ + +__attribute__((weak)) int weak_fn(void) { return 42; } + +__attribute__((weak)) int weak_var = 7; + +int call_weak(void) { return weak_fn() + weak_var; } diff --git a/test/elf/cases/04_hidden_sym.c b/test/elf/cases/04_hidden_sym.c @@ -0,0 +1,8 @@ +/* Exercises STV_HIDDEN: hidden-visibility function and object. + * Both symbols must round-trip with STV_HIDDEN in st_other. */ + +__attribute__((visibility("hidden"))) int hidden_val = 7; + +__attribute__((visibility("hidden"))) int hidden_fn(void) { return hidden_val; } + +int public_fn(void) { return hidden_fn(); } diff --git a/test/elf/cases/05_common_sym.c b/test/elf/cases/05_common_sym.c @@ -0,0 +1,9 @@ +/* Exercises SHN_COMMON / STT_COMMON: a tentative definition. + * With -fcommon, an uninitialized file-scope variable emits a COMMON + * symbol (SHN_COMMON, st_value = alignment). The roundtrip must + * preserve the COMMON shndx and the alignment in st_value. */ + +int tentative_var; /* SHN_COMMON, align=4 */ +long tentative_long; /* SHN_COMMON, align=8 */ + +int read_tentative(void) { return tentative_var + (int)tentative_long; } diff --git a/test/elf/cases/05_common_sym.cflags b/test/elf/cases/05_common_sym.cflags @@ -0,0 +1 @@ +-fcommon diff --git a/test/elf/cases/06_tls.c b/test/elf/cases/06_tls.c @@ -0,0 +1,8 @@ +/* Exercises SHF_TLS / STT_TLS: thread-local variables. + * An initialised __thread var goes to .tdata; an uninitialised one goes + * to .tbss. Both must round-trip with SHF_TLS set and STT_TLS type. */ + +__thread int tls_inited = 5; /* .tdata */ +__thread int tls_uninited; /* .tbss */ + +int get_tls(void) { return tls_inited + tls_uninited; } diff --git a/test/elf/cases/07_init_array.c b/test/elf/cases/07_init_array.c @@ -0,0 +1,11 @@ +/* Exercises SHT_INIT_ARRAY / SHT_FINI_ARRAY via constructor / destructor + * attributes. The constructor and destructor function pointers must + * appear as relocations in the respective array sections and survive + * the roundtrip intact.
*/ + +static int state = 0; + +__attribute__((constructor)) static void ctor(void) { state |= 1; } +__attribute__((destructor)) static void dtor(void) { state &= ~1; } + +int was_initialized(void) { return state; } diff --git a/test/elf/cases/08_alias.c b/test/elf/cases/08_alias.c @@ -0,0 +1,7 @@ +/* Exercises aliased symbols: two names at one address. + * `aliased` must appear in the symbol table as a distinct STB_GLOBAL + * symbol with the same value (and section index) as `original`. */ + +int original(void) { return 42; } + +int aliased(void) __attribute__((alias("original"))); diff --git a/test/elf/cases/09_ifunc.c b/test/elf/cases/09_ifunc.c @@ -0,0 +1,11 @@ +/* Exercises STT_GNU_IFUNC: a function whose implementation is selected + * by a runtime resolver. The symbol must survive the roundtrip with + * type=IFUNC and the resolver name preserved in the relocation. */ + +static int impl_a(void) { return 42; } + +static void *resolve(void) { return impl_a; } + +int ifunc_fn(void) __attribute__((ifunc("resolve"))); + +int call_ifunc(void) { return ifunc_fn(); } diff --git a/test/elf/cases/10_func_sections.c b/test/elf/cases/10_func_sections.c @@ -0,0 +1,8 @@ +/* Exercises -ffunction-sections: each function lands in its own + * .text.<name> section. The roundtrip must preserve all three sections + * with their individual sizes, flags (AX), and the CALL26 reloc + * from fn_c into fn_a / fn_b. */ + +int fn_a(void) { return 1; } +int fn_b(void) { return 2; } +int fn_c(void) { return fn_a() + fn_b(); } diff --git a/test/elf/cases/10_func_sections.cflags b/test/elf/cases/10_func_sections.cflags @@ -0,0 +1 @@ +-ffunction-sections diff --git a/test/elf/cases/11_data_sections.c b/test/elf/cases/11_data_sections.c @@ -0,0 +1,9 @@ +/* Exercises -fdata-sections: each global variable lands in its own + * .data.<name> section. The roundtrip must preserve each section + * independently with the correct size, alignment, and AW flags. 
*/ + +int var_a = 1; +int var_b = 2; +long var_long = 3; + +int sum_vars(void) { return var_a + var_b + (int)var_long; } diff --git a/test/elf/cases/11_data_sections.cflags b/test/elf/cases/11_data_sections.cflags @@ -0,0 +1 @@ +-fdata-sections diff --git a/test/elf/cases/12_merge_strings.c b/test/elf/cases/12_merge_strings.c @@ -0,0 +1,9 @@ +/* Exercises SHF_MERGE+SHF_STRINGS in .rodata.str1.1. + * String literals with internal linkage land in a mergeable string + * section. The roundtrip must preserve sh_flags (MS) and sh_entsize=1. + * The duplicate "hello" may or may not be merged by the compiler — + * the normalizer compares the section type/flags, not byte content. */ + +const char *str_a(void) { return "hello"; } +const char *str_b(void) { return "world"; } +const char *str_c(void) { return "hello"; } /* possible duplicate */ diff --git a/test/elf/cases/13_comdat.c b/test/elf/cases/13_comdat.c @@ -0,0 +1,10 @@ +/* Exercises SHT_GROUP (COMDAT): C++ inline functions create one COMDAT + * group per definition. The SHT_GROUP section and SHF_GROUP flag on + * every member must survive the roundtrip. + * + * Compiled as C++ (-x c++) so that `inline` has the standard C++ + * semantics that produce COMDAT groups. */ + +inline int comdat_fn() { return 42; } + +int call_comdat() { return comdat_fn(); } diff --git a/test/elf/cases/13_comdat.cflags b/test/elf/cases/13_comdat.cflags @@ -0,0 +1 @@ +-x c++ diff --git a/test/elf/exec/07_two_tus.c b/test/elf/exec/07_two_tus.c @@ -1,22 +0,0 @@ -/* Layer D: cross-TU symbol resolution. The harness only feeds one .c - * per case, so this exercise lives in a single file with two - * independently-emitted TUs joined via __attribute__((section)) on the - * defined int. Equivalent to two .c files with `extern int answer` on - * one side and `int answer = 42;` on the other — both end up in one - * translation unit at -O0 with separate function bodies clang lowers - * via PG_HI21+ADD_LO12 to .data. 
*/ - -static void __attribute__((noreturn)) sys_exit(long code) -{ - register long x0 __asm__("x0") = code; - register long x8 __asm__("x8") = 93; /* __NR_exit */ - __asm__ volatile("svc #0" :: "r"(x0), "r"(x8)); - __builtin_unreachable(); -} - -int answer = 42; /* exported global */ - -void _start(void) -{ - sys_exit(answer); -} diff --git a/test/elf/exec/07_two_tus_a.c b/test/elf/exec/07_two_tus_a.c @@ -0,0 +1,18 @@ +/* Layer D, multi-TU: TU A defines `_start`, calls `add` from TU B, and + * exits with the result. Tests cross-TU STB_GLOBAL resolution + * (R_AARCH64_CALL26 against a definition in another input). */ + +extern int add(int, int); + +static void __attribute__((noreturn)) sys_exit(long code) +{ + register long x0 __asm__("x0") = code; + register long x8 __asm__("x8") = 93; /* __NR_exit */ + __asm__ volatile("svc #0" :: "r"(x0), "r"(x8)); + __builtin_unreachable(); +} + +void _start(void) +{ + sys_exit(add(20, 22)); /* exits 42 */ +} diff --git a/test/elf/exec/07_two_tus_b.c b/test/elf/exec/07_two_tus_b.c @@ -0,0 +1,3 @@ +/* Layer D, multi-TU: TU B defines the global `add` referenced by TU A. */ + +int add(int x, int y) { return x + y; } diff --git a/test/elf/exec/08_weak_sym_a.c b/test/elf/exec/08_weak_sym_a.c @@ -0,0 +1,22 @@ +/* Layer D, multi-TU: weak-symbol resolution. + * + * TU A provides a weak definition of get_val() (returns 1) and _start. + * TU B provides a strong definition of get_val() (returns 42). + * The linker must pick the strong definition; expected exit code: 42.
*/ + +extern int get_val(void); + +static void __attribute__((noreturn)) sys_exit(long code) +{ + register long x0 __asm__("x0") = code; + register long x8 __asm__("x8") = 93; /* __NR_exit */ + __asm__ volatile("svc #0" :: "r"(x0), "r"(x8)); + __builtin_unreachable(); +} + +__attribute__((weak)) int get_val(void) { return 1; } /* loses to TU B */ + +void _start(void) +{ + sys_exit(get_val()); /* expects 42 */ +} diff --git a/test/elf/exec/08_weak_sym_b.c b/test/elf/exec/08_weak_sym_b.c @@ -0,0 +1,3 @@ +/* Layer D, multi-TU: strong definition that overrides TU A's weak one. */ + +int get_val(void) { return 42; } diff --git a/test/elf/exec/09_common_sym.cflags b/test/elf/exec/09_common_sym.cflags @@ -0,0 +1 @@ +-fcommon diff --git a/test/elf/exec/09_common_sym_a.c b/test/elf/exec/09_common_sym_a.c @@ -0,0 +1,24 @@ +/* Layer D, multi-TU: COMMON symbol coalescing. + * + * Both TUs declare `int shared` without an initialiser (-fcommon). + * The linker allocates exactly one copy. TU A writes 42 to shared, + * then calls read_shared() from TU B which reads the same storage. + * Expected exit code: 42. */ + +int shared; /* SHN_COMMON */ + +extern int read_shared(void); + +static void __attribute__((noreturn)) sys_exit(long code) +{ + register long x0 __asm__("x0") = code; + register long x8 __asm__("x8") = 93; + __asm__ volatile("svc #0" :: "r"(x0), "r"(x8)); + __builtin_unreachable(); +} + +void _start(void) +{ + shared = 42; + sys_exit(read_shared()); /* expects 42 */ +} diff --git a/test/elf/exec/09_common_sym_b.c b/test/elf/exec/09_common_sym_b.c @@ -0,0 +1,6 @@ +/* Layer D, multi-TU: second tentative definition of `shared`. + * Merged with TU A's COMMON by the linker into a single BSS allocation. */ + +int shared; + +int read_shared(void) { return shared; } diff --git a/test/elf/exec/10_init_array.c b/test/elf/exec/10_init_array.c @@ -0,0 +1,31 @@ +/* Layer D: SHT_INIT_ARRAY — constructor runs before _start reads the flag. 
+ * + * There is no libc in this freestanding binary, so _start must manually + * walk __init_array_start / __init_array_end (provided by the linker). + * This tests that the linker emits a well-formed .init_array section with + * a valid relocation pointing at ctor(). + * + * Expected exit code: 1 (ctor set init_ran=1 before _start read it). */ + +static int init_ran = 0; + +__attribute__((constructor)) static void ctor(void) { init_ran = 1; } + +typedef void (*init_fn_t)(void); +extern init_fn_t __init_array_start[]; +extern init_fn_t __init_array_end[]; + +static void __attribute__((noreturn)) sys_exit(long code) +{ + register long x0 __asm__("x0") = code; + register long x8 __asm__("x8") = 93; + __asm__ volatile("svc #0" :: "r"(x0), "r"(x8)); + __builtin_unreachable(); +} + +void _start(void) +{ + for (init_fn_t *f = __init_array_start; f < __init_array_end; f++) + (*f)(); + sys_exit(init_ran); /* 1 = ctor ran; 0 = init_array not called */ +} diff --git a/test/elf/exec/11_init_array_order.c b/test/elf/exec/11_init_array_order.c @@ -0,0 +1,42 @@ +/* Layer D: SHT_INIT_ARRAY priority ordering. + * + * Two constructors with explicit priorities: 101 and 102. Lower numbers + * run first. ctor_101 sets `order` to 1 iff it sees 0; ctor_102 sets + * `order` to 42 iff it sees 1. Correct ordering yields exit code 42. + * + * Sections emitted: .init_array.00101, .init_array.00102. + * The linker must sort them ascending before placing in the output. 
*/ + +static int order = 0; + +__attribute__((constructor(101))) static void ctor_101(void) +{ + if (order == 0) order = 1; /* fires first: 0 → 1 */ +} + +__attribute__((constructor(102))) static void ctor_102(void) +{ + if (order == 1) order = 42; /* fires second: 1 → 42 */ +} + +typedef void (*init_fn_t)(void); +extern init_fn_t __init_array_start[]; +extern init_fn_t __init_array_end[]; + +static void __attribute__((noreturn)) sys_exit(long code) +{ + register long x0 __asm__("x0") = code; + register long x8 __asm__("x8") = 93; + __asm__ volatile("svc #0" :: "r"(x0), "r"(x8)); + __builtin_unreachable(); +} + +void _start(void) +{ + for (init_fn_t *f = __init_array_start; f < __init_array_end; f++) + (*f)(); + /* 42 = both ctors ran in correct order + * 1 = ctor_102 ran first (wrong) + * 0 = neither ran */ + sys_exit(order); +} diff --git a/test/elf/normalize.py b/test/elf/normalize.py @@ -131,6 +131,17 @@ def _normalize_rela(line): % (off, rtype, sym_name.strip())) +# llvm-readelf COMDAT group section header: +# COMDAT group section [ N] `.group' [symbol_name] contains M sections: +_GROUP_HDR_RE = re.compile( + r"^(COMDAT )?group section\s+\[\s*\d+\]\s+`([^']+)'\s+\[([^\]]+)\]", + re.IGNORECASE, +) +# Member entry inside a group block: +# [ N] .section_name +_GROUP_ENTRY_RE = re.compile(r"^\s*\[\s*(\d+)\]\s+(.+)$") + + # Lines whose presence is sensitive to layout choices but says nothing # semantic: count of headers, where they live, etc. Drop them entirely. _DROP_PREFIXES = ( @@ -156,7 +167,7 @@ def normalize(text): # Sort all block kinds; section ordering and symbol ordering are not # semantic. (Relocation sections within a relocation block are # already named, so sorting is fine.) 
- if cur_kind in ("shdr", "sym", "rela"): + if cur_kind in ("shdr", "sym", "rela", "group"): cur_block.sort() out_blocks.extend(cur_block) cur_block = [] @@ -184,6 +195,14 @@ def normalize(text): cur_kind = "shdr" out_blocks.append(line) continue + gm = _GROUP_HDR_RE.search(line) + if gm: + flush() + cur_kind = "group" + # Canonical heading preserves the group symbol name but drops + # the section index and member count — both are layout-dependent. + out_blocks.append("COMDAT group [%s]:\n" % gm.group(3)) + continue if any(line.startswith(p) for p in _DROP_PREFIXES): continue @@ -211,6 +230,16 @@ def normalize(text): if cur_kind == "rela": cur_block.append(_normalize_rela(line)) continue + if cur_kind == "group": + # Replace the numeric section index with <idx>; keep the name. + # "[Index] Name" header and blank lines pass through after + # index scrubbing so the block can be sorted by section name. + em = _GROUP_ENTRY_RE.match(line) + if em: + cur_block.append("[<idx>] %s\n" % em.group(2).strip()) + else: + cur_block.append(line) + continue # Default: scrub addresses outside section bodies too. line = _HEX_ADDR.sub("<addr>", line) diff --git a/test/elf/run.sh b/test/elf/run.sh @@ -209,7 +209,14 @@ else xfail=0 [ -f "${src%.c}.xfail" ] && xfail=1 - if ! clang --target=aarch64-linux-gnu -c -O0 "$src" -o "$wd/golden.o" \ + # Per-case extra compiler flags: drop a NN_name.cflags file alongside + # the .c to pass additional flags (e.g. -ffunction-sections, -x c++). + extra_cflags="" + # shellcheck disable=SC2034 + [ -f "${src%.c}.cflags" ] && extra_cflags="$(cat "${src%.c}.cflags")" + + # shellcheck disable=SC2086 + if ! 
clang --target=aarch64-linux-gnu -c -O0 $extra_cflags "$src" -o "$wd/golden.o" \ 2> "$wd/clang.log"; then note_skip "$name" "clang -c failed (cross-compile not configured?)" continue @@ -324,43 +331,121 @@ printf '\n' # ----- Layer D: exec/*.c — exe behavioral comparison -------------------- # -# clang -c case.c -> golden.o -# clang -static -nostdlib golden.o -> golden.exe (lld) -# cfree ld -o cfree.exe golden.o -# qemu-aarch64 golden.exe / cfree.exe; diff stdout/stderr/exit. +# Case grouping: all .c files sharing the same leading [0-9]+ prefix form +# ONE test case. They are compiled to separate .o files and linked +# together (multi-TU). Single-file cases keep working unchanged. +# +# Layout convention: +# exec/01_exit_0.c -> case "01_exit_0" (1 TU) +# exec/07_multi_a.c + 07_multi_b.c -> case "07_multi" (2 TUs) +# exec/<case-name>.xfail, or a .xfail beside any member .c, marks the case expected-fail. +# +# The case name is the longest common prefix of the group's basenames +# with any trailing '_' stripped — so the multi-TU files share a stem +# (e.g. 07_multi_) while each ends in a unique disambiguator. +# +# Per case: +# clang -c <each .c> -> <basename>.o +# clang -fuse-ld=lld -static -nostdlib *.o -> golden.exe +# cfree ld -o cfree.exe *.o +# <runner> golden.exe / cfree.exe; diff stdout/stderr/exit. printf 'Layer D — exe behavioral comparison\n' +shopt -s nullglob exec_srcs=( "$TEST_DIR"/exec/[0-9]*.c ) CFREE_BIN="${CFREE:-$ROOT/build/cfree}" if [ ${#exec_srcs[@]} -eq 0 ]; then printf ' (no exec cases yet)\n' else + # Build the unique sorted list of numeric prefixes.
+ declare -A seen_prefix=() + prefixes=() for src in "${exec_srcs[@]}"; do - name="exec/$(basename "$src" .c)" + bn="$(basename "$src" .c)" + prefix="${bn%%[!0-9]*}" # leading digits + if [ -z "$prefix" ]; then continue; fi + if [ -z "${seen_prefix[$prefix]:-}" ]; then + seen_prefix[$prefix]=1 + prefixes+=("$prefix") + fi + done + IFS=$'\n' prefixes=( $(printf '%s\n' "${prefixes[@]}" | sort) ); unset IFS + + for prefix in "${prefixes[@]}"; do + # Gather sorted group members. + group=( "$TEST_DIR"/exec/${prefix}_*.c "$TEST_DIR"/exec/${prefix}.c ) + # Filter non-existent (the second pattern usually doesn't match). + real_group=() + for f in "${group[@]}"; do [ -f "$f" ] && real_group+=("$f"); done + if [ ${#real_group[@]} -eq 0 ]; then continue; fi + IFS=$'\n' real_group=( $(printf '%s\n' "${real_group[@]}" | sort) ); unset IFS + + # Case name = longest common prefix of basenames, trimmed of trailing '_'. + first_bn="$(basename "${real_group[0]}" .c)" + case_name="$first_bn" + for f in "${real_group[@]:1}"; do + bn="$(basename "$f" .c)" + new="" + n=0 + while [ $n -lt ${#case_name} ] && [ $n -lt ${#bn} ] \ + && [ "${case_name:$n:1}" = "${bn:$n:1}" ]; do + new+="${case_name:$n:1}" + n=$((n+1)) + done + case_name="$new" + done + case_name="${case_name%_}" + if [ -z "$case_name" ]; then case_name="$prefix"; fi + + name="exec/$case_name" if [ $have_clang -ne 1 ]; then note_skip "$name" "clang missing"; continue; fi if [ $have_runner -ne 1 ]; then note_skip "$name" "no aarch64 runner (qemu/podman)"; continue; fi if [ ! -x "$CFREE_BIN" ]; then note_skip "$name" "cfree binary not built"; continue; fi - stem="$(basename "$src" .c)" - wd="$BUILD_DIR/exec_$stem" + wd="$BUILD_DIR/exec_$case_name" mkdir -p "$wd" + # xfail sentinel: <case-name>.xfail (sits beside any one of the .c + # files in the group) marks the whole case expected-fail. 
xfail=0 - [ -f "${src%.c}.xfail" ] && xfail=1 + [ -f "$TEST_DIR/exec/$case_name.xfail" ] && xfail=1 + for f in "${real_group[@]}"; do + [ -f "${f%.c}.xfail" ] && xfail=1 + done + + # Per-case extra compiler flags: exec/<case_name>.cflags, applied to + # every TU in the group. + exec_extra_cflags="" + if [ -f "$TEST_DIR/exec/$case_name.cflags" ]; then + exec_extra_cflags="$(cat "$TEST_DIR/exec/$case_name.cflags")" + fi - if ! clang --target=aarch64-linux-gnu -c -O0 -ffreestanding -fno-pic \ - "$src" -o "$wd/case.o" 2> "$wd/clang_c.log"; then + # Compile each TU to its own .o. + objs=() + compile_ok=1 + for f in "${real_group[@]}"; do + obj="$wd/$(basename "$f" .c).o" + # shellcheck disable=SC2086 + if ! clang --target=aarch64-linux-gnu -c -O0 -ffreestanding -fno-pic \ + $exec_extra_cflags \ + "$f" -o "$obj" 2> "$wd/clang_c_$(basename "$f" .c).log"; then + compile_ok=0 + break + fi + objs+=("$obj") + done + if [ $compile_ok -ne 1 ]; then note_skip "$name" "clang -c failed (cross-compile not configured?)" continue fi if ! clang --target=aarch64-linux-gnu -fuse-ld=lld -static -nostdlib \ - "$wd/case.o" -o "$wd/golden.exe" 2> "$wd/link_golden.log"; then + "${objs[@]}" -o "$wd/golden.exe" 2> "$wd/link_golden.log"; then note_skip "$name" "clang/lld link of golden.exe failed" continue fi - if ! "$CFREE_BIN" ld -o "$wd/cfree.exe" "$wd/case.o" \ + if ! "$CFREE_BIN" ld -o "$wd/cfree.exe" "${objs[@]}" \ 2> "$wd/link_cfree.log"; then if [ $xfail -eq 1 ]; then note_pass "$name (xfail: cfree ld rejected)"