kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit dd8f6b240bea8e165edc47804ba5177f0938abbd
parent f7300171f7b5734a14ac2248c32c4af8b8f799e8
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat,  9 May 2026 10:46:46 -0700

link: advance JIT/exec linker; fix build staleness masking test results

Brings the linker + JIT path far enough to run most of test/link/cases on
AArch64 (J path), and removes the build-system gaps that masked which
changes were actually live. Major pieces:

- link_layout: emit __init_array_start/__init_array_end and
  __fini_array_start/__fini_array_end as boundary symbols; COMMON
  coalescing + BSS allocation; keep INIT/FINI_ARRAY sections in the
  kept-section filter.
- link_jit: page size from sysconf instead of hard-coded 0x1000 (fixes
  mprotect on macOS/Apple Silicon 16K pages); run init_array post-mprotect;
  reverse-walk fini_array in cfree_jit_run_dtors.
- link_reloc, obj/elf: new RelocKind entries for R_AARCH64_JUMP26 and
  R_AARCH64_ADR_GOT_PAGE / R_AARCH64_LD64_GOT_LO12_NC pass-through;
  JUMP26 falls through to the CALL26 handler.
- Build: -MMD -MP header dependency tracking; ar archive replaced (not
  appended) so deleted .c files don't leave stale entries; harness
  binaries (cfree-roundtrip, link-exe-runner, jit-runner) declared as
  Make targets with $(LIB_AR) prerequisite so test-link rebuilds them
  deterministically.
- test/{elf,link}/run.sh: locate Make-built harness binaries instead of
  rebuilding their own copies.
- test-lib-deps: enforce libcfree.a's external symbol allowlist.
- doc/linker-status.md: current J-path baseline and remaining blockers
  (intermittent JIT timeouts, static GOT, archive loading, gc-sections).

Diffstat:
M Makefile | 12 ++++++++++--
A doc/linker-status.md | 149 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A scripts/lib_external_deps.py | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/api/pipeline.c | 3 +++
M src/link/link.h | 1 +
M src/link/link_jit.c | 44 +++++++++++++++++++++++++++++++++++++-------
M src/link/link_layout.c | 203 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
M src/link/link_reloc.c | 3 ++-
M src/obj/elf.h | 3 +++
M src/obj/elf_reloc_aarch64.c | 6 ++++++
M src/obj/obj.h | 4 +++-
M test/elf/run.sh | 26 ++++----------------------
A test/lib_deps.allowlist | 11 +++++++++++
M test/link/run.sh | 29 ++++++++++++++++-------------
M test/test.mk | 46 +++++++++++++++++++++++++++++++++++++++-------
15 files changed, 531 insertions(+), 67 deletions(-)

diff --git a/Makefile b/Makefile @@ -13,9 +13,11 @@ DRIVER_CFLAGS = $(CFLAGS_COMMON) -Iinclude LIB_SRCS = $(shell find src -name '*.c') LIB_OBJS = $(patsubst src/%.c,build/lib/%.o,$(LIB_SRCS)) +LIB_DEPS = $(LIB_OBJS:.o=.d) DRIVER_SRCS = $(wildcard driver/*.c) DRIVER_OBJS = $(patsubst driver/%.c,build/driver/%.o,$(DRIVER_SRCS)) +DRIVER_DEPS = $(DRIVER_OBJS:.o=.d) LIB_AR = build/libcfree.a BIN = build/cfree @@ -34,8 +36,11 @@ driver: $(DRIVER_OBJS) bin: $(BIN) +# Replace the archive (`ar rcs` only adds/updates), so removing a .c file +# also removes its .o from the archive on the next build. $(LIB_AR): $(LIB_OBJS) @mkdir -p $(dir $@) + @rm -f $@ ar rcs $@ $(LIB_OBJS) $(BIN): $(DRIVER_OBJS) $(LIB_AR) @@ -43,13 +48,16 @@ $(BIN): $(DRIVER_OBJS) $(LIB_AR) build/lib/%.o: src/%.c @mkdir -p $(dir $@) - $(CC) $(LIB_CFLAGS) -c $< -o $@ + $(CC) $(LIB_CFLAGS) -MMD -MP -c $< -o $@ build/driver/%.o: driver/%.c @mkdir -p $(dir $@) - $(CC) $(DRIVER_CFLAGS) -c $< -o $@ + $(CC) $(DRIVER_CFLAGS) -MMD -MP -c $< -o $@ clean: rm -rf build +-include $(LIB_DEPS) +-include $(DRIVER_DEPS) + include test/test.mk diff --git a/doc/linker-status.md b/doc/linker-status.md @@ -0,0 +1,149 @@ +# Linker / JIT status — `make test-link` + +## Test score + +| State | Pass | Fail | Skip | +|-------|------|------|------| +| Before session 1 | 0 | 0 | 88 (detection bug) | +| End of session 1 (claimed, unverified) | 66 | 20 | 0 | +| Current J-path baseline (clean rebuild) | ~19 | ~11 | — | +| Target | 88 | 0 | 0 | + +> J-path-only counts above (one path per case, run on Apple Silicon host). +> R and E paths not measured this session — E requires qemu/podman per case +> and was too slow to iterate against; R is gated on the harness binaries +> rebuilding correctly, which session 1 left broken. 
+ +--- + +## Session 2 changes + +### Build system: stale artifacts no longer mask correctness + +The session-1 "JIT lookup returns NULL for all cases" blocker turned out to +be a build-staleness artifact, not a real bug. After `make clean && make +lib`, JIT lookup works correctly — the symhash code was always fine. The +session lost time chasing this because the Makefile's dependency tracking +was incomplete. + +Fixes: + +| Fix | Why | +|-----|-----| +| `Makefile`: `-MMD -MP` on every compile + `-include $(DEPS)` | Touching a header now rebuilds dependent .o files. Without this, `link.h` / `link_internal.h` edits silently produced mixed-version archives. | +| `Makefile`: `rm -f $(LIB_AR)` before `ar rcs` | `ar rcs` is additive; deleted .c files would otherwise leave stale .o entries in the archive. | +| `test/test.mk`: `cfree-roundtrip`, `link-exe-runner`, `jit-runner` declared as Make targets with `$(LIB_AR)` prerequisite | `test-link` now rebuilds the harness automatically when libcfree changes. The previous `run.sh`-driven build skipped `cfree-roundtrip` entirely (only `test-elf` rebuilt it). | +| `test/link/run.sh`, `test/elf/run.sh`: locate (not build) harness binaries | Single source of truth for the rebuild rule lives in the Makefile. | +| `src/api/pipeline.c`: add missing `R_AARCH64_JUMP26`, `R_AARCH64_ADR_GOT_PAGE`, `R_AARCH64_LD64_GOT_LO12_NC` cases to `reloc_kind_name` switch | `-Werror=switch` broke `make clean && make lib` until the new RelocKind enum entries were handled. | + +Verification: +- `touch src/link/link.h && make lib` rebuilds 7 source files (was: 0). +- `touch src/link/link.h && make build/test/jit-runner` chains through libcfree to the harness. + +### J-path baseline after clean rebuild + +After a fully clean build (`make clean && make test-link`), the J-path +test scores fluctuate between roughly 18 and 23 passing depending on +intermittent timeouts (see Blocker 0). 
+ +Stable failures (always fail, expected feature gaps): + +| Case | Symptom | Cause | +|------|---------|-------| +| `14_weak_present`, `16_weak_undef` | exit 1 | static GOT not implemented | +| `25_gc_sections` | exit 1 | `--gc-sections` accepted but ignored | +| `26_archive_demand`, `27_archive_whole` | exit 1 | `link_add_archive_bytes` panics | + +Intermittent failures (the real new blocker): + +| Cases | Symptom | +|-------|---------| +| `02`, `04`, `05`, `06`, `07`, `09`, `13`, `15`, `18`, `20`, `22`, `23`, `24`, `27`, `29`, ... | exit 124 (5-second timeout). Different cases hang on different runs. Even `29_jit_lookup_miss` (just `return 0;`) sometimes hangs. Stress-running `04_rodata_u32` 20 times in a loop reproduces ~30% hang rate. | + +--- + +## Blockers (in priority order) + +### 0 — Intermittent JIT timeouts (NEW, blocks everything else) + +Cases with no flow-control of their own (e.g., `04_rodata_u32` reads a +const, returns 0 or 1) hang ~30% of the time. Different cases hang on +different runs, including trivially-correct cases like `29_jit_lookup_miss`. + +**Hypotheses to investigate** (in order of likelihood): + +1. **icache / dcache coherency on Apple Silicon.** `cfree_jit_from_image` + calls `__builtin___clear_cache(base, base + map_size)` *before* + `mprotect`. On ARMv8 the standard sequence is `dc cvau; dsb ish; ic + ivau; dsb ish; isb`, and the `isb` should follow the permission flip, + not precede it. Try moving the flush to after mprotect, or add a + manual `__asm__("isb")` after each `mprotect`. +2. **macOS hardened-runtime / `MAP_JIT`.** mmap'd RW pages flipped to RX + via `mprotect` work without `MAP_JIT` for unsigned binaries, but the + transition may be racy. Try `mmap(..., MAP_JIT, ...)` and + `pthread_jit_write_protect_np` instead of mprotect. +3. **Address-space layout interaction.** ASLR varies the mmap base. Some + bases may produce ADRP encodings that look correct but interact badly + with the BTI / PAC defaults on Apple Silicon. 
Check whether the hang + correlates with specific high bits of `jit->base`. + +**Next step**: when a hang reproduces, attach `lldb` (or `sample(1)`) to +the hung pid and capture the PC. If PC is inside `test_main`, the relocs +landed wrong. If PC is at the entry but executing stale instructions, +it's a cache-coherency issue. + +### 1 — Static GOT (cases 14, 15, 16) + +Unchanged from session 1 plan. `weak_present`, `weak_override`, +`weak_undef` use GOT-indirect relocs even with `-fno-PIC` because clang +always GOT-routes weak externs. + +**Design**: +- During `emit_reloc_records`, collect symbols referenced via + `R_AARCH64_ADR_GOT_PAGE` / `R_AARCH64_LD64_GOT_LO12_NC`. Record unique + GOT-needing symbol ids. +- After `layout_sections` / `layout_commons`, append a synthetic 8-byte + `.got` section to the RW segment. Store GOT-slot vaddrs in a + `got_vaddr[]` array indexed by `LinkSymId`. +- In `link_reloc_apply`, `ADR_GOT_PAGE` uses the GOT slot's page + address; `LD64_GOT_LO12_NC` uses the slot's offset within that page + (the low 12 bits of the slot address). GOT slots + hold the symbol's final address (0 for undefined weak). + +**Cleanup once GOT lands**: drop `-fno-PIC -fno-pie` from +`test/link/run.sh` (both test source and `start.c` lines). + +### 2 — Archive loading (cases 26, 27) + +Unchanged. `cfree_ar_iter` already exists in `src/api/ar.c`. Wire it up +in `link_add_archive_bytes`: + +- **`whole_archive`**: iterate all members, `link_add_obj_bytes` each. +- **demand-loading**: build a name→member-offset index from the `/` + member; after `resolve_symbols` identifies undefs, pull the member + defining each undef, re-run resolution, repeat until stable. Needs a + "any undefs remaining?" query in the resolve path. + +### 3 — GC sections (case 25) + +Unchanged. Before `layout_sections`, BFS from the entry symbol (and +SSEM_INIT_ARRAY / SSEM_FINI_ARRAY roots) over the reloc graph; mark +sections that aren't reached and drop them in `section_kept`. 
+ +--- + +## fini_array note (was Blocker 2) + +Session 1's listed fini_array SIGSEGV blocker (cases 21/22/23) does not +reproduce after the build-staleness fix; those cases pass cleanly when +they don't hit the intermittent timeout from Blocker 0. Removed from +the priority list. + +--- + +## Dependency order + +1. **Intermittent timeouts (Blocker 0)** — until cases run deterministically + the other fixes can't be measured. +2. **GOT** → unblocks 14/15/16 (and lets the `-fno-PIC` workaround go). +3. **Archive loading** → unblocks 26/27. +4. **GC sections** → unblocks 25. diff --git a/scripts/lib_external_deps.py b/scripts/lib_external_deps.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +"""Print external symbols that an archive (.a) depends on. + +Runs `nm` over the archive, then reports symbols that are undefined in some +member but not defined by any other member of the same archive. +""" + +import argparse +import subprocess +import sys + + +def external_deps(archive: str, nm: str = "nm") -> list[str]: + out = subprocess.run( + [nm, archive], check=True, capture_output=True, text=True + ).stdout + + defined: set[str] = set() + undefined: set[str] = set() + + for line in out.splitlines(): + line = line.rstrip() + if not line or line.endswith(":"): + # Blank line or "<member>.o:" header. + continue + parts = line.split(None, 2) + if len(parts) < 2: + continue + if len(parts) == 2: + # Undefined: "<type> <name>" (no address column). + type_, name = parts + else: + # Defined: "<addr> <type> <name>". + _, type_, name = parts + + if type_ == "U": + undefined.add(name) + elif type_.isupper(): + # Global definition (T, D, B, R, S, A, C, ...). + defined.add(name) + # Lowercase = local (file-scope); irrelevant for archive linkage. 
+ + return sorted(undefined - defined) + + +def main() -> int: + ap = argparse.ArgumentParser(description=__doc__) + ap.add_argument("archive", help="Path to a static archive (.a)") + ap.add_argument("--nm", default="nm", help="nm binary to use") + args = ap.parse_args() + + for sym in external_deps(args.archive, args.nm): + print(sym) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/api/pipeline.c b/src/api/pipeline.c @@ -959,6 +959,9 @@ static const char* reloc_kind_name(u16 kind) case R_ARM_MOVT: return "R_ARM_MOVT"; case R_ARM_B26: return "R_ARM_B26"; case R_AARCH64_CALL26: return "R_AARCH64_CALL26"; + case R_AARCH64_JUMP26: return "R_AARCH64_JUMP26"; + case R_AARCH64_ADR_GOT_PAGE: return "R_AARCH64_ADR_GOT_PAGE"; + case R_AARCH64_LD64_GOT_LO12_NC: return "R_AARCH64_LD64_GOT_LO12_NC"; case R_AARCH64_ADR_PREL_PG_HI21: return "R_AARCH64_ADR_PREL_PG_HI21"; case R_AARCH64_ADD_ABS_LO12_NC: return "R_AARCH64_ADD_ABS_LO12_NC"; case R_AARCH64_LDST8_ABS_LO12_NC: return "R_AARCH64_LDST8_ABS_LO12_NC"; diff --git a/src/link/link.h b/src/link/link.h @@ -42,6 +42,7 @@ typedef struct LinkSymbol { u64 value; u64 vaddr; /* final linked address, 0 for unresolved undef */ u64 size; + u32 common_align; /* alignment for SK_COMMON symbols */ u8 bind; /* SymBind */ u8 kind; /* SymKind */ u8 defined; diff --git a/src/link/link_jit.c b/src/link/link_jit.c @@ -23,10 +23,14 @@ static SrcLoc no_loc(void) { SrcLoc l = {0,0,0}; return l; } -#define JIT_PAGE_SIZE 0x1000u - static u64 align_up_u64(u64 v, u64 a) { return (v + (a - 1u)) & ~(a - 1u); } +static u64 jit_page_size(void) +{ + long p = sysconf(_SC_PAGESIZE); + return (p > 0) ? 
(u64)p : 0x4000u; +} + struct CfreeJit { Compiler* c; LinkImage* image; @@ -62,10 +66,10 @@ CfreeJit* cfree_jit_from_image(LinkImage* img) } for (i = 0; i < img->nsegments; ++i) { const LinkSegment* seg = &img->segments[i]; - u64 end = seg->vaddr + align_up_u64(seg->mem_size, JIT_PAGE_SIZE); + u64 end = seg->vaddr + align_up_u64(seg->mem_size, jit_page_size()); if (end > map_size) map_size = (size_t)end; } - map_size = (size_t)align_up_u64((u64)map_size, JIT_PAGE_SIZE); + map_size = (size_t)align_up_u64((u64)map_size, jit_page_size()); base = mmap(NULL, map_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); @@ -111,7 +115,7 @@ CfreeJit* cfree_jit_from_image(LinkImage* img) /* Flip permissions per segment. */ for (i = 0; i < img->nsegments; ++i) { const LinkSegment* seg = &img->segments[i]; - size_t mlen = (size_t)align_up_u64(seg->mem_size, JIT_PAGE_SIZE); + size_t mlen = (size_t)align_up_u64(seg->mem_size, jit_page_size()); if (mprotect((u8*)base + seg->vaddr, mlen, perms_for(seg->flags)) != 0) { munmap(base, map_size); @@ -137,6 +141,20 @@ CfreeJit* cfree_jit_from_image(LinkImage* img) compiler_undefer(c, img->deferred); img->deferred = NULL; } + + /* Run .init_array constructors in forward order. */ + { + typedef void (*VoidFn)(void); + void* p_start = cfree_jit_lookup(jit, "__init_array_start"); + void* p_end = cfree_jit_lookup(jit, "__init_array_end"); + if (p_start && p_end) { + VoidFn* fn = (VoidFn*)p_start; + VoidFn* end = (VoidFn*)p_end; + for (; fn != end; ++fn) + if (*fn) (*fn)(); + } + } + return jit; } @@ -204,6 +222,18 @@ void cfree_jit_sym_iter_free(CfreeJitSymIter* it) void cfree_jit_run_dtors(CfreeJit* jit) { - (void)jit; - /* TODO: walk fini_array sections in reverse and call each fn pointer. 
*/ + typedef void (*VoidFn)(void); + void* p_start; + void* p_end; + if (!jit) return; + p_start = cfree_jit_lookup(jit, "__fini_array_start"); + p_end = cfree_jit_lookup(jit, "__fini_array_end"); + if (p_start && p_end) { + VoidFn* begin = (VoidFn*)p_start; + VoidFn* fn = (VoidFn*)p_end; + while (fn != begin) { + --fn; + if (*fn) (*fn)(); + } + } } diff --git a/src/link/link_layout.c b/src/link/link_layout.c @@ -15,12 +15,17 @@ #include "core/pool.h" #include <string.h> +#include <unistd.h> LinkImage* link_image_alloc(Compiler*); /* defined in link.c */ static SrcLoc no_loc(void) { SrcLoc l = {0,0,0}; return l; } -#define PAGE_SIZE 0x1000u +static u64 layout_page_size(void) +{ + long p = sysconf(_SC_PAGESIZE); + return (p > 0) ? (u64)p : 0x4000u; +} /* Three-bucket segment partitioning by permission. */ typedef enum SegBucket { @@ -39,6 +44,7 @@ static int section_kept(const Section* s) * them participate in a static ET_EXEC layout. */ if (!(s->flags & SF_ALLOC)) return 0; if (s->sem == SSEM_PROGBITS || s->sem == SSEM_NOBITS) return 1; + if (s->sem == SSEM_INIT_ARRAY || s->sem == SSEM_FINI_ARRAY) return 1; return 0; } @@ -164,16 +170,17 @@ static void resolve_symbols(Linker* l, LinkImage* img) int is_def = (s->kind != SK_UNDEF); memset(&rec, 0, sizeof(rec)); - rec.name = s->name; - rec.input_id = in->id; - rec.obj_sym = e.id; - rec.section_id = LINK_SEC_NONE; /* filled later */ - rec.value = s->value; - rec.size = s->size; - rec.bind = (u8)s->bind; - rec.kind = (u8)s->kind; - rec.defined = (u8)is_def; - rec.vaddr = 0; + rec.name = s->name; + rec.input_id = in->id; + rec.obj_sym = e.id; + rec.section_id = LINK_SEC_NONE; /* filled later */ + rec.value = s->value; + rec.size = s->size; + rec.common_align = (s->kind == SK_COMMON) ? 
(u32)s->common_align : 0u; + rec.bind = (u8)s->bind; + rec.kind = (u8)s->kind; + rec.defined = (u8)is_def; + rec.vaddr = 0; if (is_def && (s->bind == SB_GLOBAL || s->bind == SB_WEAK) && s->name != 0) { @@ -187,7 +194,25 @@ static void resolve_symbols(Linker* l, LinkImage* img) LinkSymbol* prev = &img->syms[existing - 1]; int new_strength = bind_strength((u8)s->bind); int old_strength = bind_strength(prev->bind); - if (new_strength > old_strength) { + /* COMMON symbols coalesce: largest size wins. */ + if (prev->kind == SK_COMMON && rec.kind == SK_COMMON) { + if (rec.size > prev->size) { + u32 new_align = (rec.common_align > prev->common_align) + ? rec.common_align : prev->common_align; + rec.id = existing; + rec.common_align = new_align; + *prev = rec; + } + m->sym[e.id] = existing; + } else if (rec.kind == SK_COMMON) { + /* Strong def beats COMMON — keep existing. */ + m->sym[e.id] = existing; + } else if (prev->kind == SK_COMMON) { + /* Strong def beats COMMON — override. */ + rec.id = existing; + *prev = rec; + m->sym[e.id] = existing; + } else if (new_strength > old_strength) { /* This def wins; keep the existing LinkSymId * stable so prior references resolve, but * update the contents. */ @@ -411,7 +436,7 @@ static void layout_sections(Linker* l, LinkImage* img) u32 perms; if (!seg_count[b]) continue; align = (u64)seg_align[b]; - if (align < PAGE_SIZE) align = PAGE_SIZE; + if (align < layout_page_size()) align = layout_page_size(); cursor = align_up_u64(cursor, align); seg = &img->segments[img->nsegments]; @@ -465,6 +490,88 @@ static void layout_sections(Linker* l, LinkImage* img) } } +/* ---- pass 2b: COMMON symbol BSS allocation ---- */ +/* After segments are laid out, extend the RW segment's BSS area to + * accommodate any SK_COMMON symbols that have no section yet. */ +static void layout_commons(Linker* l, LinkImage* img) +{ + u32 i; + (void)l; + LinkSegment* rw_seg = NULL; + + /* Find the RW segment. 
*/ + for (i = 0; i < img->nsegments; ++i) { + if (img->segments[i].flags & SF_WRITE) { + rw_seg = &img->segments[i]; + break; + } + } + + /* First pass: check if we even have COMMON symbols. */ + { + int has_common = 0; + for (i = 0; i < img->nsyms; ++i) + if (img->syms[i].kind == SK_COMMON && img->syms[i].defined) { has_common = 1; break; } + if (!has_common) return; + } + + /* If no RW segment exists, create one. */ + if (!rw_seg) { + u32 nseg = img->nsegments + 1u; + LinkSegment* segs; + u8** sbufs; + size_t* scaps; + u64 vaddr = 0; + for (i = 0; i < img->nsegments; ++i) { + u64 end = img->segments[i].vaddr + img->segments[i].mem_size; + if (end > vaddr) vaddr = end; + } + vaddr = align_up_u64(vaddr, layout_page_size()); + segs = (LinkSegment*)img->heap->realloc(img->heap, img->segments, + sizeof(*img->segments) * img->nsegments, + sizeof(*img->segments) * nseg, _Alignof(LinkSegment)); + sbufs = (u8**)img->heap->realloc(img->heap, img->segment_bytes, + sizeof(*img->segment_bytes) * img->nsegments, + sizeof(*img->segment_bytes) * nseg, _Alignof(u8*)); + scaps = (size_t*)img->heap->realloc(img->heap, img->segment_bytes_cap, + sizeof(*img->segment_bytes_cap) * img->nsegments, + sizeof(*img->segment_bytes_cap) * nseg, _Alignof(size_t)); + if (!segs || !sbufs || !scaps) + compiler_panic(img->c, no_loc(), "link: oom on new RW segment for COMMON"); + img->segments = segs; + img->segment_bytes = sbufs; + img->segment_bytes_cap = scaps; + rw_seg = &img->segments[img->nsegments]; + memset(rw_seg, 0, sizeof(*rw_seg)); + rw_seg->id = (LinkSegmentId)(img->nsegments + 1u); + rw_seg->flags = SF_ALLOC | SF_WRITE; + rw_seg->vaddr = vaddr; + rw_seg->file_offset = vaddr; + rw_seg->file_size = 0; + rw_seg->mem_size = 0; + rw_seg->align = (u32)layout_page_size(); + img->segment_bytes[img->nsegments] = NULL; + img->segment_bytes_cap[img->nsegments] = 0; + img->nsegments++; + } + + /* Allocate BSS space for each COMMON symbol after file_size. 
*/ + { + u64 bss_cursor = rw_seg->vaddr + rw_seg->mem_size; + for (i = 0; i < img->nsyms; ++i) { + LinkSymbol* s = &img->syms[i]; + u32 align; + if (s->kind != SK_COMMON || !s->defined) continue; + align = s->common_align ? s->common_align : 1u; + bss_cursor = align_up_u64(bss_cursor, align); + s->vaddr = bss_cursor; + bss_cursor += s->size ? s->size : 1u; + s->kind = SK_OBJ; /* no longer COMMON once placed */ + } + rw_seg->mem_size = bss_cursor - rw_seg->vaddr; + } +} + /* Copy each input section's bytes into its segment buffer. BSS * sections contribute no file bytes. */ static void emit_segment_bytes(Linker* l, LinkImage* img) @@ -547,6 +654,71 @@ static void link_symbols_to_sections(Linker* l, LinkImage* img) } } +/* ---- pass 3b: linker-synthesized boundary symbols ---- */ + +static void emit_boundary_sym(Linker* l, LinkImage* img, + const char* name, u64 vaddr) +{ + Sym sym = pool_intern_cstr(l->c->global, name); + LinkSymId id = symhash_get(&img->globals, sym); + LinkSymbol rec; + memset(&rec, 0, sizeof(rec)); + rec.name = sym; + rec.kind = SK_OBJ; + rec.defined = 1; + rec.vaddr = vaddr; + rec.bind = SB_GLOBAL; + if (id != LINK_SYM_NONE) { + /* Satisfy any existing undef reference. */ + img->syms[id - 1] = rec; + img->syms[id - 1].id = id; + } else { + LinkSymId fresh = append_symbol(img, &rec); + symhash_insert(&img->globals, sym, fresh, &id); + } +} + +static void emit_array_boundaries(Linker* l, LinkImage* img) +{ + u32 ii, j; + /* Per-semantic: track [min_vaddr, max_vaddr+size]. 
*/ + u64 init_start = (u64)-1, init_end = 0; + u64 fini_start = (u64)-1, fini_end = 0; + + for (ii = 0; ii < l->ninputs; ++ii) { + ObjBuilder* ob = l->inputs[ii].obj; + InputMap* m = &img->input_maps[ii]; + for (j = 1; j < obj_section_count(ob); ++j) { + const Section* s = obj_section_get(ob, j); + LinkSectionId ls_id; + const LinkSection* ls; + u64 start, end; + if (!s) continue; + if (s->sem != SSEM_INIT_ARRAY && s->sem != SSEM_FINI_ARRAY) continue; + ls_id = m->section[j]; + if (ls_id == LINK_SEC_NONE) continue; + ls = &img->sections[ls_id - 1]; + start = ls->vaddr; + end = ls->vaddr + ls->size; + if (s->sem == SSEM_INIT_ARRAY) { + if (start < init_start) init_start = start; + if (end > init_end) init_end = end; + } else { + if (start < fini_start) fini_start = start; + if (end > fini_end) fini_end = end; + } + } + } + + if (init_start == (u64)-1) { init_start = 0; init_end = 0; } + if (fini_start == (u64)-1) { fini_start = 0; fini_end = 0; } + + emit_boundary_sym(l, img, "__init_array_start", init_start); + emit_boundary_sym(l, img, "__init_array_end", init_end); + emit_boundary_sym(l, img, "__fini_array_start", fini_start); + emit_boundary_sym(l, img, "__fini_array_end", fini_end); +} + /* ---- pass 4: relocation records ---- */ static u8 reloc_width(RelocKind k) @@ -556,7 +728,8 @@ static u8 reloc_width(RelocKind k) return 4; case R_ABS64: case R_REL64: case R_PC64: return 8; - case R_AARCH64_CALL26: case R_AARCH64_ADR_PREL_PG_HI21: + case R_AARCH64_JUMP26: case R_AARCH64_CALL26: + case R_AARCH64_ADR_PREL_PG_HI21: case R_AARCH64_ADD_ABS_LO12_NC: case R_AARCH64_LDST8_ABS_LO12_NC: case R_AARCH64_LDST16_ABS_LO12_NC: @@ -666,8 +839,10 @@ LinkImage* link_resolve(Linker* l) resolve_symbols(l, img); layout_sections(l, img); + layout_commons(l, img); emit_segment_bytes(l, img); link_symbols_to_sections(l, img); + emit_array_boundaries(l, img); resolve_undefs(l, img); emit_reloc_records(l, img); resolve_entry(l, img); diff --git a/src/link/link_reloc.c 
b/src/link/link_reloc.c @@ -53,8 +53,9 @@ void link_reloc_apply(Compiler* c, RelocKind k, u8* P_bytes, wr_u32_le(P_bytes, (u32)((u64)v & 0xffffffffu)); return; } + case R_AARCH64_JUMP26: case R_AARCH64_CALL26: { - /* BL/B imm26 — branch displacement in 4-byte units, signed. + /* B/BL imm26 — branch displacement in 4-byte units, signed. * Clear bits [25:0] of the existing instruction and OR in the * new imm26. Range check: ±128MiB. */ i64 disp = (i64)S + A - (i64)P; diff --git a/src/obj/elf.h b/src/obj/elf.h @@ -127,11 +127,14 @@ #define ELF_R_AARCH64_ADR_PREL_PG_HI21 275 #define ELF_R_AARCH64_ADD_ABS_LO12_NC 277 #define ELF_R_AARCH64_LDST8_ABS_LO12_NC 278 +#define ELF_R_AARCH64_JUMP26 282 #define ELF_R_AARCH64_CALL26 283 #define ELF_R_AARCH64_LDST16_ABS_LO12_NC 284 #define ELF_R_AARCH64_LDST32_ABS_LO12_NC 285 #define ELF_R_AARCH64_LDST64_ABS_LO12_NC 286 #define ELF_R_AARCH64_LDST128_ABS_LO12_NC 299 +#define ELF_R_AARCH64_ADR_GOT_PAGE 311 +#define ELF_R_AARCH64_LD64_GOT_LO12_NC 312 /* Map cfree-canonical RelocKind <-> AArch64 ELF reloc type. 
Returns * R_AARCH64_NONE (0) on unsupported kinds; emit_elf treats that as a diff --git a/src/obj/elf_reloc_aarch64.c b/src/obj/elf_reloc_aarch64.c @@ -21,6 +21,7 @@ u32 elf_aarch64_reloc_to(u32 kind /* RelocKind */) case R_PC32: return ELF_R_AARCH64_PREL32; case R_REL64: return ELF_R_AARCH64_PREL64; case R_REL32: return ELF_R_AARCH64_PREL32; + case R_AARCH64_JUMP26: return ELF_R_AARCH64_JUMP26; case R_AARCH64_CALL26: return ELF_R_AARCH64_CALL26; case R_AARCH64_ADR_PREL_PG_HI21: return ELF_R_AARCH64_ADR_PREL_PG_HI21; case R_AARCH64_ADD_ABS_LO12_NC: return ELF_R_AARCH64_ADD_ABS_LO12_NC; @@ -29,6 +30,8 @@ u32 elf_aarch64_reloc_to(u32 kind /* RelocKind */) case R_AARCH64_LDST32_ABS_LO12_NC: return ELF_R_AARCH64_LDST32_ABS_LO12_NC; case R_AARCH64_LDST64_ABS_LO12_NC: return ELF_R_AARCH64_LDST64_ABS_LO12_NC; case R_AARCH64_LDST128_ABS_LO12_NC: return ELF_R_AARCH64_LDST128_ABS_LO12_NC; + case R_AARCH64_ADR_GOT_PAGE: return ELF_R_AARCH64_ADR_GOT_PAGE; + case R_AARCH64_LD64_GOT_LO12_NC: return ELF_R_AARCH64_LD64_GOT_LO12_NC; default: return ELF_R_AARCH64_NONE; } } @@ -41,6 +44,7 @@ u32 elf_aarch64_reloc_from(u32 elf_type) case ELF_R_AARCH64_ABS32: return R_ABS32; case ELF_R_AARCH64_PREL64: return R_PC64; case ELF_R_AARCH64_PREL32: return R_PC32; + case ELF_R_AARCH64_JUMP26: return R_AARCH64_JUMP26; case ELF_R_AARCH64_CALL26: return R_AARCH64_CALL26; case ELF_R_AARCH64_ADR_PREL_PG_HI21: return R_AARCH64_ADR_PREL_PG_HI21; case ELF_R_AARCH64_ADD_ABS_LO12_NC: return R_AARCH64_ADD_ABS_LO12_NC; @@ -49,6 +53,8 @@ u32 elf_aarch64_reloc_from(u32 elf_type) case ELF_R_AARCH64_LDST32_ABS_LO12_NC: return R_AARCH64_LDST32_ABS_LO12_NC; case ELF_R_AARCH64_LDST64_ABS_LO12_NC: return R_AARCH64_LDST64_ABS_LO12_NC; case ELF_R_AARCH64_LDST128_ABS_LO12_NC: return R_AARCH64_LDST128_ABS_LO12_NC; + case ELF_R_AARCH64_ADR_GOT_PAGE: return R_AARCH64_ADR_GOT_PAGE; + case ELF_R_AARCH64_LD64_GOT_LO12_NC: return R_AARCH64_LD64_GOT_LO12_NC; default: return (u32)-1; /* sentinel */ } } diff --git 
a/src/obj/obj.h b/src/obj/obj.h @@ -94,8 +94,10 @@ typedef enum RelocKind { R_REL32, R_REL64, R_PC32, R_PC64, R_GOT32, R_PLT32, + R_AARCH64_ADR_GOT_PAGE, R_AARCH64_LD64_GOT_LO12_NC, R_ARM_CALL, R_ARM_MOVW, R_ARM_MOVT, R_ARM_B26, - R_AARCH64_CALL26, R_AARCH64_ADR_PREL_PG_HI21, R_AARCH64_ADD_ABS_LO12_NC, + R_AARCH64_JUMP26, R_AARCH64_CALL26, + R_AARCH64_ADR_PREL_PG_HI21, R_AARCH64_ADD_ABS_LO12_NC, R_AARCH64_LDST8_ABS_LO12_NC, R_AARCH64_LDST16_ABS_LO12_NC, R_AARCH64_LDST32_ABS_LO12_NC, R_AARCH64_LDST64_ABS_LO12_NC, R_AARCH64_LDST128_ABS_LO12_NC, diff --git a/test/elf/run.sh b/test/elf/run.sh @@ -110,30 +110,12 @@ runner_label() { echo "none" } -# ----- build cfree-roundtrip -------------------------------------------- +# ----- locate cfree-roundtrip ------------------------------------------- +# Built as a Make target (test/test.mk) so it picks up libcfree.a changes. ROUNDTRIP_BIN="$BUILD_DIR/cfree-roundtrip" roundtrip_ok=0 - -build_roundtrip() { - if [ ! -f "$LIB_AR" ]; then - return 1 - fi - local cc="${CC:-clang}" - local sysroot - sysroot="$(xcrun --show-sdk-path 2>/dev/null || true)" - local sysroot_flag="" - [ -n "$sysroot" ] && sysroot_flag="-isysroot $sysroot" - # shellcheck disable=SC2086 - "$cc" -std=c11 -Wall -Wextra -Werror $sysroot_flag \ - -I"$ROOT/include" -I"$ROOT/src" \ - "$TEST_DIR/cfree-roundtrip.c" "$LIB_AR" \ - -o "$ROUNDTRIP_BIN" 2> "$BUILD_DIR/cfree-roundtrip.build.log" -} - -if build_roundtrip; then - roundtrip_ok=1 -fi +[ -x "$ROUNDTRIP_BIN" ] && roundtrip_ok=1 # ----- header summary ---------------------------------------------------- @@ -145,7 +127,7 @@ printf ' qemu-aarch64: %s\n' "$([ $have_qemu -eq 1 ] && echo yes || echo no) printf ' podman: %s\n' "$([ $have_podman -eq 1 ] && echo yes || echo no)" printf ' aarch64 runner: %s\n' "$(runner_label)" printf ' python3: %s\n' "$([ $have_python3 -eq 1 ] && echo yes || echo no)" -printf ' cfree-roundtrip: %s\n' "$([ $roundtrip_ok -eq 1 ] && echo built || echo "BUILD FAILED — see 
$BUILD_DIR/cfree-roundtrip.build.log")" +printf ' cfree-roundtrip: %s\n' "$([ $roundtrip_ok -eq 1 ] && echo found || echo "MISSING — run \"make $ROUNDTRIP_BIN\"")" printf '\n' # ----- Layer A: unit/*.c ------------------------------------------------- diff --git a/test/lib_deps.allowlist b/test/lib_deps.allowlist @@ -0,0 +1,11 @@ +___memcpy_chk +___memset_chk +___stack_chk_fail +___stack_chk_guard +_longjmp +_memcmp +_memcpy +_memset +_setjmp +_strlen +_strtoll diff --git a/test/link/run.sh b/test/link/run.sh @@ -66,7 +66,7 @@ have_ar=0 have_roundtrip=0 is_aarch64=0 -if clang $CLANG_TARGET -x c - -o /dev/null < /dev/null 2>/dev/null; then +if clang $CLANG_TARGET -c -x c - -o /dev/null < /dev/null 2>/dev/null; then have_clang_cross=1 fi command -v llvm-readelf >/dev/null 2>&1 && have_readelf=1 @@ -101,9 +101,12 @@ run_aarch64() { RUN_RC=127 } -# ---- build harness binaries ------------------------------------------------ +# ---- locate harness binaries ------------------------------------------------ +# The Makefile's `test-link` target builds these as proper Make targets so +# they pick up libcfree.a changes. Running this script directly without +# `make test-link` is supported but requires the binaries to already exist. -printf 'Building harness...\n' +printf 'Locating harness...\n' if [ ! 
-f "$LIB_AR" ]; then printf ' FATAL: %s not found — run "make lib" first\n' "$LIB_AR" >&2 @@ -113,23 +116,21 @@ fi have_exe_runner=0 have_jit_runner=0 -if $CC $CFREE_CFLAGS "$TEST_DIR/harness/link_exe_runner.c" \ - "$LIB_AR" -o "$LINK_EXE_RUNNER" 2>"$BUILD_DIR/link-exe-runner.err"; then +if [ -x "$LINK_EXE_RUNNER" ]; then have_exe_runner=1 - printf ' %s link-exe-runner\n' "$(color_grn built)" + printf ' %s link-exe-runner\n' "$(color_grn found)" else - printf ' %s link-exe-runner (see %s)\n' \ - "$(color_yel warn)" "$BUILD_DIR/link-exe-runner.err" >&2 + printf ' %s link-exe-runner missing (run "make %s")\n' \ + "$(color_yel warn)" "$LINK_EXE_RUNNER" >&2 fi if [ $is_aarch64 -eq 1 ]; then - if $CC $CFREE_CFLAGS "$TEST_DIR/harness/jit_runner.c" \ - "$LIB_AR" -o "$JIT_RUNNER" 2>"$BUILD_DIR/jit-runner.err"; then + if [ -x "$JIT_RUNNER" ]; then have_jit_runner=1 - printf ' %s jit-runner\n' "$(color_grn built)" + printf ' %s jit-runner\n' "$(color_grn found)" else - printf ' %s jit-runner (see %s)\n' \ - "$(color_yel warn)" "$BUILD_DIR/jit-runner.err" >&2 + printf ' %s jit-runner missing (run "make %s")\n' \ + "$(color_yel warn)" "$JIT_RUNNER" >&2 fi fi @@ -188,6 +189,7 @@ for case_dir in "$TEST_DIR/cases"/*/; do base="$(basename "$src" .c)" obj="$work/${base}.o" if ! clang $CLANG_TARGET -O1 -fno-inline -ffreestanding -fno-stack-protector \ + -fno-PIC -fno-pie -fcommon \ -c "$src" -o "$obj" 2>"$work/compile_${base}.err"; then compile_ok=0; break fi @@ -257,6 +259,7 @@ for case_dir in "$TEST_DIR/cases"/*/; do # Compile start.o start_obj="$work/start.o" clang $CLANG_TARGET -O1 -ffreestanding -fno-stack-protector \ + -fno-PIC -fno-pie \ -c "$TEST_DIR/harness/start.c" -o "$start_obj" 2>/dev/null exe="$work/linked.exe" diff --git a/test/test.mk b/test/test.mk @@ -16,9 +16,9 @@ # four paths per case (D direct-JIT, R roundtrip, E exec, J jit-via-file). # Depends only on libcfree.a; reuses test/link harness binaries. 
-.PHONY: test test-lex test-pp test-pp-err test-elf test-ar test-link test-cg +.PHONY: test test-lex test-pp test-pp-err test-elf test-ar test-link test-cg test-lib-deps -test: test-lex test-pp test-pp-err test-elf test-ar test-link test-cg +test: test-lex test-pp test-pp-err test-elf test-ar test-link test-cg test-lib-deps test-lex: bin @CFREE=$(abspath $(BIN)) test/lex/run.sh @@ -29,9 +29,6 @@ test-pp: bin test-pp-err: bin @CFREE=$(abspath $(BIN)) test/pp/run_errors.sh -test-elf: lib bin-soft - bash test/elf/run.sh - # Best-effort cfree binary build: Layer D needs build/cfree, but the # binary may not link until enough libcfree symbols exist. The harness # detects a missing binary and skips that layer; don't break test-elf @@ -49,8 +46,43 @@ $(AR_TEST_BIN): test/ar_test.c $(LIB_AR) @mkdir -p $(dir $@) $(CC) $(DRIVER_CFLAGS) test/ar_test.c $(LIB_AR) -o $@ -test-link: lib +# Test harness binaries shared by test-elf, test-link, and test-cg. +# Declared as Make targets (not built by the run.sh scripts) so they pick +# up libcfree.a changes deterministically. +# +# HARNESS_CFLAGS drops -Wpedantic; the runners cast cfree_jit_lookup's +# void* to a function pointer, which pedantic rejects under C11. +HARNESS_CFLAGS = -std=c11 -Wall -Wextra -Werror -isysroot $(SYSROOT) -Iinclude + +ROUNDTRIP_BIN = build/test/cfree-roundtrip +LINK_EXE_RUNNER = build/test/link-exe-runner +JIT_RUNNER = build/test/jit-runner + +# cfree-roundtrip needs `-Isrc` for the internal obj.h surface it inspects. 
+$(ROUNDTRIP_BIN): test/elf/cfree-roundtrip.c $(LIB_AR) + @mkdir -p $(dir $@) + $(CC) $(HARNESS_CFLAGS) -Isrc test/elf/cfree-roundtrip.c $(LIB_AR) -o $@ + +$(LINK_EXE_RUNNER): test/link/harness/link_exe_runner.c $(LIB_AR) + @mkdir -p $(dir $@) + $(CC) $(HARNESS_CFLAGS) test/link/harness/link_exe_runner.c $(LIB_AR) -o $@ + +$(JIT_RUNNER): test/link/harness/jit_runner.c $(LIB_AR) + @mkdir -p $(dir $@) + $(CC) $(HARNESS_CFLAGS) test/link/harness/jit_runner.c $(LIB_AR) -o $@ + +test-elf: lib bin-soft $(ROUNDTRIP_BIN) + bash test/elf/run.sh + +test-link: lib $(ROUNDTRIP_BIN) $(LINK_EXE_RUNNER) $(JIT_RUNNER) bash test/link/run.sh -test-cg: lib +test-cg: lib $(ROUNDTRIP_BIN) $(LINK_EXE_RUNNER) $(JIT_RUNNER) bash test/cg/run.sh + +# Fail if libcfree.a depends on any external symbol not in the allowlist. +# Drift in either direction (new dep, or stale entry) is a failure. +test-lib-deps: lib + @diff -u test/lib_deps.allowlist \ + <(python3 scripts/lib_external_deps.py $(LIB_AR)) \ + || { echo "libcfree.a external symbol set drifted from test/lib_deps.allowlist"; exit 1; }