kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 66b1fcd57ed61a89e6c5745445b42c767da7b4b7
parent 52657d87b5877bbb89423ba0b30d8fda0b154bbf
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat,  9 May 2026 04:15:52 -0700

link: add linker with ELF, JIT, layout, and reloc support

Diffstat:
A src/link/link.c | 413 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/link/link_elf.c | 258 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/link/link_internal.h | 109 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/link/link_jit.c | 203 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/link/link_layout.c | 676 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/link/link_reloc.c | 133 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 1792 insertions(+), 0 deletions(-)

diff --git a/src/link/link.c b/src/link/link.c @@ -0,0 +1,413 @@ +/* Linker: lifecycle, input registration, and LinkImage accessors. + * + * Resolution and layout live in link_layout.c; relocation application + * in link_reloc.c; format-specific emit in link_elf.c; JIT mapping in + * link_jit.c. + * + * Discipline (see link.h:136): inputs are never destroyed by + * link_resolve, LinkInputId / ObjBuilder* mappings are stable for the + * Linker's lifetime, and resolution produces a fresh LinkImage rather + * than mutating the Linker. The single-shot implementation must keep + * those invariants so a future incremental re-resolve can plug in. */ + +#include "link/link.h" +#include "link/link_internal.h" + +#include "core/heap.h" +#include "core/pool.h" + +#include <cfree.h> + +#include <string.h> + +/* ---- SrcLoc helper ---- */ + +static SrcLoc no_loc(void) { SrcLoc l = {0,0,0}; return l; } + +/* ---- SymHash ---- */ + +#define SYMHASH_INIT_CAP 16u + +static u32 sym_hash_mix(Sym s) +{ + /* xorshift on 32-bit; cheap and good enough for interned ids. */ + u32 x = (u32)s + 0x9e3779b9u; + x ^= x >> 16; x *= 0x7feb352du; + x ^= x >> 15; x *= 0x846ca68bu; + x ^= x >> 16; + return x; +} + +static void symhash_resize(SymHash* h, u32 new_cap) +{ + SymHashEntry* new_slots; + u32 i, mask; + new_slots = (SymHashEntry*)h->heap->alloc( + h->heap, sizeof(*new_slots) * new_cap, _Alignof(SymHashEntry)); + /* Caller is expected to have ruled out OOM at the API boundary; if + * the host heap returns NULL we leak the resize and skip. The next + * insert will retry — symhash is only used inside link_resolve which + * panics on real allocator failure upstream. 
*/ + if (!new_slots) return; + memset(new_slots, 0, sizeof(*new_slots) * new_cap); + mask = new_cap - 1; + for (i = 0; i < h->cap; ++i) { + Sym n = h->slots[i].name; + u32 j; + if (n == 0) continue; + j = sym_hash_mix(n) & mask; + while (new_slots[j].name != 0) j = (j + 1) & mask; + new_slots[j] = h->slots[i]; + } + if (h->slots) h->heap->free(h->heap, h->slots, sizeof(*h->slots) * h->cap); + h->slots = new_slots; + h->cap = new_cap; +} + +void symhash_init(SymHash* h, Heap* heap) +{ + memset(h, 0, sizeof(*h)); + h->heap = heap; +} + +void symhash_fini(SymHash* h) +{ + if (h->slots) h->heap->free(h->heap, h->slots, sizeof(*h->slots) * h->cap); + h->slots = NULL; + h->cap = h->used = 0; +} + +static void symhash_ensure(SymHash* h) +{ + if (h->cap == 0) symhash_resize(h, SYMHASH_INIT_CAP); + /* Grow at 75% load. */ + else if (h->used * 4u >= h->cap * 3u) symhash_resize(h, h->cap * 2u); +} + +int symhash_insert(SymHash* h, Sym name, LinkSymId id, LinkSymId* existing_out) +{ + u32 mask, j; + symhash_ensure(h); + mask = h->cap - 1; + j = sym_hash_mix(name) & mask; + while (h->slots[j].name != 0) { + if (h->slots[j].name == name) { + if (existing_out) *existing_out = h->slots[j].id; + return 0; + } + j = (j + 1) & mask; + } + h->slots[j].name = name; + h->slots[j].id = id; + h->used++; + return 1; +} + +void symhash_set(SymHash* h, Sym name, LinkSymId id) +{ + u32 mask, j; + symhash_ensure(h); + mask = h->cap - 1; + j = sym_hash_mix(name) & mask; + while (h->slots[j].name != 0 && h->slots[j].name != name) + j = (j + 1) & mask; + if (h->slots[j].name == 0) { + h->slots[j].name = name; + h->used++; + } + h->slots[j].id = id; +} + +LinkSymId symhash_get(const SymHash* h, Sym name) +{ + u32 mask, j; + if (h->cap == 0 || name == 0) return LINK_SYM_NONE; + mask = h->cap - 1; + j = sym_hash_mix(name) & mask; + while (h->slots[j].name != 0) { + if (h->slots[j].name == name) return h->slots[j].id; + j = (j + 1) & mask; + } + return LINK_SYM_NONE; +} + +/* ---- Linker lifecycle 
---- */ + +#define INPUTS_INIT_CAP 8u + +static void linker_release(Linker* l) +{ + u32 i; + if (!l) return; + /* Free the ObjBuilders we own (the ones we read from bytes inputs). + * link_add_obj inputs are caller-owned and stay alive. */ + for (i = 0; i < l->ninputs; ++i) { + LinkInput* in = &l->inputs[i]; + if (in->kind == LINK_INPUT_OBJ_BYTES && in->obj) obj_free(in->obj); + } + if (l->inputs) l->heap->free(l->heap, l->inputs, + sizeof(*l->inputs) * l->inputs_cap); + l->heap->free(l->heap, l, sizeof(*l)); +} + +static void linker_cleanup(void* arg) { linker_release((Linker*)arg); } + +Linker* link_new(Compiler* c) +{ + Heap* h = (Heap*)c->env->heap; + Linker* l = (Linker*)h->alloc(h, sizeof(*l), _Alignof(Linker)); + if (!l) return NULL; + memset(l, 0, sizeof(*l)); + l->c = c; + l->heap = h; + l->entry_name = pool_intern_cstr(c->global, "_start"); + /* Match the rest of libcfree's lifetime story: the new'd Linker is + * registered for cleanup in case a panic fires before link_free. */ + l->deferred = compiler_defer(c, linker_cleanup, l); + return l; +} + +void link_free(Linker* l) +{ + Compiler* c; + CompilerCleanup* d; + if (!l) return; + c = l->c; + d = l->deferred; + linker_release(l); + if (d) compiler_undefer(c, d); +} + +/* ---- input registration ---- */ + +static void inputs_grow(Linker* l) +{ + u32 new_cap; + LinkInput* p; + if (l->ninputs < l->inputs_cap) return; + new_cap = l->inputs_cap ? 
l->inputs_cap * 2u : INPUTS_INIT_CAP; + p = (LinkInput*)l->heap->realloc( + l->heap, l->inputs, + sizeof(*l->inputs) * l->inputs_cap, + sizeof(*l->inputs) * new_cap, + _Alignof(LinkInput)); + if (!p) compiler_panic(l->c, no_loc(), "link: out of memory growing inputs"); + l->inputs = p; + l->inputs_cap = new_cap; +} + +LinkInputId link_add_obj(Linker* l, ObjBuilder* ob) +{ + LinkInput* in; + LinkInputId id; + if (!l || !ob) return LINK_INPUT_NONE; + inputs_grow(l); + id = (LinkInputId)(l->ninputs + 1); + in = &l->inputs[l->ninputs++]; + memset(in, 0, sizeof(*in)); + in->id = id; + in->kind = LINK_INPUT_OBJ; + in->obj = ob; + return id; +} + +LinkInputId link_add_obj_bytes(Linker* l, const char* name, + const u8* data, size_t len) +{ + /* Detect format from magic and dispatch to the matching reader. + * The returned ObjBuilder is owned by the linker and freed via the + * input cleanup. ELF only this cut. */ + ObjBuilder* ob; + LinkInput* in; + LinkInputId id; + CfreeBinFmt fmt; + if (!l || !data || !len) return LINK_INPUT_NONE; + fmt = cfree_detect_fmt(data, len); + switch (fmt) { + case CFREE_BIN_ELF: + ob = read_elf(l->c, name, data, len); + break; + default: + compiler_panic(l->c, no_loc(), + "link_add_obj_bytes: only ELF is supported in this cut"); + } + if (!ob) compiler_panic(l->c, no_loc(), + "link_add_obj_bytes: read_elf returned NULL for '%s'", + name ? name : "(unnamed)"); + inputs_grow(l); + id = (LinkInputId)(l->ninputs + 1); + in = &l->inputs[l->ninputs++]; + memset(in, 0, sizeof(*in)); + in->id = id; + in->kind = LINK_INPUT_OBJ_BYTES; + in->obj = ob; /* re-uses the ObjBuilder slot for ownership */ + in->name = name ? 
pool_intern_cstr(l->c->global, name) : 0; + return id; +} + +LinkInputId link_add_archive_bytes(Linker* l, const char* name, + const u8* data, size_t len, + u8 whole_archive, u8 link_mode, + u8 group_id) +{ + (void)name; (void)data; (void)len; + (void)whole_archive; (void)link_mode; (void)group_id; + compiler_panic(l->c, no_loc(), + "link_add_archive_bytes: not yet implemented " + "(this cut accepts ObjBuilder* inputs only)"); + return LINK_INPUT_NONE; +} + +void link_set_entry(Linker* l, const char* name) +{ + if (!l || !name) return; + l->entry_name = pool_intern_cstr(l->c->global, name); +} + +void link_set_script(Linker* l, const CfreeLinkScript* script) +{ + if (!l || !script) return; + compiler_panic(l->c, no_loc(), + "link_set_script: linker scripts not yet implemented"); +} + +void link_set_extern_resolver(Linker* l, LinkExternResolver fn, void* user) +{ + if (!l) return; + l->resolver = fn; + l->resolver_user = user; +} + +void link_set_gc_sections(Linker* l, int enable) +{ + if (!l) return; + l->gc_sections = enable; + /* Accepted but ignored this cut. Quiet by design — driver/ld.c may + * pass 0 unconditionally and we don't want to noise that. */ +} + +/* ---- LinkImage accessors ---- */ + +const LinkSymbol* link_symbol(LinkImage* img, LinkSymId id) +{ + if (!img || id == LINK_SYM_NONE || id > img->nsyms) return NULL; + return &img->syms[id - 1]; +} + +LinkSymId link_symbol_lookup(LinkImage* img, Sym name) +{ + if (!img) return LINK_SYM_NONE; + return symhash_get(&img->globals, name); +} + +u32 link_segment_count(LinkImage* img) { return img ? 
img->nsegments : 0; } + +const LinkSegment* link_segment_get(LinkImage* img, u32 id) +{ + if (!img || id == LINK_SEG_NONE || id > img->nsegments) return NULL; + return &img->segments[id - 1]; +} + +const u8* link_segment_bytes(LinkImage* img, LinkSegmentId id, size_t* size_out) +{ + if (size_out) *size_out = 0; + if (!img || id == LINK_SEG_NONE || id > img->nsegments) return NULL; + if (size_out) *size_out = (size_t)img->segments[id - 1].file_size; + return img->segment_bytes[id - 1]; +} + +u32 link_section_count(LinkImage* img) { return img ? img->nsections : 0; } + +const LinkSection* link_section_get(LinkImage* img, LinkSectionId id) +{ + if (!img || id == LINK_SEC_NONE || id > img->nsections) return NULL; + return &img->sections[id - 1]; +} + +u32 link_reloc_apply_count(LinkImage* img) { return img ? img->nrelocs : 0; } + +const LinkRelocApply* link_reloc_apply_get(LinkImage* img, u32 id) +{ + if (!img || id >= img->nrelocs) return NULL; + return &img->relocs[id]; +} + +/* ---- LinkImage free / cleanup ---- */ + +static void link_image_release(LinkImage* img) +{ + u32 i; + if (!img) return; + if (img->segment_bytes) { + for (i = 0; i < img->nsegments; ++i) { + if (img->segment_bytes[i]) + img->heap->free(img->heap, img->segment_bytes[i], + img->segment_bytes_cap[i]); + } + img->heap->free(img->heap, img->segment_bytes, + sizeof(*img->segment_bytes) * img->nsegments); + img->heap->free(img->heap, img->segment_bytes_cap, + sizeof(*img->segment_bytes_cap) * img->nsegments); + } + if (img->segments) img->heap->free(img->heap, img->segments, + sizeof(*img->segments) * img->nsegments); + if (img->sections) img->heap->free(img->heap, img->sections, + sizeof(*img->sections) * img->nsections); + if (img->syms) img->heap->free(img->heap, img->syms, + sizeof(*img->syms) * img->syms_cap); + if (img->relocs) img->heap->free(img->heap, img->relocs, + sizeof(*img->relocs) * img->relocs_cap); + if (img->input_maps) { + for (i = 0; i < img->ninput_maps; ++i) { + InputMap* m = 
&img->input_maps[i]; + if (m->sym) img->heap->free(img->heap, m->sym, + sizeof(*m->sym) * m->nsym); + if (m->section) img->heap->free(img->heap, m->section, + sizeof(*m->section) * m->nsection); + } + img->heap->free(img->heap, img->input_maps, + sizeof(*img->input_maps) * img->ninput_maps); + } + symhash_fini(&img->globals); + img->heap->free(img->heap, img, sizeof(*img)); +} + +static void link_image_cleanup(void* arg) +{ + link_image_release((LinkImage*)arg); +} + +LinkImage* link_image_alloc(Compiler* c) +{ + Heap* h = (Heap*)c->env->heap; + LinkImage* img = (LinkImage*)h->alloc(h, sizeof(*img), _Alignof(LinkImage)); + if (!img) compiler_panic(c, no_loc(), "link: out of memory allocating image"); + memset(img, 0, sizeof(*img)); + img->c = c; + img->heap = h; + symhash_init(&img->globals, h); + img->deferred = compiler_defer(c, link_image_cleanup, img); + return img; +} + +void link_image_free(LinkImage* img) +{ + if (!img) return; + if (img->deferred) compiler_undefer(img->c, img->deferred); + link_image_release(img); +} + +/* ---- public emit dispatcher ---- */ + +void link_emit_image_writer(LinkImage* img, Writer* w) +{ + if (!img || !w) return; + switch (img->c->target.obj) { + case CFREE_OBJ_ELF: + link_emit_elf_aarch64(img, w); + return; + default: + compiler_panic(img->c, no_loc(), + "link_emit_image_writer: only ELF is implemented"); + } +} diff --git a/src/link/link_elf.c b/src/link/link_elf.c @@ -0,0 +1,258 @@ +/* link_emit_elf_aarch64: write a static ET_EXEC ELF64 image to the + * caller-provided Writer. + * + * Layout: ELF header + program headers occupy a separate first PT_LOAD + * mapped read-only at IMAGE_BASE. The kept segments follow, each + * page-aligned. We do this in two passes: + * 1. Determine headers_size = sizeof(Ehdr) + nseg_total * sizeof(Phdr), + * where nseg_total = nsegments + 1 (the headers PT_LOAD itself). + * Bump every image-relative vaddr / file_offset on sections, + * symbols, and reloc records by align_up(headers_size, PAGE). 
+ * 2. Apply relocations with the absolute addresses (S, P offset by + * IMAGE_BASE), then write headers and segment bytes via the + * Writer. + * + * Image addresses on the LinkImage remain image-relative (base 0) on + * entry; we mutate them in place since this consumer takes ownership + * of the image at link_emit_image_writer + link_image_free time. */ + +#include "link/link.h" +#include "link/link_internal.h" + +#include "core/heap.h" +#include "core/pool.h" + +#include <string.h> + +static SrcLoc no_loc(void) { SrcLoc l = {0,0,0}; return l; } + +/* ---- ELF64 constants (subset; sysv-abi values) ---- */ + +#define EI_NIDENT 16 + +/* e_ident */ +#define ELFMAG0 0x7f +#define ELFMAG1 'E' +#define ELFMAG2 'L' +#define ELFMAG3 'F' +#define ELFCLASS64 2 +#define ELFDATA2LSB 1 +#define EV_CURRENT 1 +#define ELFOSABI_SYSV 0 + +/* e_type */ +#define ET_EXEC 2 + +/* e_machine */ +#define EM_AARCH64 183 + +/* p_type / p_flags */ +#define PT_LOAD 1 +#define PF_X 1u +#define PF_W 2u +#define PF_R 4u + +#define PAGE_SIZE 0x1000u +#define IMAGE_BASE 0x400000ULL + +typedef struct __attribute__((packed)) Ehdr64 { + u8 e_ident[EI_NIDENT]; + u16 e_type; + u16 e_machine; + u32 e_version; + u64 e_entry; + u64 e_phoff; + u64 e_shoff; + u32 e_flags; + u16 e_ehsize; + u16 e_phentsize; + u16 e_phnum; + u16 e_shentsize; + u16 e_shnum; + u16 e_shstrndx; +} Ehdr64; + +typedef struct __attribute__((packed)) Phdr64 { + u32 p_type; + u32 p_flags; + u64 p_offset; + u64 p_vaddr; + u64 p_paddr; + u64 p_filesz; + u64 p_memsz; + u64 p_align; +} Phdr64; + +static u64 align_up_u64(u64 v, u64 a) { return (v + (a - 1u)) & ~(a - 1u); } + +static u32 perms_to_pflags(u32 secflags) +{ + u32 f = 0; + f |= PF_R; + if (secflags & SF_EXEC) f |= PF_X; + if (secflags & SF_WRITE) f |= PF_W; + return f; +} + +static void shift_image_addresses(LinkImage* img, u64 delta) +{ + u32 i; + for (i = 0; i < img->nsegments; ++i) { + img->segments[i].file_offset += delta; + img->segments[i].vaddr += delta; + } + for (i = 
0; i < img->nsections; ++i) { + img->sections[i].file_offset += delta; + img->sections[i].vaddr += delta; + } + for (i = 0; i < img->nrelocs; ++i) { + img->relocs[i].write_file_offset += delta; + img->relocs[i].write_vaddr += delta; + } + for (i = 0; i < img->nsyms; ++i) { + LinkSymbol* s = &img->syms[i]; + if (s->kind == SK_ABS) continue; /* extern / absolute */ + if (!s->defined) continue; + s->vaddr += delta; + } +} + +static void apply_all_relocs(LinkImage* img) +{ + u32 i; + for (i = 0; i < img->nrelocs; ++i) { + LinkRelocApply* r = &img->relocs[i]; + const LinkSymbol* tgt = &img->syms[r->target - 1]; + const LinkSection* sec = &img->sections[r->link_section_id - 1]; + const LinkSegment* seg = &img->segments[sec->segment_id - 1]; + u64 S, P; + u8* P_bytes; + S = tgt->vaddr + IMAGE_BASE; + if (tgt->kind == SK_ABS) S = tgt->vaddr; /* already absolute */ + P = r->write_vaddr + IMAGE_BASE; + P_bytes = img->segment_bytes[seg->id - 1] + + (size_t)(r->write_file_offset - seg->file_offset); + link_reloc_apply(img->c, r->kind, P_bytes, S, r->addend, P); + } +} + +static void write_bytes(Writer* w, const void* data, size_t n) +{ + w->write(w, data, n); +} + +static void write_zeroes(Writer* w, size_t n) +{ + static const u8 zeroes[256] = {0}; + while (n) { + size_t step = n > sizeof(zeroes) ? 
sizeof(zeroes) : n; + w->write(w, zeroes, step); + n -= step; + } +} + +void link_emit_elf_aarch64(LinkImage* img, Writer* w) +{ + Ehdr64 ehdr; + Phdr64* phdrs; + Heap* heap = img->heap; + u32 nphdr_total = img->nsegments + 1u; /* +1 for the headers PT_LOAD */ + u64 headers_size = sizeof(Ehdr64) + (u64)nphdr_total * sizeof(Phdr64); + u64 headers_load = align_up_u64(headers_size, PAGE_SIZE); + u64 cur_off; + u32 i; + + if (img->c->target.arch != CFREE_ARCH_ARM_64) { + compiler_panic(img->c, no_loc(), + "link_emit_elf: only AArch64 is implemented"); + } + if (img->entry_sym == LINK_SYM_NONE) { + compiler_panic(img->c, no_loc(), + "link_emit_elf: no resolved entry symbol"); + } + + /* Reserve headers_load bytes at the front of the image: bump every + * vaddr/file_offset on segments/sections/relocs/symbols. */ + shift_image_addresses(img, headers_load); + + /* All addresses are now final (image-relative + headers_load). + * Apply relocations into the segment buffers. */ + apply_all_relocs(img); + + /* Build program headers. + * Phdr 0 is the read-only headers segment mapped at IMAGE_BASE. + * Phdrs 1..nsegments are the kept segments. 
*/ + phdrs = (Phdr64*)heap->alloc(heap, sizeof(Phdr64) * nphdr_total, + _Alignof(Phdr64)); + if (!phdrs) compiler_panic(img->c, no_loc(), + "link_emit_elf: oom on phdrs"); + memset(phdrs, 0, sizeof(Phdr64) * nphdr_total); + + phdrs[0].p_type = PT_LOAD; + phdrs[0].p_flags = PF_R; + phdrs[0].p_offset = 0; + phdrs[0].p_vaddr = IMAGE_BASE; + phdrs[0].p_paddr = IMAGE_BASE; + phdrs[0].p_filesz = headers_size; + phdrs[0].p_memsz = headers_size; + phdrs[0].p_align = PAGE_SIZE; + + for (i = 0; i < img->nsegments; ++i) { + const LinkSegment* seg = &img->segments[i]; + Phdr64* p = &phdrs[i + 1]; + p->p_type = PT_LOAD; + p->p_flags = perms_to_pflags(seg->flags); + p->p_offset = seg->file_offset; + p->p_vaddr = IMAGE_BASE + seg->vaddr; + p->p_paddr = IMAGE_BASE + seg->vaddr; + p->p_filesz = seg->file_size; + p->p_memsz = seg->mem_size; + p->p_align = seg->align ? seg->align : PAGE_SIZE; + } + + /* Build ehdr. */ + memset(&ehdr, 0, sizeof(ehdr)); + ehdr.e_ident[0] = ELFMAG0; + ehdr.e_ident[1] = ELFMAG1; + ehdr.e_ident[2] = ELFMAG2; + ehdr.e_ident[3] = ELFMAG3; + ehdr.e_ident[4] = ELFCLASS64; + ehdr.e_ident[5] = ELFDATA2LSB; + ehdr.e_ident[6] = EV_CURRENT; + ehdr.e_ident[7] = ELFOSABI_SYSV; + ehdr.e_type = ET_EXEC; + ehdr.e_machine = EM_AARCH64; + ehdr.e_version = EV_CURRENT; + ehdr.e_entry = IMAGE_BASE + img->syms[img->entry_sym - 1].vaddr; + ehdr.e_phoff = sizeof(Ehdr64); + ehdr.e_shoff = 0; + ehdr.e_flags = 0; + ehdr.e_ehsize = sizeof(Ehdr64); + ehdr.e_phentsize = sizeof(Phdr64); + ehdr.e_phnum = (u16)nphdr_total; + ehdr.e_shentsize = sizeof(struct { u32 a, b, c, d; }); /* unused */ + ehdr.e_shentsize = 0; + ehdr.e_shnum = 0; + ehdr.e_shstrndx = 0; + + /* Write to the Writer in file order: ehdr, phdrs, pad to first + * segment's file_offset, segment bytes (each padded to its + * own file_offset). 
*/ + write_bytes(w, &ehdr, sizeof(ehdr)); + write_bytes(w, phdrs, sizeof(Phdr64) * nphdr_total); + cur_off = sizeof(ehdr) + sizeof(Phdr64) * nphdr_total; + + /* Pad up to the first segment. */ + for (i = 0; i < img->nsegments; ++i) { + const LinkSegment* seg = &img->segments[i]; + if (seg->file_size == 0) continue; /* pure-BSS segment */ + if (cur_off < seg->file_offset) { + write_zeroes(w, (size_t)(seg->file_offset - cur_off)); + cur_off = seg->file_offset; + } + write_bytes(w, img->segment_bytes[seg->id - 1], (size_t)seg->file_size); + cur_off += seg->file_size; + } + + heap->free(heap, phdrs, sizeof(Phdr64) * nphdr_total); +} diff --git a/src/link/link_internal.h b/src/link/link_internal.h @@ -0,0 +1,109 @@ +#ifndef CFREE_LINK_INTERNAL_H +#define CFREE_LINK_INTERNAL_H + +/* Shared private definitions for the linker (link.c, link_layout.c, + * link_reloc.c, link_elf.c, link_jit.c). Not part of any public surface; + * not included by anything outside src/link/. */ + +#include "core/core.h" +#include "obj/obj.h" +#include "link/link.h" + +/* Per-input mapping built during link_resolve. ObjSymId / ObjSecId are + * scoped to a single ObjBuilder, so the linker maintains an explicit + * lookup from each input's id space into the global LinkSymId / + * LinkSectionId space. Indices are dense (0..count-1 within the input); + * the array is sized to the input's nsymbols / nsections at allocation + * time. Index 0 of each id space is the "none" sentinel and maps to the + * matching LINK_*_NONE. */ +typedef struct InputMap { + LinkSymId* sym; /* size = ObjBuilder.nsymbols */ + u32 nsym; + LinkSectionId* section; /* size = ObjBuilder.nsections */ + u32 nsection; +} InputMap; + +/* Open-addressed name -> LinkSymId hash for global / weak definitions + * and lookups (cfree_jit_lookup, entry-symbol resolution). Locals never + * land in this table. + * + * Capacity is always a power of two; we keep load factor < 0.75 by + * doubling on insert. 
Sym ids are 32-bit interned strings — compare + * with ==. A slot whose name == 0 is empty (Sym 0 is the "none" sentinel + * and never appears as a real name). */ +typedef struct SymHashEntry { + Sym name; + LinkSymId id; +} SymHashEntry; + +typedef struct SymHash { + Heap* heap; + SymHashEntry* slots; + u32 cap; + u32 used; +} SymHash; + +void symhash_init(SymHash*, Heap*); +void symhash_fini(SymHash*); +/* Returns 1 on insert, 0 if name already present (and writes the existing + * id into *existing_out when non-NULL). The replace policy lives in the + * caller — this is a flat insert/lookup map. */ +int symhash_insert(SymHash*, Sym name, LinkSymId id, LinkSymId* existing_out); +void symhash_set(SymHash*, Sym name, LinkSymId id); /* unconditional */ +LinkSymId symhash_get(const SymHash*, Sym name); + +struct CfreeJit; /* forward; see link_jit.c */ + +struct Linker { + Compiler* c; + Heap* heap; + LinkInput* inputs; /* dyn array; LinkInputId = index + 1 */ + u32 ninputs; + u32 inputs_cap; + Sym entry_name; + int gc_sections; + LinkExternResolver resolver; + void* resolver_user; + CompilerCleanup* deferred; /* registered by link_new */ +}; + +struct LinkImage { + Compiler* c; + Heap* heap; + CompilerCleanup* deferred; /* registered by link_resolve */ + + LinkSymbol* syms; /* id = index + 1 */ + u32 nsyms; + u32 syms_cap; + SymHash globals; /* name -> LinkSymId for global/weak */ + + LinkSection* sections; /* id = index + 1 */ + u32 nsections; + + LinkSegment* segments; /* id = index + 1 */ + u32 nsegments; + u8** segment_bytes; /* one per segment; size = file_size */ + size_t* segment_bytes_cap; /* allocation size for free */ + + LinkRelocApply* relocs; + u32 nrelocs; + u32 relocs_cap; + + LinkSymId entry_sym; + + InputMap* input_maps; /* one per input; indexed by input_id-1 */ + u32 ninput_maps; +}; + +/* Apply one relocation in place. 
P_bytes points at the first byte of the + * relocation site within the final memory; S is the resolved final + * address of the target symbol; A the addend; P the final address of + * the relocation site. Panics on unsupported kinds. */ +void link_reloc_apply(Compiler*, RelocKind, u8* P_bytes, + u64 S, i64 A, u64 P); + +/* Public link_emit_image_writer dispatches by Compiler.target.obj. The + * ELF AArch64 implementation lives in link_elf.c. */ +void link_emit_elf_aarch64(LinkImage*, Writer*); + +#endif diff --git a/src/link/link_jit.c b/src/link/link_jit.c @@ -0,0 +1,203 @@ +/* JIT mapper. Takes a resolved LinkImage, mmaps a fresh writable + * region, copies segments, applies relocations against the runtime + * base, mprotects to final permissions, and returns an owning CfreeJit + * handle. + * + * Lookup is by interned Sym name (object-local handles never escape). + * Inspector entries (cfree_jit_view, _addr_to_sym, _sym_iter_*) are + * stubbed for now — the linker can land without dragging the inspector + * surface up. */ + +#include "link/link.h" +#include "link/link_internal.h" + +#include "core/heap.h" +#include "core/pool.h" + +#include <cfree.h> + +#include <string.h> + +#include <sys/mman.h> +#include <unistd.h> + +static SrcLoc no_loc(void) { SrcLoc l = {0,0,0}; return l; } + +#define JIT_PAGE_SIZE 0x1000u + +static u64 align_up_u64(u64 v, u64 a) { return (v + (a - 1u)) & ~(a - 1u); } + +struct CfreeJit { + Compiler* c; + LinkImage* image; + void* base; + size_t map_size; +}; + +static int perms_for(u32 secflags) +{ + int p = PROT_READ; + if (secflags & SF_EXEC) p |= PROT_EXEC; + if (secflags & SF_WRITE) p |= PROT_WRITE; + return p; +} + +CfreeJit* cfree_jit_from_image(LinkImage* img) +{ + Compiler* c; + Heap* heap; + CfreeJit* jit; + void* base; + size_t map_size = 0; + u32 i; + + if (!img) return NULL; + c = img->c; + heap = img->heap; + + /* Total mmap size = top of last segment, page-aligned. 
*/ + if (img->nsegments == 0) { + compiler_panic(c, no_loc(), + "cfree_jit_from_image: image has no segments"); + } + for (i = 0; i < img->nsegments; ++i) { + const LinkSegment* seg = &img->segments[i]; + u64 end = seg->vaddr + align_up_u64(seg->mem_size, JIT_PAGE_SIZE); + if (end > map_size) map_size = (size_t)end; + } + map_size = (size_t)align_up_u64((u64)map_size, JIT_PAGE_SIZE); + + base = mmap(NULL, map_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (base == MAP_FAILED) { + compiler_panic(c, no_loc(), + "cfree_jit_from_image: mmap failed"); + } + /* mmap returns zeroed memory — BSS is naturally zero. */ + + /* Copy each segment's file bytes to (base + vaddr). */ + for (i = 0; i < img->nsegments; ++i) { + const LinkSegment* seg = &img->segments[i]; + if (seg->file_size == 0) continue; + memcpy((u8*)base + seg->vaddr, + img->segment_bytes[i], + (size_t)seg->file_size); + } + + /* Apply relocations with runtime base. */ + for (i = 0; i < img->nrelocs; ++i) { + const LinkRelocApply* r = &img->relocs[i]; + const LinkSymbol* tgt = &img->syms[r->target - 1]; + u64 S, P; + u8* P_bytes; + if (tgt->kind == SK_ABS) { + /* extern resolver result OR true absolute symbol — vaddr + * already holds the runtime address. */ + S = tgt->vaddr; + } else { + S = tgt->vaddr + (u64)(uintptr_t)base; + } + P = r->write_vaddr + (u64)(uintptr_t)base; + P_bytes = (u8*)base + r->write_vaddr; + link_reloc_apply(c, r->kind, P_bytes, S, r->addend, P); + } + +#ifdef __aarch64__ + /* Flush the data caches we just wrote and invalidate the icache + * so the CPU sees the new instructions before the mprotect flip. */ + __builtin___clear_cache((char*)base, (char*)base + map_size); +#endif + + /* Flip permissions per segment. 
*/ + for (i = 0; i < img->nsegments; ++i) { + const LinkSegment* seg = &img->segments[i]; + size_t mlen = (size_t)align_up_u64(seg->mem_size, JIT_PAGE_SIZE); + if (mprotect((u8*)base + seg->vaddr, mlen, + perms_for(seg->flags)) != 0) { + munmap(base, map_size); + compiler_panic(c, no_loc(), + "cfree_jit_from_image: mprotect failed"); + } + } + + jit = (CfreeJit*)heap->alloc(heap, sizeof(*jit), _Alignof(CfreeJit)); + if (!jit) { + munmap(base, map_size); + compiler_panic(c, no_loc(), + "cfree_jit_from_image: oom on jit handle"); + } + jit->c = c; + jit->image = img; + jit->base = base; + jit->map_size = map_size; + + /* Take ownership of the image: undefer it from the compiler so a + * future panic doesn't reap something we still hold. */ + if (img->deferred) { + compiler_undefer(c, img->deferred); + img->deferred = NULL; + } + return jit; +} + +void cfree_jit_free(CfreeJit* jit) +{ + Heap* heap; + if (!jit) return; + heap = (Heap*)jit->c->env->heap; + if (jit->base && jit->map_size) munmap(jit->base, jit->map_size); + if (jit->image) { + /* link_image_free undefers (no-op now) and releases storage. 
*/ + link_image_free(jit->image); + } + heap->free(heap, jit, sizeof(*jit)); +} + +void* cfree_jit_lookup(CfreeJit* jit, const char* name) +{ + Sym sym; + LinkSymId id; + const LinkSymbol* s; + if (!jit || !name) return NULL; + sym = pool_intern_cstr(jit->c->global, name); + id = symhash_get(&jit->image->globals, sym); + if (id == LINK_SYM_NONE) return NULL; + s = &jit->image->syms[id - 1]; + if (!s->defined) return NULL; + if (s->kind == SK_ABS) return (void*)(uintptr_t)s->vaddr; + return (u8*)jit->base + s->vaddr; +} + +/* ---- inspector entries (stubs; out of scope for this cut) ---- */ + +const CfreeObjFile* cfree_jit_view(CfreeJit* jit) +{ + (void)jit; + return NULL; +} + +int cfree_jit_addr_to_sym(CfreeJit* jit, uint64_t addr, + const char** name_out, uint64_t* off_out) +{ + (void)jit; (void)addr; + if (name_out) *name_out = NULL; + if (off_out) *off_out = 0; + return 1; +} + +CfreeJitSymIter* cfree_jit_sym_iter_new(CfreeJit* jit) +{ + (void)jit; + return NULL; +} + +int cfree_jit_sym_iter_next(CfreeJitSymIter* it, CfreeJitSym* out) +{ + (void)it; (void)out; + return 0; +} + +void cfree_jit_sym_iter_free(CfreeJitSymIter* it) +{ + (void)it; +} diff --git a/src/link/link_layout.c b/src/link/link_layout.c @@ -0,0 +1,676 @@ +/* link_resolve: builds a fresh LinkImage from the Linker's inputs. + * + * Image-relative discipline: every vaddr / file_offset on the produced + * image treats the image as based at 0. Consumers (link_emit_elf_aarch64, + * cfree_jit_from_image) add their own runtime base before patching + * relocations or writing PT_LOAD headers. Segment byte buffers hold raw + * input section bytes — no relocations are applied here, in line with + * the incremental-link discipline (link.h:136). 
*/ + +#include "link/link.h" +#include "link/link_internal.h" + +#include "core/buf.h" +#include "core/heap.h" +#include "core/pool.h" + +#include <string.h> + +LinkImage* link_image_alloc(Compiler*); /* defined in link.c */ + +static SrcLoc no_loc(void) { SrcLoc l = {0,0,0}; return l; } + +#define PAGE_SIZE 0x1000u + +/* Three-bucket segment partitioning by permission. */ +typedef enum SegBucket { + SEG_RX = 0, /* SF_ALLOC | SF_EXEC */ + SEG_R = 1, /* SF_ALLOC, no EXEC, no WRITE */ + SEG_RW = 2, /* SF_ALLOC | SF_WRITE (incl. BSS) */ + SEG_NBUCKETS = 3, +} SegBucket; + +static u64 align_up_u64(u64 v, u64 a) { return (v + (a - 1u)) & ~(a - 1u); } + +static int section_kept(const Section* s) +{ + /* This cut keeps allocatable progbits/nobits sections only. Debug, + * symtab/strtab, group, and note sections are dropped — none of + * them participate in a static ET_EXEC layout. */ + if (!(s->flags & SF_ALLOC)) return 0; + if (s->sem == SSEM_PROGBITS || s->sem == SSEM_NOBITS) return 1; + return 0; +} + +static SegBucket bucket_for(u16 flags) +{ + if (flags & SF_EXEC) return SEG_RX; + if (flags & SF_WRITE) return SEG_RW; + return SEG_R; +} + +/* ---- LinkImage growth helpers ---- */ + +static void syms_grow(LinkImage* img, u32 want) +{ + u32 new_cap; + LinkSymbol* p; + if (want <= img->syms_cap) return; + new_cap = img->syms_cap ? 
img->syms_cap : 16u; + while (new_cap < want) new_cap *= 2u; + p = (LinkSymbol*)img->heap->realloc( + img->heap, img->syms, + sizeof(*img->syms) * img->syms_cap, + sizeof(*img->syms) * new_cap, + _Alignof(LinkSymbol)); + if (!p) compiler_panic(img->c, no_loc(), "link: oom growing symbols"); + img->syms = p; + img->syms_cap = new_cap; +} + +static LinkSymId append_symbol(LinkImage* img, const LinkSymbol* tmpl) +{ + LinkSymbol* s; + syms_grow(img, img->nsyms + 1u); + s = &img->syms[img->nsyms]; + *s = *tmpl; + s->id = (LinkSymId)(img->nsyms + 1u); + img->nsyms++; + return s->id; +} + +static void relocs_grow(LinkImage* img, u32 want) +{ + u32 new_cap; + LinkRelocApply* p; + if (want <= img->relocs_cap) return; + new_cap = img->relocs_cap ? img->relocs_cap : 32u; + while (new_cap < want) new_cap *= 2u; + p = (LinkRelocApply*)img->heap->realloc( + img->heap, img->relocs, + sizeof(*img->relocs) * img->relocs_cap, + sizeof(*img->relocs) * new_cap, + _Alignof(LinkRelocApply)); + if (!p) compiler_panic(img->c, no_loc(), "link: oom growing relocs"); + img->relocs = p; + img->relocs_cap = new_cap; +} + +/* ---- per-input symbol/section maps ---- */ + +static void map_alloc(LinkImage* img, InputMap* m, u32 nsym, u32 nsection) +{ + Heap* h = img->heap; + m->nsym = nsym; + m->sym = (LinkSymId*)h->alloc(h, sizeof(*m->sym) * nsym, _Alignof(LinkSymId)); + if (!m->sym) compiler_panic(img->c, no_loc(), "link: oom on input symbol map"); + memset(m->sym, 0, sizeof(*m->sym) * nsym); + m->nsection = nsection; + m->section = (LinkSectionId*)h->alloc(h, sizeof(*m->section) * nsection, + _Alignof(LinkSectionId)); + if (!m->section) compiler_panic(img->c, no_loc(), + "link: oom on input section map"); + memset(m->section, 0, sizeof(*m->section) * nsection); +} + +/* ---- pass 1: collect symbols and pick section layout ---- */ + +/* Defined-symbol replacement policy: a stronger binding wins; a + * stronger binding seen second replaces the existing record in place. 
+ * Two SB_GLOBAL definitions of the same name are a hard error. */ +static int bind_strength(u8 bind) +{ + /* Higher == stronger. */ + switch (bind) { + case SB_GLOBAL: return 3; + case SB_WEAK: return 2; + case SB_LOCAL: return 1; + default: return 0; + } +} + +static void resolve_symbols(Linker* l, LinkImage* img) +{ + u32 ii; + /* Per-input pass: register every ObjSym (locals included), and + * insert defined globals/weaks into img->globals. Locals stay + * out of the hash. */ + for (ii = 0; ii < l->ninputs; ++ii) { + LinkInput* in = &l->inputs[ii]; + ObjBuilder* ob = in->obj; + InputMap* m = &img->input_maps[ii]; + u32 nsym = obj_section_count(ob); (void)nsym; + ObjSymIter* it; + ObjSymEntry e; + + /* obj.h: ObjSymId 0 is the "none" sentinel; the iterator skips + * it. We need an upper bound for the per-input symbol map, + * which is the builder's nsymbols (count incl. id-0 sentinel). + * Walk via the iterator to learn how many real entries there + * are; allocate the map to a safe upper bound by counting. */ + u32 nsyms_in_input = 0; + it = obj_symiter_new(ob); + while (obj_symiter_next(it, &e)) ++nsyms_in_input; + obj_symiter_free(it); + + map_alloc(img, m, nsyms_in_input + 1u /* +1 for id-0 slot */, + obj_section_count(ob)); + + it = obj_symiter_new(ob); + while (obj_symiter_next(it, &e)) { + const ObjSym* s = e.sym; + LinkSymbol rec; + LinkSymId existing; + int is_def = (s->kind != SK_UNDEF); + + memset(&rec, 0, sizeof(rec)); + rec.name = s->name; + rec.input_id = in->id; + rec.obj_sym = e.id; + rec.section_id = LINK_SEC_NONE; /* filled later */ + rec.value = s->value; + rec.size = s->size; + rec.bind = (u8)s->bind; + rec.kind = (u8)s->kind; + rec.defined = (u8)is_def; + rec.vaddr = 0; + + if (is_def && (s->bind == SB_GLOBAL || s->bind == SB_WEAK) + && s->name != 0) { + /* Try to insert. On collision, apply replacement + * policy in-place against the existing LinkSymbol. 
*/ + LinkSymId fresh = (LinkSymId)(img->nsyms + 1u); + if (symhash_insert(&img->globals, s->name, fresh, &existing)) { + /* No collision — append a new slot. */ + m->sym[e.id] = append_symbol(img, &rec); + } else { + LinkSymbol* prev = &img->syms[existing - 1]; + int new_strength = bind_strength((u8)s->bind); + int old_strength = bind_strength(prev->bind); + if (new_strength > old_strength) { + /* This def wins; keep the existing LinkSymId + * stable so prior references resolve, but + * update the contents. */ + rec.id = existing; + *prev = rec; + m->sym[e.id] = existing; + } else if (new_strength == old_strength + && new_strength == bind_strength(SB_GLOBAL)) { + /* Two strong defs — fatal. */ + size_t namelen; + const char* nm = pool_str(l->c->global, s->name, &namelen); + compiler_panic(l->c, no_loc(), + "link: duplicate definition of " + "global symbol '%.*s'", + (int)namelen, nm); + } else { + /* New def is weaker — keep existing, point + * the per-input map at the existing id so + * relocations from this input still resolve. */ + m->sym[e.id] = existing; + } + } + } else { + /* Locals + undefs each get their own LinkSymId. Globals + * with name == 0 (rare; unnamed temporaries promoted + * to global by some assemblers) also land here. */ + m->sym[e.id] = append_symbol(img, &rec); + } + } + obj_symiter_free(it); + } +} + +static void resolve_undefs(Linker* l, LinkImage* img) +{ + u32 i; + /* For every symbol that's still SK_UNDEF and visible by name, look + * it up in the global hash. If still undef, try the resolver. If + * still undef, fatal. */ + for (i = 0; i < img->nsyms; ++i) { + LinkSymbol* s = &img->syms[i]; + if (s->defined) continue; + if (s->name != 0) { + LinkSymId hit = symhash_get(&img->globals, s->name); + if (hit != LINK_SYM_NONE && hit != s->id) { + LinkSymbol* def = &img->syms[hit - 1]; + if (def->defined) { + /* Re-point this undef at the global definition by + * copying the resolved fields. 
The id remains + * stable so per-input maps don't need to be + * rewritten — relocations just look up via this + * symbol's eventual vaddr. */ + s->section_id = def->section_id; + s->value = def->value; + s->vaddr = def->vaddr; + s->kind = def->kind; + s->bind = def->bind; + s->defined = 1; + continue; + } + } + } + if (l->resolver && s->name != 0) { + size_t namelen; + const char* nm = pool_str(l->c->global, s->name, &namelen); + /* The resolver expects a NUL-terminated C string; pool + * strings are NUL-terminated by pool_intern. */ + (void)namelen; + void* p = l->resolver(l->resolver_user, nm); + if (p) { + s->kind = SK_ABS; + s->vaddr = (u64)(uintptr_t)p; + s->defined = 1; + continue; + } + } + { + size_t namelen; + const char* nm = s->name ? pool_str(l->c->global, s->name, &namelen) + : (namelen = 0, ""); + compiler_panic(l->c, no_loc(), + "link: undefined reference to '%.*s'", + (int)namelen, nm); + } + } +} + +/* ---- pass 2: section assignment + segment layout ---- */ + +typedef struct SecRef { + u32 input_idx; + ObjSecId obj_sec_id; + LinkSectionId link_sec_id; +} SecRef; + +static void layout_sections(Linker* l, LinkImage* img) +{ + Heap* h = img->heap; + /* First pass: count kept sections. */ + u32 ii, j; + u32 total_kept = 0; + for (ii = 0; ii < l->ninputs; ++ii) { + ObjBuilder* ob = l->inputs[ii].obj; + for (j = 1; j < obj_section_count(ob); ++j) { + const Section* s = obj_section_get(ob, j); + if (s && section_kept(s)) ++total_kept; + } + } + + img->sections = total_kept + ? (LinkSection*)h->alloc(h, sizeof(*img->sections) * total_kept, + _Alignof(LinkSection)) + : NULL; + if (total_kept && !img->sections) + compiler_panic(img->c, no_loc(), "link: oom on sections"); + + /* Three segment buckets; tracks per-bucket size during scan and + * per-section file_offset within the bucket. 
*/ + u64 seg_size[SEG_NBUCKETS] = {0}; + u32 seg_align[SEG_NBUCKETS] = {1, 1, 1}; + u32 seg_count[SEG_NBUCKETS] = {0}; + /* For BSS: track separately to set mem_size > file_size. We track + * trailing nobits per bucket — only SEG_RW gets BSS in practice. */ + u64 seg_bss_extra[SEG_NBUCKETS] = {0}; + + /* Walk inputs in stable order and append to buckets. */ + for (ii = 0; ii < l->ninputs; ++ii) { + ObjBuilder* ob = l->inputs[ii].obj; + InputMap* m = &img->input_maps[ii]; + for (j = 1; j < obj_section_count(ob); ++j) { + const Section* s = obj_section_get(ob, j); + SegBucket bucket; + u32 align; + u64 ofs; + LinkSection* ls; + LinkSectionId lsid; + + if (!s || !section_kept(s)) continue; + bucket = bucket_for(s->flags); + align = s->align ? s->align : 1u; + + /* Bump bucket size up to alignment, then place. BSS + * (NOBITS) only contributes to mem_size; everything + * preceding it in the bucket has already accumulated + * file_size. */ + if (s->sem == SSEM_NOBITS) { + /* Place after current file_size + any prior bss. */ + u64 cursor = seg_size[bucket] + seg_bss_extra[bucket]; + cursor = align_up_u64(cursor, align); + seg_bss_extra[bucket] = cursor + (u64)s->bss_size - seg_size[bucket]; + ofs = cursor; + } else { + /* If we'd be appending NOBITS bytes ahead of progbits, + * promote to file bytes (rare with our bucket policy + * but defensive). 
*/ + seg_size[bucket] += seg_bss_extra[bucket]; + seg_bss_extra[bucket] = 0; + ofs = align_up_u64(seg_size[bucket], align); + seg_size[bucket] = ofs + (u64)s->bytes.total; + } + + if (align > seg_align[bucket]) seg_align[bucket] = align; + seg_count[bucket]++; + + lsid = (LinkSectionId)(img->nsections + 1u); + ls = &img->sections[img->nsections++]; + memset(ls, 0, sizeof(*ls)); + ls->id = lsid; + ls->input_id = l->inputs[ii].id; + ls->obj_section_id = j; + ls->segment_id = LINK_SEG_NONE; /* filled below */ + ls->input_offset = ofs; /* offset within segment */ + ls->file_offset = ofs; /* image-relative; segment offset added later */ + ls->vaddr = ofs; /* image-relative; segment vaddr added later */ + ls->size = (s->sem == SSEM_NOBITS) ? s->bss_size : s->bytes.total; + ls->flags = s->flags; + ls->align = align; + /* Stash the bucket in the section's segment_id slot + * temporarily — fixed up after segments are created. */ + ls->segment_id = (LinkSegmentId)(bucket + 1u); /* 1..3 sentinel */ + m->section[j] = lsid; + } + } + + /* Materialize one LinkSegment per non-empty bucket, then assign + * absolute (image-relative) vaddr/file_offset to each segment and + * fix up section.{vaddr,file_offset,segment_id}. */ + { + LinkSegmentId bucket_seg[SEG_NBUCKETS] = {0}; + u64 cursor = 0; + u32 b; + u32 nseg = 0; + for (b = 0; b < SEG_NBUCKETS; ++b) + if (seg_count[b]) ++nseg; + + img->segments = nseg + ? (LinkSegment*)h->alloc(h, sizeof(*img->segments) * nseg, + _Alignof(LinkSegment)) + : NULL; + img->segment_bytes = nseg + ? (u8**)h->alloc(h, sizeof(*img->segment_bytes) * nseg, + _Alignof(u8*)) + : NULL; + img->segment_bytes_cap = nseg + ? 
(size_t*)h->alloc(h, sizeof(*img->segment_bytes_cap) * nseg, + _Alignof(size_t)) + : NULL; + if (nseg && (!img->segments || !img->segment_bytes || + !img->segment_bytes_cap)) + compiler_panic(img->c, no_loc(), "link: oom on segments"); + if (nseg) { + memset(img->segment_bytes, 0, + sizeof(*img->segment_bytes) * nseg); + memset(img->segment_bytes_cap, 0, + sizeof(*img->segment_bytes_cap) * nseg); + } + + for (b = 0; b < SEG_NBUCKETS; ++b) { + LinkSegment* seg; + u64 file_size, mem_size, align; + u32 perms; + if (!seg_count[b]) continue; + align = (u64)seg_align[b]; + if (align < PAGE_SIZE) align = PAGE_SIZE; + cursor = align_up_u64(cursor, align); + + seg = &img->segments[img->nsegments]; + file_size = seg_size[b]; + mem_size = seg_size[b] + seg_bss_extra[b]; + perms = SF_ALLOC; + if (b == SEG_RX) perms |= SF_EXEC; + if (b == SEG_RW) perms |= SF_WRITE; + + memset(seg, 0, sizeof(*seg)); + seg->id = (LinkSegmentId)(img->nsegments + 1u); + seg->flags = perms; + seg->file_offset = cursor; + seg->vaddr = cursor; + seg->mem_size = mem_size; + seg->file_size = file_size; + seg->align = (u32)align; + seg->nsections = seg_count[b]; + bucket_seg[b] = seg->id; + cursor += mem_size; + img->nsegments++; + } + + /* Allocate segment buffers and fix up section offsets/vaddrs. 
*/ + for (b = 0; b < SEG_NBUCKETS; ++b) { + if (!bucket_seg[b]) continue; + { + LinkSegment* seg = &img->segments[bucket_seg[b] - 1]; + if (seg->file_size) { + img->segment_bytes[bucket_seg[b] - 1] = (u8*)h->alloc( + h, (size_t)seg->file_size, 16); + if (!img->segment_bytes[bucket_seg[b] - 1]) + compiler_panic(img->c, no_loc(), + "link: oom on segment bytes"); + img->segment_bytes_cap[bucket_seg[b] - 1] = + (size_t)seg->file_size; + memset(img->segment_bytes[bucket_seg[b] - 1], 0, + (size_t)seg->file_size); + } + } + } + + for (j = 0; j < img->nsections; ++j) { + LinkSection* ls = &img->sections[j]; + u32 b2 = (u32)(ls->segment_id - 1u); /* sentinel-stash */ + LinkSegment* seg = &img->segments[bucket_seg[b2] - 1]; + ls->segment_id = seg->id; + ls->vaddr += seg->vaddr; + ls->file_offset += seg->file_offset; + } + } +} + +/* Copy each input section's bytes into its segment buffer. BSS + * sections contribute no file bytes. */ +static void emit_segment_bytes(Linker* l, LinkImage* img) +{ + u32 j; + for (j = 0; j < img->nsections; ++j) { + LinkSection* ls = &img->sections[j]; + ObjBuilder* ob = l->inputs[ls->input_id - 1].obj; + const Section* s = obj_section_get(ob, ls->obj_section_id); + LinkSegment* seg = &img->segments[ls->segment_id - 1]; + u8* dst; + if (!s || s->sem == SSEM_NOBITS) continue; + if (s->bytes.total == 0) continue; + dst = img->segment_bytes[seg->id - 1] + + (size_t)(ls->file_offset - seg->file_offset); + buf_flatten(&s->bytes, dst); + } +} + +/* ---- pass 3: assign symbol vaddrs (now that section vaddrs are final) ---- + * + * Map per-input ObjSecId -> LinkSectionId on every defined symbol, then + * compute its final image-relative vaddr. Run after resolve_symbols and + * layout_sections so both the per-input maps and section vaddrs exist. 
*/ +static void link_symbols_to_sections(Linker* l, LinkImage* img) +{ + u32 ii; + for (ii = 0; ii < l->ninputs; ++ii) { + ObjBuilder* ob = l->inputs[ii].obj; + InputMap* m = &img->input_maps[ii]; + ObjSymIter* it = obj_symiter_new(ob); + ObjSymEntry e; + while (obj_symiter_next(it, &e)) { + LinkSymId lsid = m->sym[e.id]; + LinkSymbol* ls; + if (lsid == LINK_SYM_NONE) continue; + ls = &img->syms[lsid - 1]; + if (!ls->defined) continue; + if (ls->kind == SK_ABS && ls->vaddr != 0) continue; + if (e.sym->section_id == OBJ_SEC_NONE) continue; + /* Only update from this input if this is the input that + * contributed the winning definition. */ + if (ls->input_id != l->inputs[ii].id) continue; + ls->section_id = m->section[e.sym->section_id]; + } + obj_symiter_free(it); + } + /* Now compute vaddrs. */ + { + u32 i; + for (i = 0; i < img->nsyms; ++i) { + LinkSymbol* s = &img->syms[i]; + if (s->kind == SK_ABS && s->vaddr != 0) continue; + if (!s->defined) continue; + if (s->section_id == LINK_SEC_NONE) continue; + s->vaddr = img->sections[s->section_id - 1].vaddr + s->value; + } + } + /* Resolve undef-against-global once defs are addressed. 
*/ + { + u32 i; + for (i = 0; i < img->nsyms; ++i) { + LinkSymbol* s = &img->syms[i]; + if (s->defined) continue; + if (s->name == 0) continue; + { + LinkSymId hit = symhash_get(&img->globals, s->name); + if (hit != LINK_SYM_NONE && hit != s->id) { + LinkSymbol* def = &img->syms[hit - 1]; + if (def->defined) { + s->section_id = def->section_id; + s->value = def->value; + s->vaddr = def->vaddr; + s->kind = def->kind; + s->defined = 1; + } + } + } + } + } +} + +/* ---- pass 4: relocation records ---- */ + +static u8 reloc_width(RelocKind k) +{ + switch (k) { + case R_ABS32: case R_REL32: case R_PC32: case R_GOT32: case R_PLT32: + return 4; + case R_ABS64: case R_REL64: case R_PC64: + return 8; + case R_AARCH64_CALL26: case R_AARCH64_ADR_PREL_PG_HI21: + case R_AARCH64_ADD_ABS_LO12_NC: + case R_AARCH64_LDST8_ABS_LO12_NC: + case R_AARCH64_LDST16_ABS_LO12_NC: + case R_AARCH64_LDST32_ABS_LO12_NC: + case R_AARCH64_LDST64_ABS_LO12_NC: + case R_AARCH64_LDST128_ABS_LO12_NC: + return 4; + default: + return 0; + } +} + +static void emit_reloc_records(Linker* l, LinkImage* img) +{ + u32 ii; + for (ii = 0; ii < l->ninputs; ++ii) { + ObjBuilder* ob = l->inputs[ii].obj; + InputMap* m = &img->input_maps[ii]; + u32 nsec = obj_section_count(ob); + u32 total = 0; + u32 j, k; + const Reloc* base; + for (j = 0; j < nsec; ++j) total += obj_reloc_count(ob, j); + if (total == 0) continue; + /* obj_relocs returns the start of the flat array regardless of + * the section_id argument; we filter by r->section_id below. 
*/ + base = obj_relocs(ob, 0); + for (k = 0; k < total; ++k) { + const Reloc* r = &base[k]; + const Section* s = obj_section_get(ob, r->section_id); + LinkSymId target; + LinkSection* ls; + LinkRelocApply rec; + if (!s || !section_kept(s)) continue; + if (r->sym == OBJ_SYM_NONE || r->sym >= m->nsym) + compiler_panic(l->c, no_loc(), + "link: reloc references unknown symbol"); + target = m->sym[r->sym]; + if (target == LINK_SYM_NONE) + compiler_panic(l->c, no_loc(), + "link: reloc references unmapped symbol"); + ls = &img->sections[m->section[r->section_id] - 1]; + memset(&rec, 0, sizeof(rec)); + rec.input_id = l->inputs[ii].id; + rec.section_id = r->section_id; + rec.link_section_id = ls->id; + rec.offset = r->offset; + rec.width = reloc_width((RelocKind)r->kind); + rec.write_vaddr = ls->vaddr + r->offset; + rec.write_file_offset = ls->file_offset + r->offset; + rec.kind = (RelocKind)r->kind; + rec.target = target; + rec.addend = r->addend; + if (rec.width == 0) + compiler_panic(l->c, no_loc(), + "link: unsupported reloc kind %u", + (unsigned)r->kind); + relocs_grow(img, img->nrelocs + 1u); + img->relocs[img->nrelocs++] = rec; + } + } +} + +/* ---- entry symbol ---- */ + +static void resolve_entry(Linker* l, LinkImage* img) +{ + LinkSymId id; + LinkSymbol* s; + if (l->entry_name == 0) return; + id = symhash_get(&img->globals, l->entry_name); + if (id == LINK_SYM_NONE) { + size_t namelen; + const char* nm = pool_str(l->c->global, l->entry_name, &namelen); + compiler_panic(l->c, no_loc(), + "link: entry symbol '%.*s' not defined", + (int)namelen, nm); + } + s = &img->syms[id - 1]; + if (!s->defined) { + size_t namelen; + const char* nm = pool_str(l->c->global, l->entry_name, &namelen); + compiler_panic(l->c, no_loc(), + "link: entry symbol '%.*s' is undefined", + (int)namelen, nm); + } + img->entry_sym = id; +} + +/* ---- public ---- */ + +LinkImage* link_resolve(Linker* l) +{ + LinkImage* img = link_image_alloc(l->c); + Heap* h = img->heap; + + /* Per-input map 
storage. */ + img->ninput_maps = l->ninputs; + img->input_maps = l->ninputs + ? (InputMap*)h->alloc(h, sizeof(*img->input_maps) * l->ninputs, + _Alignof(InputMap)) + : NULL; + if (l->ninputs && !img->input_maps) + compiler_panic(l->c, no_loc(), "link: oom on input maps"); + if (l->ninputs) + memset(img->input_maps, 0, sizeof(*img->input_maps) * l->ninputs); + + resolve_symbols(l, img); + layout_sections(l, img); + emit_segment_bytes(l, img); + link_symbols_to_sections(l, img); + resolve_undefs(l, img); + emit_reloc_records(l, img); + resolve_entry(l, img); + + return img; +} diff --git a/src/link/link_reloc.c b/src/link/link_reloc.c @@ -0,0 +1,133 @@ +/* AArch64 relocation application. + * + * Pure function: takes the resolved final addresses (S, P) and the + * addend (A), and patches `width` bytes at the relocation site. + * Callers (link_emit_elf_aarch64, cfree_jit_from_image) compute the + * runtime base offset themselves; this routine sees only final values. + * + * Encoding references: ARM ARMv8-A "ELF for the ARM 64-bit Architecture + * (AArch64)" §5.7 (relocation types). 
*/ + +#include "link/link_internal.h" + +#include <string.h> + +static SrcLoc no_loc(void) { SrcLoc l = {0,0,0}; return l; } + +static void wr_u32_le(u8* p, u32 v) +{ + p[0] = (u8)(v & 0xffu); + p[1] = (u8)((v >> 8 ) & 0xffu); + p[2] = (u8)((v >> 16) & 0xffu); + p[3] = (u8)((v >> 24) & 0xffu); +} + +static u32 rd_u32_le(const u8* p) +{ + return (u32)p[0] | ((u32)p[1] << 8) | + ((u32)p[2] << 16) | ((u32)p[3] << 24); +} + +static void wr_u64_le(u8* p, u64 v) +{ + wr_u32_le(p, (u32)(v & 0xffffffffu)); + wr_u32_le(p + 4, (u32)((v >> 32) & 0xffffffffu)); +} + +void link_reloc_apply(Compiler* c, RelocKind k, u8* P_bytes, + u64 S, i64 A, u64 P) +{ + switch (k) { + case R_ABS32: { + u64 v = S + (u64)A; + wr_u32_le(P_bytes, (u32)(v & 0xffffffffu)); + return; + } + case R_ABS64: { + u64 v = S + (u64)A; + wr_u64_le(P_bytes, v); + return; + } + case R_REL32: { + i64 v = (i64)S + A - (i64)P; + wr_u32_le(P_bytes, (u32)((u64)v & 0xffffffffu)); + return; + } + case R_AARCH64_CALL26: { + /* BL/B imm26 — branch displacement in 4-byte units, signed. + * Clear bits [25:0] of the existing instruction and OR in the + * new imm26. Range check: ±128MiB. */ + i64 disp = (i64)S + A - (i64)P; + u32 instr; + u32 imm26; + if (disp & 3) + compiler_panic(c, no_loc(), + "link: CALL26 misaligned displacement"); + if (disp < -(i64)(1 << 27) || disp >= (i64)(1 << 27)) + compiler_panic(c, no_loc(), + "link: CALL26 out of range (need ±128MiB)"); + imm26 = (u32)((disp >> 2) & 0x3ffffffu); + instr = rd_u32_le(P_bytes); + instr = (instr & 0xfc000000u) | imm26; + wr_u32_le(P_bytes, instr); + return; + } + case R_AARCH64_ADR_PREL_PG_HI21: { + /* ADRP — page-relative imm21, encoded as immlo[30:29] + + * immhi[23:5]. Effective immediate is (S+A) page minus P page, + * shifted right by 12, sign-extended to 33 bits. 
*/ + i64 page_s = ((i64)S + A) & ~(i64)0xfff; + i64 page_p = (i64)P & ~(i64)0xfff; + i64 disp = page_s - page_p; + i64 imm21 = disp >> 12; + u32 instr; + u32 immlo, immhi; + if (imm21 < -(i64)(1 << 20) || imm21 >= (i64)(1 << 20)) + compiler_panic(c, no_loc(), + "link: ADR_PREL_PG_HI21 out of range (need ±4GiB)"); + immlo = (u32)(imm21 & 0x3u); + immhi = (u32)((imm21 >> 2) & 0x7ffffu); + instr = rd_u32_le(P_bytes); + instr = (instr & 0x9f00001fu) | (immlo << 29) | (immhi << 5); + wr_u32_le(P_bytes, instr); + return; + } + case R_AARCH64_ADD_ABS_LO12_NC: { + /* ADD (immediate) imm12 at bits [21:10]. NC = no overflow check. */ + u64 v = ((u64)S + (u64)A) & 0xfffu; + u32 instr = rd_u32_le(P_bytes); + instr = (instr & ~(0xfffu << 10)) | ((u32)v << 10); + wr_u32_le(P_bytes, instr); + return; + } + case R_AARCH64_LDST8_ABS_LO12_NC: + case R_AARCH64_LDST16_ABS_LO12_NC: + case R_AARCH64_LDST32_ABS_LO12_NC: + case R_AARCH64_LDST64_ABS_LO12_NC: + case R_AARCH64_LDST128_ABS_LO12_NC: { + /* LDR/STR with imm12 at bits [21:10]; the imm is scaled by the + * access size, so we right-shift the low 12 bits of (S+A) by + * the size scale before encoding. NC = no overflow check. */ + u32 shift = + (k == R_AARCH64_LDST8_ABS_LO12_NC) ? 0u : + (k == R_AARCH64_LDST16_ABS_LO12_NC) ? 1u : + (k == R_AARCH64_LDST32_ABS_LO12_NC) ? 2u : + (k == R_AARCH64_LDST64_ABS_LO12_NC) ? 3u : 4u; + u64 lo12 = ((u64)S + (u64)A) & 0xfffu; + u64 imm12 = lo12 >> shift; + u32 instr = rd_u32_le(P_bytes); + if (lo12 & ((1u << shift) - 1u)) + compiler_panic(c, no_loc(), + "link: LDST%u_ABS_LO12_NC misaligned address", + 1u << (3 + shift)); + instr = (instr & ~(0xfffu << 10)) | ((u32)(imm12 & 0xfffu) << 10); + wr_u32_le(P_bytes, instr); + return; + } + default: + compiler_panic(c, no_loc(), + "link: unsupported reloc kind %u (this cut implements " + "AArch64 ABS32/64, REL32, CALL26, ADR_PREL_PG_HI21, " + "ADD_ABS_LO12_NC only)", (unsigned)k); + } +}