kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit f80b5dd6d6c855d8baa47ce108d2e859564354fc
parent 516e7b47cb094dd0efd52a836af95a5a45672102
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu,  4 Jun 2026 16:39:01 -0700

link: fix FreeBSD hosted static executables

Diffstat:
M driver/lib/hosted.c | 39 ++++++++++++++++++++++++---------------
M src/link/link_layout.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------
M src/link/link_reloc_layout.c | 155 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
M src/obj/elf/link.c | 55 ++++++++++++++++++++++++++++++++++++++++++-------------
4 files changed, 277 insertions(+), 98 deletions(-)

diff --git a/driver/lib/hosted.c b/driver/lib/hosted.c @@ -435,25 +435,34 @@ static int hosted_resolve_freebsd(const DriverHostedRequest* req, DRIVER_HOSTED_INPUT_OBJECT) != 0) return 1; if (static_link) { + /* FreeBSD 15 split the raw syscall stubs out of libc into libsys; the + * compiler builtins / soft-float helpers (e.g. rv64's binary128 __multf3, + * which libc references because the RISC-V psABI makes long double a + * 128-bit quad) live in libcompiler_rt.a (a.k.a. libgcc.a). libc, libsys + * and libcompiler_rt are mutually recursive, so after the first libc.a we + * append the ones the sysroot provides and re-list libc.a to pick up the + * back-references they introduce -- kit resolves each archive against the + * inputs before it and has no --start-group. */ + int has_libsys = hosted_libdir_has(req->env, dirs, "libsys.a"); + int has_crt = hosted_libdir_has(req->env, dirs, "libcompiler_rt.a"); if (hosted_add_required_search(plan->after, &plan->nafter, DRIVER_HOSTED_MAX_AFTER, req, dirs, "libc.a", DRIVER_HOSTED_INPUT_ARCHIVE) != 0) return 1; - /* FreeBSD 15 split the raw syscall stubs out of libc into libsys; link it - * after libc when the sysroot provides it (pre-15 roots won't have it). - * libc.a and libsys.a are mutually recursive (libc calls the syscall - * stubs in libsys; libsys's stubs call back into libc), so re-list libc.a - * after libsys.a -- kit resolves each archive against the inputs before - * it, so the second occurrence picks up the back-references libsys - * introduces. (Equivalent to GNU ld's `--start-group libc libsys`.) 
*/ - if (hosted_libdir_has(req->env, dirs, "libsys.a") && - (hosted_add_required_search(plan->after, &plan->nafter, - DRIVER_HOSTED_MAX_AFTER, req, dirs, - "libsys.a", DRIVER_HOSTED_INPUT_ARCHIVE) != - 0 || - hosted_add_required_search(plan->after, &plan->nafter, - DRIVER_HOSTED_MAX_AFTER, req, dirs, "libc.a", - DRIVER_HOSTED_INPUT_ARCHIVE) != 0)) + if (has_libsys && + hosted_add_required_search( + plan->after, &plan->nafter, DRIVER_HOSTED_MAX_AFTER, req, dirs, + "libsys.a", DRIVER_HOSTED_INPUT_ARCHIVE) != 0) + return 1; + if (has_crt && hosted_add_required_search(plan->after, &plan->nafter, + DRIVER_HOSTED_MAX_AFTER, req, + dirs, "libcompiler_rt.a", + DRIVER_HOSTED_INPUT_ARCHIVE) != 0) + return 1; + if ((has_libsys || has_crt) && + hosted_add_required_search(plan->after, &plan->nafter, + DRIVER_HOSTED_MAX_AFTER, req, dirs, "libc.a", + DRIVER_HOSTED_INPUT_ARCHIVE) != 0) return 1; } else { if (hosted_add_required_search(plan->after, &plan->nafter, diff --git a/src/link/link_layout.c b/src/link/link_layout.c @@ -28,6 +28,8 @@ LinkImage* link_image_alloc(Compiler*); /* defined in link.c */ +#define LINK_ELF_SHF_COMPRESSED 0x800u + /* Page size used for ELF segment alignment. We pull from env->execmem * when present (matches the eventual JIT mapping granularity) and fall * back to 16 KiB otherwise — large enough for any current Linux/aarch64 @@ -55,8 +57,15 @@ int link_section_kept(const Section* s) { int link_section_kept_fileonly(const Section* s) { /* Non-allocatable .debug_* sections. They get no PT_LOAD segment but * are carried through to the file so addr2line / gdb resolve - * file:line on the linked image. */ - return s && !s->removed && s->kind == SEC_DEBUG; + * file:line on the linked image. + * + * ELF SHF_COMPRESSED debug sections carry compressed bytes but relocation + * offsets refer to the uncompressed DWARF stream. Until the object model has + * a decompression/recompression path, dropping them is the only safe linked + * executable behavior. 
*/ + return s && !s->removed && s->kind == SEC_DEBUG && + !(s->ext_kind == OBJ_EXT_ELF && + (s->ext_flags & LINK_ELF_SHF_COMPRESSED)); } SegBucket link_bucket_for(u16 flags) { @@ -311,54 +320,63 @@ void link_layout_sections(Linker* l, LinkImage* img, const GcLive* g) { u64 seg_bss_extra[SEG_NBUCKETS] = {0}; /* Pass 2: place sections, grouped by name within each bucket and - * in first-occurrence order across groups. */ - for (u32 gi = 0; gi < ngroups; ++gi) { - for (u32 k = groups[gi].head; k != PLACE_NONE; k = entries[k].next) { - PlaceEntry* pe = &entries[k]; - SegBucket bucket = pe->bucket; - - ObjBuilder* ob = LinkInputs_at(&l->inputs, pe->input_idx)->obj; - InputMap* m = &img->input_maps[pe->input_idx]; - const Section* s = obj_section_get(ob, pe->obj_sec_id); - u32 align = s->align ? s->align : 1u; - u64 ofs; - LinkSection* ls; - LinkSectionId lsid; + * in first-occurrence order across groups. NOBITS (.bss/.tbss) sections + * are placed in a second sub-pass so every bucket's file image stays a + * contiguous prefix: ELF requires bss to trail, and for TLS specifically + * a .tbss ahead of .tdata makes the loader copy garbage file bytes as the + * zero-init image (FreeBSD/riscv _init_tls then crashes on a stale TLS + * pointer). */ + for (int bss_phase = 0; bss_phase < 2; ++bss_phase) { + for (u32 gi = 0; gi < ngroups; ++gi) { + for (u32 k = groups[gi].head; k != PLACE_NONE; k = entries[k].next) { + PlaceEntry* pe = &entries[k]; + SegBucket bucket = pe->bucket; + + ObjBuilder* ob = LinkInputs_at(&l->inputs, pe->input_idx)->obj; + InputMap* m = &img->input_maps[pe->input_idx]; + const Section* s = obj_section_get(ob, pe->obj_sec_id); + u32 align = s->align ? 
s->align : 1u; + u64 ofs; + LinkSection* ls; + LinkSectionId lsid; + int is_bss = (s->sem == SSEM_NOBITS || s->kind == SEC_BSS); + + if (is_bss != bss_phase) continue; + if (is_bss) { + u64 cursor = seg_size[bucket] + seg_bss_extra[bucket]; + cursor = ALIGN_UP(cursor, (u64)(align)); + seg_bss_extra[bucket] = cursor + (u64)pe->size - seg_size[bucket]; + ofs = cursor; + } else { + seg_size[bucket] += seg_bss_extra[bucket]; + seg_bss_extra[bucket] = 0; + ofs = ALIGN_UP(seg_size[bucket], (u64)(align)); + seg_size[bucket] = ofs + (u64)pe->size; + } - if (s->sem == SSEM_NOBITS || s->kind == SEC_BSS) { - u64 cursor = seg_size[bucket] + seg_bss_extra[bucket]; - cursor = ALIGN_UP(cursor, (u64)(align)); - seg_bss_extra[bucket] = cursor + (u64)pe->size - seg_size[bucket]; - ofs = cursor; - } else { - seg_size[bucket] += seg_bss_extra[bucket]; - seg_bss_extra[bucket] = 0; - ofs = ALIGN_UP(seg_size[bucket], (u64)(align)); - seg_size[bucket] = ofs + (u64)pe->size; + if (align > seg_align[bucket]) seg_align[bucket] = align; + seg_count[bucket]++; + + lsid = (LinkSectionId)(img->nsections + 1u); + ls = &img->sections[img->nsections++]; + memset(ls, 0, sizeof(*ls)); + ls->id = lsid; + ls->input_id = LinkInputs_at(&l->inputs, pe->input_idx)->id; + ls->obj_section_id = pe->obj_sec_id; + ls->obj_atom_id = pe->obj_atom_id; + ls->segment_id = LINK_SEG_NONE; + ls->obj_offset = pe->obj_offset; + ls->input_offset = ofs; + ls->file_offset = ofs; + ls->vaddr = ofs; + ls->size = pe->size; + ls->flags = s->flags; + ls->align = align; + ls->name = s->name; + ls->sem = (s->kind == SEC_BSS) ? 
SSEM_NOBITS : s->sem; + ls->segment_id = (LinkSegmentId)(bucket + 1u); /* 1..3 sentinel */ + map_placed_unit(m, pe->obj_sec_id, pe->obj_atom_id, lsid); } - - if (align > seg_align[bucket]) seg_align[bucket] = align; - seg_count[bucket]++; - - lsid = (LinkSectionId)(img->nsections + 1u); - ls = &img->sections[img->nsections++]; - memset(ls, 0, sizeof(*ls)); - ls->id = lsid; - ls->input_id = LinkInputs_at(&l->inputs, pe->input_idx)->id; - ls->obj_section_id = pe->obj_sec_id; - ls->obj_atom_id = pe->obj_atom_id; - ls->segment_id = LINK_SEG_NONE; - ls->obj_offset = pe->obj_offset; - ls->input_offset = ofs; - ls->file_offset = ofs; - ls->vaddr = ofs; - ls->size = pe->size; - ls->flags = s->flags; - ls->align = align; - ls->name = s->name; - ls->sem = (s->kind == SEC_BSS) ? SSEM_NOBITS : s->sem; - ls->segment_id = (LinkSegmentId)(bucket + 1u); /* 1..3 sentinel */ - map_placed_unit(m, pe->obj_sec_id, pe->obj_atom_id, lsid); } } @@ -879,7 +897,8 @@ static void link_layout_sections_scripted(Linker* l, LinkImage* img, img->segment_bytes[img->nsegments] = (u8*)h->alloc(h, (size_t)file_size_accum, 16); if (!img->segment_bytes[img->nsegments]) - compiler_panic(img->c, SRCLOC_NONE, "link: oom on scripted segment bytes"); + compiler_panic(img->c, SRCLOC_NONE, + "link: oom on scripted segment bytes"); img->segment_bytes_cap[img->nsegments] = (size_t)file_size_accum; memset(img->segment_bytes[img->nsegments], 0, (size_t)file_size_accum); } @@ -1250,7 +1269,14 @@ LinkImage* link_resolve(Linker* l) { link_emit_tls_boundaries(l, img); link_emit_encoding_section_boundaries(l, img); link_emit_boundary_sym(l, img, "__dso_handle", 0); - link_emit_boundary_sym(l, img, "_DYNAMIC", 0); + /* `_DYNAMIC` marks the dynamic section; in a static image it must be + * absolute 0 so libc's static-vs-dynamic probe (FreeBSD's __libc_start1 + * gates _init_tls() on `&_DYNAMIC != NULL`) takes the static path. 
Only + * define it for dynamic output, where layout_dyn places it at the real + * .dynamic vaddr; for static, the weak undef from crt/libc already + * resolved to SK_ABS 0, and defining it here as a rebased SK_OBJ symbol + * would wrongly make `&_DYNAMIC` non-zero. */ + if (l->emit_pie) link_emit_boundary_sym(l, img, "_DYNAMIC", 0); link_emit_boundary_sym(l, img, "_GLOBAL_OFFSET_TABLE_", 0); /* PE/COFF: mingw CRT references `__ImageBase` for ASLR-relative * addressing and base-relocation bookkeeping. The PE emitter diff --git a/src/link/link_reloc_layout.c b/src/link/link_reloc_layout.c @@ -23,6 +23,7 @@ #include "link/link.h" #include "link/link_arch.h" #include "link/link_internal.h" +#include "obj/format.h" /* Nominal (non-zero) width reported for the variable-length ULEB128 * RISC-V relocs. See the comment in reloc_width(): this value only has @@ -266,6 +267,7 @@ static u8 reloc_width(RelocKind k) { case R_X64_GOTPCRELX: case R_X64_REX_GOTPCRELX: case R_X64_GOTPC32: + case R_X64_GOTTPOFF: return 4; case R_ABS64: case R_REL64: @@ -296,6 +298,8 @@ static u8 reloc_width(RelocKind k) { case R_AARCH64_LDST128_ABS_LO12_NC: case R_AARCH64_ADR_GOT_PAGE: case R_AARCH64_LD64_GOT_LO12_NC: + case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: + case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: case R_AARCH64_TLSLE_ADD_TPREL_HI12: case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: case R_AARCH64_TLVP_LOAD_PAGE21: @@ -368,6 +372,22 @@ static u8 reloc_width(RelocKind k) { } } +/* TLS Initial-Exec relocs that load a GOT slot holding the symbol's + * TP-relative offset (rather than its address). They take an ordinary GOT + * slot, but the slot is filled with the tpoff value at link time -- see the + * slot_is_tls handling in link_layout_got. 
*/ +static int reloc_is_tls_got(u16 kind) { + switch (kind) { + case R_X64_GOTTPOFF: + case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: + case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: + case R_RV_TLS_GOT_HI20: + return 1; + default: + return 0; + } +} + static int reloc_uses_got(u16 kind) { switch (kind) { case R_AARCH64_ADR_GOT_PAGE: @@ -378,7 +398,7 @@ static int reloc_uses_got(u16 kind) { case R_RV_GOT_HI20: return 1; default: - return 0; + return reloc_is_tls_got(kind); } } @@ -430,8 +450,8 @@ u32 link_iplt_alloc_sections(LinkImage* img, u32 nsec) { * max segment end, so a region appended after another lands just past it * (page-aligned), matching the hand-rolled ALIGN_UP(prev_end, page). */ LinkSectionId link_synth_region(LinkImage* img, Linker* l, Sym name, u16 perms, - u16 sem, u64 size, u32 sec_align, u64* out_vaddr, - u8** out_bytes) { + u16 sem, u64 size, u32 sec_align, + u64* out_vaddr, u8** out_bytes) { Heap* h = img->heap; u64 page = link_layout_page_size(l); u64 base_vaddr = 0; @@ -460,7 +480,8 @@ LinkSectionId link_synth_region(LinkImage* img, Linker* l, Sym name, u16 perms, bytes = (u8*)h->alloc(h, (size_t)size, 16); img->segment_bytes[seg_idx] = bytes; img->segment_bytes_cap[seg_idx] = (size_t)size; - if (!bytes) compiler_panic(img->c, SRCLOC_NONE, "link: oom on synth region bytes"); + if (!bytes) + compiler_panic(img->c, SRCLOC_NONE, "link: oom on synth region bytes"); memset(bytes, 0, (size_t)size); img->nsegments += 1u; @@ -512,8 +533,8 @@ void link_emit_internal_abs64(LinkImage* img, LinkSectionId lsid, u32 offset, * `slot_target` GOT slot. 
*/ static void emit_stub_apply_relocs(LinkImage* img, LinkSectionId stub_lsid, u32 stub_base_offset, u64 stub_vaddr, - const LinkArchIPltReloc* relocs, - u32 nrelocs, LinkSymId slot_target) { + const LinkArchIPltReloc* relocs, u32 nrelocs, + LinkSymId slot_target) { u32 ri; for (ri = 0; ri < nrelocs; ++ri) { LinkRelocApply rrec; @@ -642,9 +663,8 @@ void link_layout_jit_stubs(Linker* l, LinkImage* img, u32 map_size, stub_id = link_append_symbol(img, &stub_rec); stub_map[orig] = stub_id; - emit_stub_apply_relocs(img, stubs_sec_id, - (u32)(i * arch->iplt_stub_size), stub_vaddr, - stub_relocs, nstub_relocs, slot_id); + emit_stub_apply_relocs(img, stubs_sec_id, (u32)(i * arch->iplt_stub_size), + stub_vaddr, stub_relocs, nstub_relocs, slot_id); link_emit_internal_abs64(img, slots_sec_id, (u32)(i * 8u), slot_vaddr, resolver_id); @@ -656,12 +676,39 @@ void link_layout_jit_stubs(Linker* l, LinkImage* img, u32 map_size, /* ---- pass 3c: GOT layout ---- */ +/* Fill a TLS Initial-Exec GOT slot with the target's TP-relative offset. + * Emitted as an internal raw-64-bit tpoff reloc so apply_all_relocs computes + * the offset in the same coordinate system it uses for ordinary local-exec + * sites: x86_64 variant II (X - tls_memsz) via R_X64_TPOFF64, AArch64/RISC-V + * variant I ((X - tls_vaddr) + TCB) via R_AARCH64_TPOFF64. The slot is then + * loaded by the GOTTPOFF / GOTTPREL site. */ +static void link_emit_internal_tpoff64(LinkImage* img, Linker* l, + LinkSectionId lsid, u32 offset, + u64 write_vaddr, LinkSymId target) { + LinkRelocApply rrec; + memset(&rrec, 0, sizeof(rrec)); + rrec.input_id = LINK_INPUT_NONE; + rrec.section_id = OBJ_SEC_NONE; + rrec.link_section_id = lsid; + rrec.offset = offset; + rrec.width = 8; + rrec.write_vaddr = write_vaddr; + rrec.write_file_offset = write_vaddr; + rrec.kind = (l->c->target.arch == KIT_ARCH_X86_64) ? 
R_X64_TPOFF64 + : R_AARCH64_TPOFF64; + rrec.target = target; + rrec.addend = 0; + *link_append_reloc_slot(img) = rrec; +} + void link_layout_got(Linker* l, LinkImage* img, u32 map_size, LinkSymId** got_map_out) { Heap* h = img->heap; LinkSymId* got_map; LinkSymId* slot_targets = NULL; + u8* slot_is_tls = NULL; u32 slot_cap = 0; + u32 tls_cap = 0; u32 nslot = 0; u32 ii, k; u64 base_vaddr; @@ -691,10 +738,18 @@ void link_layout_got(Linker* l, LinkImage* img, u32 map_size, if (r->sym == OBJ_SYM_NONE || r->sym >= m->nsym) continue; target = m->sym[r->sym]; if (target == LINK_SYM_NONE) continue; - if (got_map[target] != LINK_SYM_NONE) continue; + if (got_map[target] != LINK_SYM_NONE) { + /* A later reloc on the same target may reveal it is a TLS slot + * even if the slot was created by a non-TLS reference first. */ + if (reloc_is_tls_got(r->kind)) slot_is_tls[got_map[target] - 1u] = 1u; + continue; + } if (VEC_GROW(h, slot_targets, slot_cap, nslot + 1u)) compiler_panic(img->c, SRCLOC_NONE, "link: oom on got slot list"); + if (VEC_GROW(h, slot_is_tls, tls_cap, nslot + 1u)) + compiler_panic(img->c, SRCLOC_NONE, "link: oom on got slot tls map"); slot_targets[nslot] = target; + slot_is_tls[nslot] = reloc_is_tls_got(r->kind) ? 
1u : 0u; got_map[target] = (LinkSymId)(nslot + 1u); nslot++; } @@ -703,6 +758,7 @@ void link_layout_got(Linker* l, LinkImage* img, u32 map_size, if (nslot == 0) { if (slot_targets) h->free(h, slot_targets, sizeof(*slot_targets) * slot_cap); + if (slot_is_tls) h->free(h, slot_is_tls, tls_cap); h->free(h, got_map, sizeof(*got_map) * map_size); return; } @@ -731,16 +787,30 @@ void link_layout_got(Linker* l, LinkImage* img, u32 map_size, slot_id = link_append_symbol(img, &sym_rec); got_map[orig] = slot_id; - link_emit_internal_abs64(img, got_sec_id, (u32)(si * 8u), slot_vaddr, orig); + if (slot_is_tls[si]) + link_emit_internal_tpoff64(img, l, got_sec_id, (u32)(si * 8u), slot_vaddr, + orig); + else + link_emit_internal_abs64(img, got_sec_id, (u32)(si * 8u), slot_vaddr, + orig); } if (slot_targets) h->free(h, slot_targets, sizeof(*slot_targets) * slot_cap); + if (slot_is_tls) h->free(h, slot_is_tls, tls_cap); *got_map_out = got_map; } /* ---- pass 3d: STT_GNU_IFUNC trampoline ---- */ +/* The arch's R_*_IRELATIVE wire type, for the static __rela_iplt table. */ +static u32 link_elf_irelative_type(Compiler* c) { + const ObjFormatImpl* fmt = obj_format_lookup(KIT_OBJ_ELF); + const ObjElfArchOps* ao = + (fmt && fmt->elf_arch) ? fmt->elf_arch(c->target.arch) : NULL; + return ao ? ao->r_irelative : 0u; +} + void link_layout_iplt(Linker* l, LinkImage* img) { Heap* h = img->heap; u32 i; @@ -751,12 +821,27 @@ void link_layout_iplt(Linker* l, LinkImage* img) { LinkSectionId iplt_sec_id, igot_sec_id, pairs_sec_id, init_sec_id = 0; u8* iplt_bytes; u32 slot_idx; - int emit_init_array = l->emit_static_exe; + /* FreeBSD's crt resolves static IFUNCs by walking [__rela_iplt_start, + * __rela_iplt_end) of R_*_IRELATIVE relocs in __libc_start1, *before* + * _init_tls -- earlier than any (pre)init ctor can run, which matters + * because _init_tls allocates through malloc -> getenv -> IFUNC string + * ops. 
Emit that standard table (also the glibc mechanism) instead of the + * ctor-based __kit_ifunc_init path for hosted FreeBSD static links. Other + * targets (musl/freestanding) keep the ctor, whose crt does not walk + * __rela_iplt. */ + int use_rela_iplt = l->emit_static_exe && l->c->target.os == KIT_OS_FREEBSD && + l->c->target.obj == KIT_OBJ_ELF; + int emit_init_array = l->emit_static_exe && !use_rela_iplt; + LinkSectionId rela_iplt_sec_id = 0; + u64 rela_iplt_vaddr = 0, rela_iplt_size = 0; + u8* rela_iplt_bytes = NULL; + u32 irelative_type = use_rela_iplt ? link_elf_irelative_type(l->c) : 0u; LinkSymId ifunc_init_sym = LINK_SYM_NONE; Sym ifunc_init_name = 0; const LinkArchDesc* arch = link_arch_desc_for(l->c); if (!arch) - compiler_panic(img->c, SRCLOC_NONE, "link: layout_iplt: no arch descriptor"); + compiler_panic(img->c, SRCLOC_NONE, + "link: layout_iplt: no arch descriptor"); for (i = 0; i < LinkSyms_count(&img->syms); ++i) { const LinkSymbol* s = LinkSyms_at(&img->syms, i); @@ -791,7 +876,8 @@ void link_layout_iplt(Linker* l, LinkImage* img) { iplt_sec_id = link_synth_region( img, l, pool_intern_slice(l->c->global, SLICE_LIT(".iplt")), - SF_ALLOC | SF_EXEC, SSEM_PROGBITS, iplt_size, 4, &iplt_vaddr, &iplt_bytes); + SF_ALLOC | SF_EXEC, SSEM_PROGBITS, iplt_size, 4, &iplt_vaddr, + &iplt_bytes); igot_sec_id = link_synth_region( img, l, pool_intern_slice(l->c->global, SLICE_LIT(".igot.plt")), SF_ALLOC | SF_WRITE, SSEM_PROGBITS, igot_size, 8, &igot_vaddr, NULL); @@ -799,14 +885,26 @@ void link_layout_iplt(Linker* l, LinkImage* img) { img, l, pool_intern_slice(l->c->global, SLICE_LIT(".iplt.pairs")), SF_ALLOC | SF_WRITE, SSEM_PROGBITS, pairs_size, 8, &pairs_vaddr, NULL); if (emit_init_array) - init_sec_id = - link_synth_region(img, l, obj_secname_preinit_array(l->c), - SF_ALLOC | SF_WRITE, SSEM_PREINIT_ARRAY, init_size, 8, - &init_vaddr, NULL); + init_sec_id = link_synth_region(img, l, obj_secname_preinit_array(l->c), + SF_ALLOC | SF_WRITE, SSEM_PREINIT_ARRAY, + 
init_size, 8, &init_vaddr, NULL); link_emit_boundary_sym(l, img, "__start_iplt_pairs", pairs_vaddr); link_emit_boundary_sym(l, img, "__stop_iplt_pairs", pairs_vaddr + pairs_size); + if (use_rela_iplt) { + /* One Elf64_Rela (24 bytes) per IFUNC: r_offset = igot slot, r_info = + * IRELATIVE, r_addend = resolver. crt walks the bracketed range. */ + rela_iplt_size = (u64)nifunc * 24u; + rela_iplt_sec_id = link_synth_region( + img, l, pool_intern_slice(l->c->global, SLICE_LIT(".rela.plt")), + SF_ALLOC, SSEM_PROGBITS, rela_iplt_size, 8, &rela_iplt_vaddr, + &rela_iplt_bytes); + link_emit_boundary_sym(l, img, "__rela_iplt_start", rela_iplt_vaddr); + link_emit_boundary_sym(l, img, "__rela_iplt_end", + rela_iplt_vaddr + rela_iplt_size); + } + img->iplt_pairs = (u64*)h->alloc( h, sizeof(*img->iplt_pairs) * 2u * (size_t)nifunc, _Alignof(u64)); if (!img->iplt_pairs) @@ -881,6 +979,21 @@ void link_layout_iplt(Linker* l, LinkImage* img) { link_emit_internal_abs64(img, pairs_sec_id, (u32)(slot_idx * 16u + 8u), pair_vaddr + 8u, slot_id); + if (use_rela_iplt) { + /* Elf64_Rela: r_offset(+0)=slot, r_info(+8)=IRELATIVE, r_addend(+16)= + * resolver. r_info is a fixed constant (sym index 0); r_offset and + * r_addend are filled by internal ABS64 relocs so they pick up the + * final (post-shift) vaddrs. 
*/ + u64 rela_off = (u64)slot_idx * 24u; + u64 rela_ent_vaddr = rela_iplt_vaddr + rela_off; + if (rela_iplt_bytes) + wr_u64_le(rela_iplt_bytes + rela_off + 8u, (u64)irelative_type); + link_emit_internal_abs64(img, rela_iplt_sec_id, (u32)rela_off, + rela_ent_vaddr, slot_id); + link_emit_internal_abs64(img, rela_iplt_sec_id, (u32)(rela_off + 16u), + rela_ent_vaddr + 16u, resolver_id); + } + s->kind = SK_FUNC; s->section_id = iplt_sec_id; s->value = (u64)slot_idx * (u64)arch->iplt_stub_size; @@ -964,7 +1077,8 @@ void link_emit_relocations(Linker* l, LinkImage* img, const LinkSymId* got_map, r->kind == R_RV_ALIGN) continue; if (r->sym == OBJ_SYM_NONE || r->sym >= m->nsym) - compiler_panic(l->c, SRCLOC_NONE, "link: reloc references unknown symbol"); + compiler_panic(l->c, SRCLOC_NONE, + "link: reloc references unknown symbol"); target = m->sym[r->sym]; if (target == LINK_SYM_NONE) compiler_panic(l->c, SRCLOC_NONE, @@ -972,7 +1086,8 @@ void link_emit_relocations(Linker* l, LinkImage* img, const LinkSymId* got_map, if (got_map && reloc_uses_got(r->kind)) { LinkSymId slot = got_map[target]; if (slot == LINK_SYM_NONE) - compiler_panic(l->c, SRCLOC_NONE, "link: GOT slot missing for symbol"); + compiler_panic(l->c, SRCLOC_NONE, + "link: GOT slot missing for symbol"); target = slot; } if (stub_map && arch && arch->needs_jit_call_stub && diff --git a/src/obj/elf/link.c b/src/obj/elf/link.c @@ -349,8 +349,22 @@ static void shift_image_addresses(LinkImage* img, u64 delta) { static int reloc_is_tlsle(RelocKind k) { return k == R_AARCH64_TLSLE_ADD_TPREL_HI12 || - k == R_AARCH64_TLSLE_ADD_TPREL_LO12_NC || k == R_RV_TPREL_HI20 || - k == R_RV_TPREL_LO12_I || k == R_RV_TPREL_LO12_S; + k == R_AARCH64_TLSLE_ADD_TPREL_LO12_NC || k == R_AARCH64_TPOFF64 || + k == R_RV_TPREL_HI20 || k == R_RV_TPREL_LO12_I || + k == R_RV_TPREL_LO12_S; +} + +/* Variant-I TP bias: distance from the TLS image start to where `tp` points. 
+ * - AArch64 (AAPCS64): tp points at a 16-byte TCB ahead of the image -> +16 + * for both hosted and freestanding. + * - RISC-V: the psABI points tp at the *start* of the image, so hosted libcs + * (FreeBSD/Linux _init_tls) want +0; kit's own freestanding start.c places + * a 16-byte TCB ahead of .tdata and biases tp to match AArch64, so + * freestanding rv64/rv32 keep +16. */ +static u64 tls_tcb_bias(Compiler* c) { + if (c->target.arch == KIT_ARCH_RV64 || c->target.arch == KIT_ARCH_RV32) + return c->target.os == KIT_OS_FREESTANDING ? TLS_TCB_SIZE : 0ull; + return TLS_TCB_SIZE; } /* x86_64 SysV ABI: TLS variant II — the per-thread TLS image sits at @@ -426,7 +440,9 @@ static i64 rv_pcrel_lo12_disp(LinkImage* img, u64 auipc_vaddr, u64 img_base) { const LinkRelocApply* hi = LinkRelocs_at(&img->relocs, i); const LinkSymbol* hi_tgt; u64 hi_S, hi_P; - if (hi->kind != R_RV_PCREL_HI20 && hi->kind != R_RV_GOT_HI20) continue; + if (hi->kind != R_RV_PCREL_HI20 && hi->kind != R_RV_GOT_HI20 && + hi->kind != R_RV_TLS_GOT_HI20) + continue; if (hi->write_vaddr + img_base != auipc_vaddr) continue; hi_tgt = LinkSyms_at(&img->syms, hi->target - 1); hi_S = (hi_tgt->kind == SK_ABS) ? hi_tgt->vaddr : hi_tgt->vaddr + img_base; @@ -468,17 +484,23 @@ static void apply_all_relocs(LinkImage* img, u64 img_base) { } seg = &img->segments[sec->segment_id - 1]; if (reloc_is_tlsle(r->kind)) { - /* S is the target's TP-relative offset: distance from the - * TLS image start plus the 16-byte TCB. Both vaddrs are - * in the same (post-shift, image-relative) coordinate - * system, so img_base cancels out. */ - S = (tgt->vaddr - img->tls_vaddr) + TLS_TCB_SIZE; + /* S is the target's TP-relative offset: distance from the TLS image + * start plus the arch/OS TCB bias (see tls_tcb_bias). Both vaddrs are + * in the same (post-shift, image-relative) coordinate system, so + * img_base cancels out. 
*/ + S = (tgt->vaddr - img->tls_vaddr) + tls_tcb_bias(img->c); } else if (reloc_is_x64_tlsle(r->kind)) { - /* x86_64 variant II: TP points just past the TLS image, so a - * symbol at offset X within the image is at TP-relative offset - * (X - tls_memsz). Cast through i64/u64 so the reloc apply - * writes the full 32- or 64-bit signed value. */ - i64 off = (i64)(tgt->vaddr - img->tls_vaddr) - (i64)img->tls_memsz; + /* x86_64 variant II: TP points just past the TLS image, so a symbol at + * offset X within the image is at TP-relative offset (X - tls_size). + * The runtime (FreeBSD/glibc _init_tls) allocates the block rounded up + * to the TLS alignment, so tls_size must be round_up(memsz, align) -- + * using the raw memsz is off by the rounding remainder whenever memsz + * is not a multiple of align, handing back a garbage TLS address (e.g. + * jemalloc's tsd, faulting non-canonical). Cast through i64/u64 so the + * apply writes the full 32- or 64-bit signed value. */ + u64 a = img->tls_align ? img->tls_align : 1u; + u64 tls_size = (img->tls_memsz + a - 1u) & ~(a - 1u); + i64 off = (i64)(tgt->vaddr - img->tls_vaddr) - (i64)tls_size; S = (u64)off; } else if (r->kind == R_RV_PCREL_LO12_I || r->kind == R_RV_PCREL_LO12_S) { /* PCREL_LO12: rewrite S so that link_reloc_apply's existing @@ -1432,6 +1454,13 @@ void link_emit_elf(LinkImage* img, Writer* w) { ehdr.e_ident[5] = ELFDATA2LSB; ehdr.e_ident[6] = EV_CURRENT; ehdr.e_ident[7] = ELFOSABI_NONE; + /* Brand FreeBSD executables with EI_OSABI=ELFOSABI_FREEBSD; the kernel + * matches that brand directly. Without it a static binary is rejected with + * ENOEXEC -- the FreeBSD ABI note crt1.o carries is not sufficient on its + * own for kit's images (the kernel's note scan does not recognize the + * layout), so we set the OSABI on every arch (FreeBSD/clang only sets it on + * amd64/aarch64, but the riscv64 kernel accepts it too). 
*/ + if (img->c->target.os == KIT_OS_FREEBSD) ehdr.e_ident[7] = ELFOSABI_FREEBSD; ehdr.e_type = pie ? ET_DYN : ET_EXEC; ehdr.e_machine = (u16)e_machine; ehdr.e_version = EV_CURRENT;