commit f80b5dd6d6c855d8baa47ce108d2e859564354fc
parent 516e7b47cb094dd0efd52a836af95a5a45672102
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 4 Jun 2026 16:39:01 -0700
link: fix FreeBSD hosted static executables
Diffstat:
4 files changed, 277 insertions(+), 98 deletions(-)
diff --git a/driver/lib/hosted.c b/driver/lib/hosted.c
@@ -435,25 +435,34 @@ static int hosted_resolve_freebsd(const DriverHostedRequest* req,
DRIVER_HOSTED_INPUT_OBJECT) != 0)
return 1;
if (static_link) {
+ /* FreeBSD 15 split the raw syscall stubs out of libc into libsys; the
+ * compiler builtins / soft-float helpers (e.g. rv64's binary128 __multf3,
+ * which libc references because the RISC-V psABI makes long double a
+ * 128-bit quad) live in libcompiler_rt.a (a.k.a. libgcc.a). libc, libsys
+ * and libcompiler_rt are mutually recursive, so after the first libc.a we
+ * append whichever of libsys.a / libcompiler_rt.a the sysroot provides, then
+ * re-list libc.a to pick up their back-references -- kit resolves each
+ * archive against the inputs before it and has no --start-group. */
+ int has_libsys = hosted_libdir_has(req->env, dirs, "libsys.a");
+ int has_crt = hosted_libdir_has(req->env, dirs, "libcompiler_rt.a");
if (hosted_add_required_search(plan->after, &plan->nafter,
DRIVER_HOSTED_MAX_AFTER, req, dirs, "libc.a",
DRIVER_HOSTED_INPUT_ARCHIVE) != 0)
return 1;
- /* FreeBSD 15 split the raw syscall stubs out of libc into libsys; link it
- * after libc when the sysroot provides it (pre-15 roots won't have it).
- * libc.a and libsys.a are mutually recursive (libc calls the syscall
- * stubs in libsys; libsys's stubs call back into libc), so re-list libc.a
- * after libsys.a -- kit resolves each archive against the inputs before
- * it, so the second occurrence picks up the back-references libsys
- * introduces. (Equivalent to GNU ld's `--start-group libc libsys`.) */
- if (hosted_libdir_has(req->env, dirs, "libsys.a") &&
- (hosted_add_required_search(plan->after, &plan->nafter,
- DRIVER_HOSTED_MAX_AFTER, req, dirs,
- "libsys.a", DRIVER_HOSTED_INPUT_ARCHIVE) !=
- 0 ||
- hosted_add_required_search(plan->after, &plan->nafter,
- DRIVER_HOSTED_MAX_AFTER, req, dirs, "libc.a",
- DRIVER_HOSTED_INPUT_ARCHIVE) != 0))
+ if (has_libsys &&
+ hosted_add_required_search(
+ plan->after, &plan->nafter, DRIVER_HOSTED_MAX_AFTER, req, dirs,
+ "libsys.a", DRIVER_HOSTED_INPUT_ARCHIVE) != 0)
+ return 1;
+ if (has_crt && hosted_add_required_search(plan->after, &plan->nafter,
+ DRIVER_HOSTED_MAX_AFTER, req,
+ dirs, "libcompiler_rt.a",
+ DRIVER_HOSTED_INPUT_ARCHIVE) != 0)
+ return 1;
+ if ((has_libsys || has_crt) &&
+ hosted_add_required_search(plan->after, &plan->nafter,
+ DRIVER_HOSTED_MAX_AFTER, req, dirs, "libc.a",
+ DRIVER_HOSTED_INPUT_ARCHIVE) != 0)
return 1;
} else {
if (hosted_add_required_search(plan->after, &plan->nafter,
diff --git a/src/link/link_layout.c b/src/link/link_layout.c
@@ -28,6 +28,8 @@
LinkImage* link_image_alloc(Compiler*); /* defined in link.c */
+#define LINK_ELF_SHF_COMPRESSED 0x800u
+
/* Page size used for ELF segment alignment. We pull from env->execmem
* when present (matches the eventual JIT mapping granularity) and fall
* back to 16 KiB otherwise — large enough for any current Linux/aarch64
@@ -55,8 +57,15 @@ int link_section_kept(const Section* s) {
int link_section_kept_fileonly(const Section* s) {
/* Non-allocatable .debug_* sections. They get no PT_LOAD segment but
* are carried through to the file so addr2line / gdb resolve
- * file:line on the linked image. */
- return s && !s->removed && s->kind == SEC_DEBUG;
+ * file:line on the linked image.
+ *
+ * ELF SHF_COMPRESSED debug sections carry compressed bytes but relocation
+ * offsets refer to the uncompressed DWARF stream. Until the object model has
+ * a decompression/recompression path, dropping them from the linked
+ * executable is the only safe behavior. */
+ return s && !s->removed && s->kind == SEC_DEBUG &&
+ !(s->ext_kind == OBJ_EXT_ELF &&
+ (s->ext_flags & LINK_ELF_SHF_COMPRESSED));
}
SegBucket link_bucket_for(u16 flags) {
@@ -311,54 +320,63 @@ void link_layout_sections(Linker* l, LinkImage* img, const GcLive* g) {
u64 seg_bss_extra[SEG_NBUCKETS] = {0};
/* Pass 2: place sections, grouped by name within each bucket and
- * in first-occurrence order across groups. */
- for (u32 gi = 0; gi < ngroups; ++gi) {
- for (u32 k = groups[gi].head; k != PLACE_NONE; k = entries[k].next) {
- PlaceEntry* pe = &entries[k];
- SegBucket bucket = pe->bucket;
-
- ObjBuilder* ob = LinkInputs_at(&l->inputs, pe->input_idx)->obj;
- InputMap* m = &img->input_maps[pe->input_idx];
- const Section* s = obj_section_get(ob, pe->obj_sec_id);
- u32 align = s->align ? s->align : 1u;
- u64 ofs;
- LinkSection* ls;
- LinkSectionId lsid;
+ * in first-occurrence order across groups. NOBITS (.bss/.tbss) sections
+ * are placed in a second sub-pass so every bucket's file image stays a
+ * contiguous prefix: ELF requires bss to trail, and for TLS specifically
+ * a .tbss ahead of .tdata makes the loader copy garbage file bytes as the
+ * zero-init image (FreeBSD/riscv _init_tls then crashes on a stale TLS
+ * pointer). */
+ for (int bss_phase = 0; bss_phase < 2; ++bss_phase) {
+ for (u32 gi = 0; gi < ngroups; ++gi) {
+ for (u32 k = groups[gi].head; k != PLACE_NONE; k = entries[k].next) {
+ PlaceEntry* pe = &entries[k];
+ SegBucket bucket = pe->bucket;
+
+ ObjBuilder* ob = LinkInputs_at(&l->inputs, pe->input_idx)->obj;
+ InputMap* m = &img->input_maps[pe->input_idx];
+ const Section* s = obj_section_get(ob, pe->obj_sec_id);
+ u32 align = s->align ? s->align : 1u;
+ u64 ofs;
+ LinkSection* ls;
+ LinkSectionId lsid;
+ int is_bss = (s->sem == SSEM_NOBITS || s->kind == SEC_BSS);
+
+ if (is_bss != bss_phase) continue;
+ if (is_bss) {
+ u64 cursor = seg_size[bucket] + seg_bss_extra[bucket];
+ cursor = ALIGN_UP(cursor, (u64)(align));
+ seg_bss_extra[bucket] = cursor + (u64)pe->size - seg_size[bucket];
+ ofs = cursor;
+ } else {
+ seg_size[bucket] += seg_bss_extra[bucket];
+ seg_bss_extra[bucket] = 0;
+ ofs = ALIGN_UP(seg_size[bucket], (u64)(align));
+ seg_size[bucket] = ofs + (u64)pe->size;
+ }
- if (s->sem == SSEM_NOBITS || s->kind == SEC_BSS) {
- u64 cursor = seg_size[bucket] + seg_bss_extra[bucket];
- cursor = ALIGN_UP(cursor, (u64)(align));
- seg_bss_extra[bucket] = cursor + (u64)pe->size - seg_size[bucket];
- ofs = cursor;
- } else {
- seg_size[bucket] += seg_bss_extra[bucket];
- seg_bss_extra[bucket] = 0;
- ofs = ALIGN_UP(seg_size[bucket], (u64)(align));
- seg_size[bucket] = ofs + (u64)pe->size;
+ if (align > seg_align[bucket]) seg_align[bucket] = align;
+ seg_count[bucket]++;
+
+ lsid = (LinkSectionId)(img->nsections + 1u);
+ ls = &img->sections[img->nsections++];
+ memset(ls, 0, sizeof(*ls));
+ ls->id = lsid;
+ ls->input_id = LinkInputs_at(&l->inputs, pe->input_idx)->id;
+ ls->obj_section_id = pe->obj_sec_id;
+ ls->obj_atom_id = pe->obj_atom_id;
+ ls->segment_id = LINK_SEG_NONE;
+ ls->obj_offset = pe->obj_offset;
+ ls->input_offset = ofs;
+ ls->file_offset = ofs;
+ ls->vaddr = ofs;
+ ls->size = pe->size;
+ ls->flags = s->flags;
+ ls->align = align;
+ ls->name = s->name;
+ ls->sem = (s->kind == SEC_BSS) ? SSEM_NOBITS : s->sem;
+ ls->segment_id = (LinkSegmentId)(bucket + 1u); /* 1..3 sentinel */
+ map_placed_unit(m, pe->obj_sec_id, pe->obj_atom_id, lsid);
}
-
- if (align > seg_align[bucket]) seg_align[bucket] = align;
- seg_count[bucket]++;
-
- lsid = (LinkSectionId)(img->nsections + 1u);
- ls = &img->sections[img->nsections++];
- memset(ls, 0, sizeof(*ls));
- ls->id = lsid;
- ls->input_id = LinkInputs_at(&l->inputs, pe->input_idx)->id;
- ls->obj_section_id = pe->obj_sec_id;
- ls->obj_atom_id = pe->obj_atom_id;
- ls->segment_id = LINK_SEG_NONE;
- ls->obj_offset = pe->obj_offset;
- ls->input_offset = ofs;
- ls->file_offset = ofs;
- ls->vaddr = ofs;
- ls->size = pe->size;
- ls->flags = s->flags;
- ls->align = align;
- ls->name = s->name;
- ls->sem = (s->kind == SEC_BSS) ? SSEM_NOBITS : s->sem;
- ls->segment_id = (LinkSegmentId)(bucket + 1u); /* 1..3 sentinel */
- map_placed_unit(m, pe->obj_sec_id, pe->obj_atom_id, lsid);
}
}
@@ -879,7 +897,8 @@ static void link_layout_sections_scripted(Linker* l, LinkImage* img,
img->segment_bytes[img->nsegments] =
(u8*)h->alloc(h, (size_t)file_size_accum, 16);
if (!img->segment_bytes[img->nsegments])
- compiler_panic(img->c, SRCLOC_NONE, "link: oom on scripted segment bytes");
+ compiler_panic(img->c, SRCLOC_NONE,
+ "link: oom on scripted segment bytes");
img->segment_bytes_cap[img->nsegments] = (size_t)file_size_accum;
memset(img->segment_bytes[img->nsegments], 0, (size_t)file_size_accum);
}
@@ -1250,7 +1269,14 @@ LinkImage* link_resolve(Linker* l) {
link_emit_tls_boundaries(l, img);
link_emit_encoding_section_boundaries(l, img);
link_emit_boundary_sym(l, img, "__dso_handle", 0);
- link_emit_boundary_sym(l, img, "_DYNAMIC", 0);
+ /* `_DYNAMIC` marks the dynamic section; in a static image it must be
+ * absolute 0 so libc's static-vs-dynamic probe (FreeBSD's __libc_start1
+ * gates _init_tls() on `&_DYNAMIC != NULL`) takes the static path. Only
+ * define it for dynamic output, where layout_dyn places it at the real
+ * .dynamic vaddr; for static, the weak undef from crt/libc already
+ * resolved to SK_ABS 0, and defining it here as a rebased SK_OBJ symbol
+ * would wrongly make `&_DYNAMIC` non-zero. */
+ if (l->emit_pie) link_emit_boundary_sym(l, img, "_DYNAMIC", 0);
link_emit_boundary_sym(l, img, "_GLOBAL_OFFSET_TABLE_", 0);
/* PE/COFF: mingw CRT references `__ImageBase` for ASLR-relative
* addressing and base-relocation bookkeeping. The PE emitter
diff --git a/src/link/link_reloc_layout.c b/src/link/link_reloc_layout.c
@@ -23,6 +23,7 @@
#include "link/link.h"
#include "link/link_arch.h"
#include "link/link_internal.h"
+#include "obj/format.h"
/* Nominal (non-zero) width reported for the variable-length ULEB128
* RISC-V relocs. See the comment in reloc_width(): this value only has
@@ -266,6 +267,7 @@ static u8 reloc_width(RelocKind k) {
case R_X64_GOTPCRELX:
case R_X64_REX_GOTPCRELX:
case R_X64_GOTPC32:
+ case R_X64_GOTTPOFF:
return 4;
case R_ABS64:
case R_REL64:
@@ -296,6 +298,8 @@ static u8 reloc_width(RelocKind k) {
case R_AARCH64_LDST128_ABS_LO12_NC:
case R_AARCH64_ADR_GOT_PAGE:
case R_AARCH64_LD64_GOT_LO12_NC:
+ case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
+ case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
case R_AARCH64_TLSLE_ADD_TPREL_HI12:
case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
case R_AARCH64_TLVP_LOAD_PAGE21:
@@ -368,6 +372,22 @@ static u8 reloc_width(RelocKind k) {
}
}
+/* TLS Initial-Exec relocs that load a GOT slot holding the symbol's
+ * TP-relative offset (rather than its address). They take an ordinary GOT
+ * slot, but the slot is filled with the tpoff value at link time -- see the
+ * slot_is_tls handling in link_layout_got. */
+static int reloc_is_tls_got(u16 kind) {
+ switch (kind) {
+ case R_X64_GOTTPOFF:
+ case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
+ case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
+ case R_RV_TLS_GOT_HI20:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
static int reloc_uses_got(u16 kind) {
switch (kind) {
case R_AARCH64_ADR_GOT_PAGE:
@@ -378,7 +398,7 @@ static int reloc_uses_got(u16 kind) {
case R_RV_GOT_HI20:
return 1;
default:
- return 0;
+ return reloc_is_tls_got(kind);
}
}
@@ -430,8 +450,8 @@ u32 link_iplt_alloc_sections(LinkImage* img, u32 nsec) {
* max segment end, so a region appended after another lands just past it
* (page-aligned), matching the hand-rolled ALIGN_UP(prev_end, page). */
LinkSectionId link_synth_region(LinkImage* img, Linker* l, Sym name, u16 perms,
- u16 sem, u64 size, u32 sec_align, u64* out_vaddr,
- u8** out_bytes) {
+ u16 sem, u64 size, u32 sec_align,
+ u64* out_vaddr, u8** out_bytes) {
Heap* h = img->heap;
u64 page = link_layout_page_size(l);
u64 base_vaddr = 0;
@@ -460,7 +480,8 @@ LinkSectionId link_synth_region(LinkImage* img, Linker* l, Sym name, u16 perms,
bytes = (u8*)h->alloc(h, (size_t)size, 16);
img->segment_bytes[seg_idx] = bytes;
img->segment_bytes_cap[seg_idx] = (size_t)size;
- if (!bytes) compiler_panic(img->c, SRCLOC_NONE, "link: oom on synth region bytes");
+ if (!bytes)
+ compiler_panic(img->c, SRCLOC_NONE, "link: oom on synth region bytes");
memset(bytes, 0, (size_t)size);
img->nsegments += 1u;
@@ -512,8 +533,8 @@ void link_emit_internal_abs64(LinkImage* img, LinkSectionId lsid, u32 offset,
* `slot_target` GOT slot. */
static void emit_stub_apply_relocs(LinkImage* img, LinkSectionId stub_lsid,
u32 stub_base_offset, u64 stub_vaddr,
- const LinkArchIPltReloc* relocs,
- u32 nrelocs, LinkSymId slot_target) {
+ const LinkArchIPltReloc* relocs, u32 nrelocs,
+ LinkSymId slot_target) {
u32 ri;
for (ri = 0; ri < nrelocs; ++ri) {
LinkRelocApply rrec;
@@ -642,9 +663,8 @@ void link_layout_jit_stubs(Linker* l, LinkImage* img, u32 map_size,
stub_id = link_append_symbol(img, &stub_rec);
stub_map[orig] = stub_id;
- emit_stub_apply_relocs(img, stubs_sec_id,
- (u32)(i * arch->iplt_stub_size), stub_vaddr,
- stub_relocs, nstub_relocs, slot_id);
+ emit_stub_apply_relocs(img, stubs_sec_id, (u32)(i * arch->iplt_stub_size),
+ stub_vaddr, stub_relocs, nstub_relocs, slot_id);
link_emit_internal_abs64(img, slots_sec_id, (u32)(i * 8u), slot_vaddr,
resolver_id);
@@ -656,12 +676,39 @@ void link_layout_jit_stubs(Linker* l, LinkImage* img, u32 map_size,
/* ---- pass 3c: GOT layout ---- */
+/* Fill a TLS Initial-Exec GOT slot with the target's TP-relative offset.
+ * Emitted as an internal raw-64-bit tpoff reloc so apply_all_relocs computes
+ * the offset exactly as for local-exec sites. With X = target - tls_vaddr:
+ * x86_64 variant II is X - round_up(tls_memsz, tls_align) (R_X64_TPOFF64);
+ * AArch64/RISC-V variant I is X + the arch/OS TCB bias (R_AARCH64_TPOFF64).
+ * The slot is then loaded by the GOTTPOFF / GOTTPREL / TLS_GOT_HI20 site. */
+static void link_emit_internal_tpoff64(LinkImage* img, Linker* l,
+ LinkSectionId lsid, u32 offset,
+ u64 write_vaddr, LinkSymId target) {
+ LinkRelocApply rrec;
+ memset(&rrec, 0, sizeof(rrec));
+ rrec.input_id = LINK_INPUT_NONE;
+ rrec.section_id = OBJ_SEC_NONE;
+ rrec.link_section_id = lsid;
+ rrec.offset = offset;
+ rrec.width = 8;
+ rrec.write_vaddr = write_vaddr;
+ rrec.write_file_offset = write_vaddr;
+ rrec.kind = (l->c->target.arch == KIT_ARCH_X86_64) ? R_X64_TPOFF64
+ : R_AARCH64_TPOFF64;
+ rrec.target = target;
+ rrec.addend = 0;
+ *link_append_reloc_slot(img) = rrec;
+}
+
void link_layout_got(Linker* l, LinkImage* img, u32 map_size,
LinkSymId** got_map_out) {
Heap* h = img->heap;
LinkSymId* got_map;
LinkSymId* slot_targets = NULL;
+ u8* slot_is_tls = NULL;
u32 slot_cap = 0;
+ u32 tls_cap = 0;
u32 nslot = 0;
u32 ii, k;
u64 base_vaddr;
@@ -691,10 +738,18 @@ void link_layout_got(Linker* l, LinkImage* img, u32 map_size,
if (r->sym == OBJ_SYM_NONE || r->sym >= m->nsym) continue;
target = m->sym[r->sym];
if (target == LINK_SYM_NONE) continue;
- if (got_map[target] != LINK_SYM_NONE) continue;
+ if (got_map[target] != LINK_SYM_NONE) {
+ /* A later reloc on the same target may reveal it is a TLS slot
+ * even if the slot was created by a non-TLS reference first. */
+ if (reloc_is_tls_got(r->kind)) slot_is_tls[got_map[target] - 1u] = 1u;
+ continue;
+ }
if (VEC_GROW(h, slot_targets, slot_cap, nslot + 1u))
compiler_panic(img->c, SRCLOC_NONE, "link: oom on got slot list");
+ if (VEC_GROW(h, slot_is_tls, tls_cap, nslot + 1u))
+ compiler_panic(img->c, SRCLOC_NONE, "link: oom on got slot tls map");
slot_targets[nslot] = target;
+ slot_is_tls[nslot] = reloc_is_tls_got(r->kind) ? 1u : 0u;
got_map[target] = (LinkSymId)(nslot + 1u);
nslot++;
}
@@ -703,6 +758,7 @@ void link_layout_got(Linker* l, LinkImage* img, u32 map_size,
if (nslot == 0) {
if (slot_targets)
h->free(h, slot_targets, sizeof(*slot_targets) * slot_cap);
+ if (slot_is_tls) h->free(h, slot_is_tls, tls_cap);
h->free(h, got_map, sizeof(*got_map) * map_size);
return;
}
@@ -731,16 +787,30 @@ void link_layout_got(Linker* l, LinkImage* img, u32 map_size,
slot_id = link_append_symbol(img, &sym_rec);
got_map[orig] = slot_id;
- link_emit_internal_abs64(img, got_sec_id, (u32)(si * 8u), slot_vaddr, orig);
+ if (slot_is_tls[si])
+ link_emit_internal_tpoff64(img, l, got_sec_id, (u32)(si * 8u), slot_vaddr,
+ orig);
+ else
+ link_emit_internal_abs64(img, got_sec_id, (u32)(si * 8u), slot_vaddr,
+ orig);
}
if (slot_targets) h->free(h, slot_targets, sizeof(*slot_targets) * slot_cap);
+ if (slot_is_tls) h->free(h, slot_is_tls, tls_cap);
*got_map_out = got_map;
}
/* ---- pass 3d: STT_GNU_IFUNC trampoline ---- */
+/* The arch's R_*_IRELATIVE wire type, for the static __rela_iplt table. */
+static u32 link_elf_irelative_type(Compiler* c) {
+ const ObjFormatImpl* fmt = obj_format_lookup(KIT_OBJ_ELF);
+ const ObjElfArchOps* ao =
+ (fmt && fmt->elf_arch) ? fmt->elf_arch(c->target.arch) : NULL;
+ return ao ? ao->r_irelative : 0u;
+}
+
void link_layout_iplt(Linker* l, LinkImage* img) {
Heap* h = img->heap;
u32 i;
@@ -751,12 +821,27 @@ void link_layout_iplt(Linker* l, LinkImage* img) {
LinkSectionId iplt_sec_id, igot_sec_id, pairs_sec_id, init_sec_id = 0;
u8* iplt_bytes;
u32 slot_idx;
- int emit_init_array = l->emit_static_exe;
+ /* FreeBSD's crt resolves static IFUNCs by walking [__rela_iplt_start,
+ * __rela_iplt_end) of R_*_IRELATIVE relocs in __libc_start1, *before*
+ * _init_tls -- earlier than any (pre)init ctor can run, which matters
+ * because _init_tls allocates through malloc -> getenv -> IFUNC string
+ * ops. Emit that standard table (also the glibc mechanism) instead of the
+ * ctor-based __kit_ifunc_init path for hosted FreeBSD static links. Other
+ * targets (musl/freestanding) keep the ctor because their crt does not walk
+ * __rela_iplt. */
+ int use_rela_iplt = l->emit_static_exe && l->c->target.os == KIT_OS_FREEBSD &&
+ l->c->target.obj == KIT_OBJ_ELF;
+ int emit_init_array = l->emit_static_exe && !use_rela_iplt;
+ LinkSectionId rela_iplt_sec_id = 0;
+ u64 rela_iplt_vaddr = 0, rela_iplt_size = 0;
+ u8* rela_iplt_bytes = NULL;
+ u32 irelative_type = use_rela_iplt ? link_elf_irelative_type(l->c) : 0u;
LinkSymId ifunc_init_sym = LINK_SYM_NONE;
Sym ifunc_init_name = 0;
const LinkArchDesc* arch = link_arch_desc_for(l->c);
if (!arch)
- compiler_panic(img->c, SRCLOC_NONE, "link: layout_iplt: no arch descriptor");
+ compiler_panic(img->c, SRCLOC_NONE,
+ "link: layout_iplt: no arch descriptor");
for (i = 0; i < LinkSyms_count(&img->syms); ++i) {
const LinkSymbol* s = LinkSyms_at(&img->syms, i);
@@ -791,7 +876,8 @@ void link_layout_iplt(Linker* l, LinkImage* img) {
iplt_sec_id = link_synth_region(
img, l, pool_intern_slice(l->c->global, SLICE_LIT(".iplt")),
- SF_ALLOC | SF_EXEC, SSEM_PROGBITS, iplt_size, 4, &iplt_vaddr, &iplt_bytes);
+ SF_ALLOC | SF_EXEC, SSEM_PROGBITS, iplt_size, 4, &iplt_vaddr,
+ &iplt_bytes);
igot_sec_id = link_synth_region(
img, l, pool_intern_slice(l->c->global, SLICE_LIT(".igot.plt")),
SF_ALLOC | SF_WRITE, SSEM_PROGBITS, igot_size, 8, &igot_vaddr, NULL);
@@ -799,14 +885,26 @@ void link_layout_iplt(Linker* l, LinkImage* img) {
img, l, pool_intern_slice(l->c->global, SLICE_LIT(".iplt.pairs")),
SF_ALLOC | SF_WRITE, SSEM_PROGBITS, pairs_size, 8, &pairs_vaddr, NULL);
if (emit_init_array)
- init_sec_id =
- link_synth_region(img, l, obj_secname_preinit_array(l->c),
- SF_ALLOC | SF_WRITE, SSEM_PREINIT_ARRAY, init_size, 8,
- &init_vaddr, NULL);
+ init_sec_id = link_synth_region(img, l, obj_secname_preinit_array(l->c),
+ SF_ALLOC | SF_WRITE, SSEM_PREINIT_ARRAY,
+ init_size, 8, &init_vaddr, NULL);
link_emit_boundary_sym(l, img, "__start_iplt_pairs", pairs_vaddr);
link_emit_boundary_sym(l, img, "__stop_iplt_pairs", pairs_vaddr + pairs_size);
+ if (use_rela_iplt) {
+ /* One Elf64_Rela (24 bytes) per IFUNC: r_offset = igot slot, r_info =
+ * IRELATIVE, r_addend = resolver. crt walks the bracketed range. */
+ rela_iplt_size = (u64)nifunc * 24u;
+ rela_iplt_sec_id = link_synth_region(
+ img, l, pool_intern_slice(l->c->global, SLICE_LIT(".rela.plt")),
+ SF_ALLOC, SSEM_PROGBITS, rela_iplt_size, 8, &rela_iplt_vaddr,
+ &rela_iplt_bytes);
+ link_emit_boundary_sym(l, img, "__rela_iplt_start", rela_iplt_vaddr);
+ link_emit_boundary_sym(l, img, "__rela_iplt_end",
+ rela_iplt_vaddr + rela_iplt_size);
+ }
+
img->iplt_pairs = (u64*)h->alloc(
h, sizeof(*img->iplt_pairs) * 2u * (size_t)nifunc, _Alignof(u64));
if (!img->iplt_pairs)
@@ -881,6 +979,21 @@ void link_layout_iplt(Linker* l, LinkImage* img) {
link_emit_internal_abs64(img, pairs_sec_id, (u32)(slot_idx * 16u + 8u),
pair_vaddr + 8u, slot_id);
+ if (use_rela_iplt) {
+ /* Elf64_Rela: r_offset(+0)=slot, r_info(+8)=IRELATIVE, r_addend(+16)=
+ * resolver. r_info is a fixed constant (sym index 0); r_offset and
+ * r_addend are filled by internal ABS64 relocs so they pick up the
+ * final (post-shift) vaddrs. */
+ u64 rela_off = (u64)slot_idx * 24u;
+ u64 rela_ent_vaddr = rela_iplt_vaddr + rela_off;
+ if (rela_iplt_bytes)
+ wr_u64_le(rela_iplt_bytes + rela_off + 8u, (u64)irelative_type);
+ link_emit_internal_abs64(img, rela_iplt_sec_id, (u32)rela_off,
+ rela_ent_vaddr, slot_id);
+ link_emit_internal_abs64(img, rela_iplt_sec_id, (u32)(rela_off + 16u),
+ rela_ent_vaddr + 16u, resolver_id);
+ }
+
s->kind = SK_FUNC;
s->section_id = iplt_sec_id;
s->value = (u64)slot_idx * (u64)arch->iplt_stub_size;
@@ -964,7 +1077,8 @@ void link_emit_relocations(Linker* l, LinkImage* img, const LinkSymId* got_map,
r->kind == R_RV_ALIGN)
continue;
if (r->sym == OBJ_SYM_NONE || r->sym >= m->nsym)
- compiler_panic(l->c, SRCLOC_NONE, "link: reloc references unknown symbol");
+ compiler_panic(l->c, SRCLOC_NONE,
+ "link: reloc references unknown symbol");
target = m->sym[r->sym];
if (target == LINK_SYM_NONE)
compiler_panic(l->c, SRCLOC_NONE,
@@ -972,7 +1086,8 @@ void link_emit_relocations(Linker* l, LinkImage* img, const LinkSymId* got_map,
if (got_map && reloc_uses_got(r->kind)) {
LinkSymId slot = got_map[target];
if (slot == LINK_SYM_NONE)
- compiler_panic(l->c, SRCLOC_NONE, "link: GOT slot missing for symbol");
+ compiler_panic(l->c, SRCLOC_NONE,
+ "link: GOT slot missing for symbol");
target = slot;
}
if (stub_map && arch && arch->needs_jit_call_stub &&
diff --git a/src/obj/elf/link.c b/src/obj/elf/link.c
@@ -349,8 +349,22 @@ static void shift_image_addresses(LinkImage* img, u64 delta) {
static int reloc_is_tlsle(RelocKind k) {
return k == R_AARCH64_TLSLE_ADD_TPREL_HI12 ||
- k == R_AARCH64_TLSLE_ADD_TPREL_LO12_NC || k == R_RV_TPREL_HI20 ||
- k == R_RV_TPREL_LO12_I || k == R_RV_TPREL_LO12_S;
+ k == R_AARCH64_TLSLE_ADD_TPREL_LO12_NC || k == R_AARCH64_TPOFF64 ||
+ k == R_RV_TPREL_HI20 || k == R_RV_TPREL_LO12_I ||
+ k == R_RV_TPREL_LO12_S;
+}
+
+/* Variant-I TP bias: distance from the TLS image start to where `tp` points.
+ * - AArch64 (AAPCS64): tp points at a 16-byte TCB ahead of the image -> +16
+ * for both hosted and freestanding.
+ * - RISC-V: the psABI points tp at the *start* of the image, so hosted libcs
+ * (FreeBSD/Linux _init_tls) want +0; kit's own freestanding start.c places
+ * a 16-byte TCB ahead of .tdata and biases tp to match AArch64, so
+ * freestanding rv64/rv32 keep +16. */
+static u64 tls_tcb_bias(Compiler* c) {
+ if (c->target.arch == KIT_ARCH_RV64 || c->target.arch == KIT_ARCH_RV32)
+ return c->target.os == KIT_OS_FREESTANDING ? TLS_TCB_SIZE : 0ull;
+ return TLS_TCB_SIZE;
}
/* x86_64 SysV ABI: TLS variant II — the per-thread TLS image sits at
@@ -426,7 +440,9 @@ static i64 rv_pcrel_lo12_disp(LinkImage* img, u64 auipc_vaddr, u64 img_base) {
const LinkRelocApply* hi = LinkRelocs_at(&img->relocs, i);
const LinkSymbol* hi_tgt;
u64 hi_S, hi_P;
- if (hi->kind != R_RV_PCREL_HI20 && hi->kind != R_RV_GOT_HI20) continue;
+ if (hi->kind != R_RV_PCREL_HI20 && hi->kind != R_RV_GOT_HI20 &&
+ hi->kind != R_RV_TLS_GOT_HI20)
+ continue;
if (hi->write_vaddr + img_base != auipc_vaddr) continue;
hi_tgt = LinkSyms_at(&img->syms, hi->target - 1);
hi_S = (hi_tgt->kind == SK_ABS) ? hi_tgt->vaddr : hi_tgt->vaddr + img_base;
@@ -468,17 +484,23 @@ static void apply_all_relocs(LinkImage* img, u64 img_base) {
}
seg = &img->segments[sec->segment_id - 1];
if (reloc_is_tlsle(r->kind)) {
- /* S is the target's TP-relative offset: distance from the
- * TLS image start plus the 16-byte TCB. Both vaddrs are
- * in the same (post-shift, image-relative) coordinate
- * system, so img_base cancels out. */
- S = (tgt->vaddr - img->tls_vaddr) + TLS_TCB_SIZE;
+ /* S is the target's TP-relative offset: distance from the TLS image
+ * start plus the arch/OS TCB bias (see tls_tcb_bias). Both vaddrs are
+ * in the same (post-shift, image-relative) coordinate system, so
+ * img_base cancels out. */
+ S = (tgt->vaddr - img->tls_vaddr) + tls_tcb_bias(img->c);
} else if (reloc_is_x64_tlsle(r->kind)) {
- /* x86_64 variant II: TP points just past the TLS image, so a
- * symbol at offset X within the image is at TP-relative offset
- * (X - tls_memsz). Cast through i64/u64 so the reloc apply
- * writes the full 32- or 64-bit signed value. */
- i64 off = (i64)(tgt->vaddr - img->tls_vaddr) - (i64)img->tls_memsz;
+ /* x86_64 variant II: TP points just past the TLS image, so a symbol at
+ * offset X within the image is at TP-relative offset (X - tls_size).
+ * The runtime (FreeBSD/glibc _init_tls) allocates the block rounded up
+ * to the TLS alignment, so tls_size must be round_up(memsz, align) --
+ * using the raw memsz is off by the rounding remainder whenever memsz
+ * is not a multiple of align, yielding a wrong TLS address (e.g. jemalloc's
+ * tsd pointer, which faults as non-canonical). Cast through i64/u64 so the
+ * reloc apply writes the full 32- or 64-bit signed value. */
+ u64 a = img->tls_align ? img->tls_align : 1u;
+ u64 tls_size = (img->tls_memsz + a - 1u) & ~(a - 1u);
+ i64 off = (i64)(tgt->vaddr - img->tls_vaddr) - (i64)tls_size;
S = (u64)off;
} else if (r->kind == R_RV_PCREL_LO12_I || r->kind == R_RV_PCREL_LO12_S) {
/* PCREL_LO12: rewrite S so that link_reloc_apply's existing
@@ -1432,6 +1454,13 @@ void link_emit_elf(LinkImage* img, Writer* w) {
ehdr.e_ident[5] = ELFDATA2LSB;
ehdr.e_ident[6] = EV_CURRENT;
ehdr.e_ident[7] = ELFOSABI_NONE;
+ /* Brand FreeBSD executables with EI_OSABI=ELFOSABI_FREEBSD; the kernel
+ * matches that brand directly. Without it a static binary is rejected with
+ * ENOEXEC -- the FreeBSD ABI note crt1.o carries is not sufficient on its
+ * own for kit's images (the kernel's note scan does not recognize the
+ * layout), so we set the OSABI on every arch (FreeBSD/clang only sets it on
+ * amd64/aarch64, but the riscv64 kernel accepts it too). */
+ if (img->c->target.os == KIT_OS_FREEBSD) ehdr.e_ident[7] = ELFOSABI_FREEBSD;
ehdr.e_type = pie ? ET_DYN : ET_EXEC;
ehdr.e_machine = (u16)e_machine;
ehdr.e_version = EV_CURRENT;