commit ea01f03f2e47071ce36aae30a6de7f31bc5d9c8d
parent d8d8209e1bd898acb3455608478a57b9d2ecfc57
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 8 Jun 2026 22:54:07 -0700
jit/tls: ELF Local-Exec relaxation via arch-abstracted hook
Extend the uniform in-image JIT TLS model to ELF (Linux + FreeBSD): the
single-threaded JIT now resolves each thread-local to its in-image
.tdata/.tbss instead of via the host thread pointer (mrs tpidr_el0 / add tp /
mov fs:0). That path aliased into the host process's own TLS, which gave the
wrong initial value and was unsafe. This was the documented bootstrap
blocker (141_threadlocal_mutate returned 3 instead of 43 on the R lane).
Arch-abstracted through the existing relocation seam, no arch switch in
link_jit.c:
- RELOC_IS_TLS_LE flag on RelocDesc, set on each arch's Local-Exec reloc
rows (aa64 TLSLE_ADD_TPREL_*, rv TPREL_HI20/LO12_*, x64 TPOFF32).
- LinkArchDesc.jit_tls_le_relax hook; per-arch idiom rewrite lives in each
arch's reloc.c beside its reloc_apply_insn encoders:
aa64: mrs/add/add -> adrp; add :lo12:; nop
rv64: lui/add tp/addi -> auipc %pcrel_hi; nop; addi %pcrel_lo
x64: mov fs:0/lea -> nop...; lea rd,[rip+&var]
- link_jit.c (initial + append reloc loops) classifies via
reloc_kind_is_tls_le and delegates; the old reloc_is_tlsle /
reloc_is_x64_tlsle / JIT_TLS_TCB_SIZE host-TP code is removed.
Verified on the arm64 host + containers: aa64-linux (native), x64-linux and
rv64-linux (podman) all return 141=43 / 142=134 on the R lane at -O0/1/2 and
the interp lane. macOS (Mach-O) unchanged: test-toy/test-link/test-opt green.
COFF/Windows TLS relaxation is a separate follow-up (different TEB idiom).
Diffstat:
11 files changed, 209 insertions(+), 59 deletions(-)
diff --git a/doc/JIT.md b/doc/JIT.md
@@ -87,13 +87,11 @@ alias, while the symbol value `S` and the patch-site address `P` are the
fetch from. A handful of relocation kinds get special in-process
handling before reaching `link_reloc_apply`:
-- TLS-LE / TPREL (AArch64, RISC-V, x86-64): both the target and the TLS
- base are image-relative, so the runtime alias cancels out and the
- offset is computed in image space. AArch64/RISC-V use a 16-byte TCB
- bias (`JIT_TLS_TCB_SIZE`) matching `start.c` and the ELF writer;
- x86-64 SysV variant II addresses a TLS symbol as `offset - tls_memsz`.
- (See "Thread-local storage" below — the JIT is single-threaded, so the
- in-image `.tdata`/`.tbss` is the variable's one instance.)
+- TLS-LE (ELF, AArch64/RISC-V/x86-64): a reloc whose `RelocDesc` carries
+ `RELOC_IS_TLS_LE`. The loop stays arch-neutral: it computes the variable's
+ in-image storage address and delegates the idiom rewrite to the arch's
+ `LinkArchDesc.jit_tls_le_relax`, which drops the thread-pointer read. See
+ "Thread-local storage" below.
- RISC-V `PCREL_LO12_I/S`: the low-12 half of an `AUIPC` pair targets a
local anchor at the paired `HI20` site. The mapper finds that paired
reloc, recomputes the displacement against runtime addresses, and feeds
@@ -202,6 +200,13 @@ host process's own TLS, which is both wrong (no initializer) and unsafe
relaxed at map time to in-image addressing; no thunk, no per-thread
block, no host TLS vtable.
+For ELF, the per-arch idiom rewrite lives behind an arch hook, not in the
+mapper: the reloc loop classifies a TLS access via the `RelocDesc` flags
+(`RELOC_IS_TLVP` for Mach-O, `RELOC_IS_TLS_LE` for ELF Local-Exec) and
+either delegates to the arch's `LinkArchDesc.jit_tls_le_relax` (ELF) or
+applies the Mach-O TLVP relaxation inline. Either way the mapper carries no
+arch `switch`.
+
- **Mach-O (AArch64):** codegen emits Apple's TLV sequence (load the
24-byte descriptor, load `descriptor[+0]` as a thunk, `blr` it). dyld
would rewrite that slot and allocate a pthread key; a JIT image is
@@ -211,13 +216,19 @@ block, no host TLS vtable.
directly: the `__thread_ptrs` load becomes `add` (descriptor address),
the thunk-load becomes `ldr xN,[xN,#16]`, and the `blr` becomes a nop.
-- **ELF (AArch64/RISC-V/x86-64):** the in-image relaxation lands first on
- Mach-O/AArch64; the ELF Local-Exec path is being unified onto the same
- model (relax `mrs/add/add`, `lui/add tp/addi`, `mov fs:0/lea` to
- PC-relative in-image addressing, dropping the thread-pointer read). Until
- that lands, ELF still computes a TP-relative offset against the host
- thread pointer (the TLS-LE bullet above), which is correct only when the
- host runtime seeds the thread pointer to the image's TLS block.
+- **ELF (AArch64/RISC-V/x86-64):** `jit_tls_le_relax` rewrites the
+ per-arch Local-Exec idiom in place to address the in-image storage:
+ AArch64 `mrs tpidr_el0; add #hi12; add #lo12` → `adrp; add :lo12:; nop`;
+ RISC-V `lui %tprel_hi; add tp; addi %tprel_lo` → `auipc %pcrel_hi; nop;
+ addi %pcrel_lo`; x86-64 `mov rd,fs:[0]; lea rd,[rd+tpoff]` → `nop…; lea
+ rd,[rip+&var]`. Codegen emits the idiom per-access and contiguous, so the
+ primary (HI/offset) reloc drives the whole rewrite and the LO12 half is a
+ no-op. The hook is in each arch's `reloc.c` next to its
+ `reloc_apply_insn` byte encoders.
+
+ COFF/Windows TLS (the TEB → `_tls_index` → TLS-array idiom) is not yet
+ relaxed for the JIT; that is a follow-up, which also has to neutralize the
+ `_tls_index` extern the sequence references.
`kit_jit_tls_addr` gives host/interpreter code the same resolution from
the address a thread-local's *symbol* resolves to (Mach-O: read
diff --git a/src/arch/aa64/link.c b/src/arch/aa64/link.c
@@ -177,6 +177,7 @@ void aa64_emit_macho_stub(u8* out, u64 stub_vaddr, u64 got_slot_vaddr) {
* through the .reloc_desc / .reloc_apply_insn hooks. */
const RelocDesc* aa64_reloc_desc(RelocKind);
int aa64_reloc_apply_insn(Compiler*, RelocKind, u8*, u64, i64, u64);
+void aa64_jit_tls_le_relax(Compiler*, RelocKind, u8*, u64, u64);
/* AArch64 __chkstk for PE/COFF: probes `x15 * 16` bytes of stack one page at a
* time, then returns. Mirrors the LLVM compiler-rt implementation (chkstk.S in
@@ -204,6 +205,7 @@ const LinkArchDesc link_arch_aa64 = {
.reloc_desc = aa64_reloc_desc,
.reloc_apply_insn = aa64_reloc_apply_insn,
+ .jit_tls_le_relax = aa64_jit_tls_le_relax,
.coff_chkstk_bytes = aa64_coff_chkstk,
.coff_chkstk_len = sizeof aa64_coff_chkstk,
diff --git a/src/arch/aa64/reloc.c b/src/arch/aa64/reloc.c
@@ -35,8 +35,8 @@ static const RelocDescRow aa64_rows[] = {
{R_AARCH64_LD64_GOT_LO12_NC, {4, RELOC_USES_GOT}},
{R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, {4, RELOC_IS_TLS_GOT}},
{R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, {4, RELOC_IS_TLS_GOT}},
- {R_AARCH64_TLSLE_ADD_TPREL_HI12, {4, 0}},
- {R_AARCH64_TLSLE_ADD_TPREL_LO12_NC, {4, 0}},
+ {R_AARCH64_TLSLE_ADD_TPREL_HI12, {4, RELOC_IS_TLS_LE}},
+ {R_AARCH64_TLSLE_ADD_TPREL_LO12_NC, {4, RELOC_IS_TLS_LE}},
{R_AARCH64_TLVP_LOAD_PAGE21, {4, RELOC_IS_TLVP}},
{R_AARCH64_TLVP_LOAD_PAGEOFF12, {4, RELOC_IS_TLVP}},
/* COFF AArch64 TLS SECREL imm12 pair: ADD-imm12 instruction relocs,
@@ -221,3 +221,34 @@ int aa64_reloc_apply_insn(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A,
return 0;
}
}
+
+/* JIT-only Local-Exec TLS relaxation for AArch64 ELF, installed as
+ * LinkArchDesc.jit_tls_le_relax. For every access codegen emits
+ *     mrs rd, tpidr_el0     (no reloc)
+ *     add rd, rd, #hi12     R_AARCH64_TLSLE_ADD_TPREL_HI12    <- `site`
+ *     add rd, rd, #lo12     R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
+ * which is patched in place, with no thread-pointer read, into
+ *     adrp rd, &var ; add rd, rd, :lo12:&var ; nop
+ * All three words are rewritten when the HI12 reloc is visited, so the LO12
+ * reloc that follows is ignored. `storage` is the address the new pair must
+ * produce and `site_pc` the runtime address of the HI12 word. */
+void aa64_jit_tls_le_relax(Compiler* c, RelocKind k, u8* site, u64 storage,
+                           u64 site_pc) {
+  u8* tp_read; /* word before `site`: mrs rd, tpidr_el0 */
+  u8* lo_add;  /* word after `site`: add rd, rd, #lo12 */
+  u32 reg;
+  if (k != R_AARCH64_TLSLE_ADD_TPREL_HI12) {
+    if (k == R_AARCH64_TLSLE_ADD_TPREL_LO12_NC) return; /* handled with HI12 */
+    compiler_panic(c, SRCLOC_NONE, "aa64 jit tls: unexpected reloc kind %u",
+                   (unsigned)k);
+  }
+  tp_read = site - 4;
+  lo_add = site + 4;
+  reg = rd_u32_le(site) & 0x1fu; /* Rd field of the HI12 add */
+  /* The preceding word must be exactly `mrs <reg>, tpidr_el0`. */
+  if (rd_u32_le(tp_read) != (0xd53bd040u | reg))
+    compiler_panic(c, SRCLOC_NONE, "aa64 jit tls: unexpected access sequence");
+  /* Word 0: adrp reg, #0, then page-relocated against the old mrs address. */
+  wr_u32_le(tp_read, 0x90000000u | reg);
+  aa64_reloc_apply_insn(c, R_AARCH64_ADR_PREL_PG_HI21, tp_read, storage, 0,
+                        site_pc - 4u);
+  /* Word 1: add reg, reg, #0, then given the low 12 bits of `storage`. */
+  wr_u32_le(site, 0x91000000u | (reg << 5) | reg);
+  aa64_reloc_apply_insn(c, R_AARCH64_ADD_ABS_LO12_NC, site, storage, 0, site_pc);
+  /* Word 2: the old LO12 add becomes a nop. */
+  wr_u32_le(lo_add, 0xd503201fu);
+}
diff --git a/src/arch/riscv/link.c b/src/arch/riscv/link.c
@@ -114,6 +114,7 @@ static u32 rv32_emit_iplt_stub(u8* dst, u64 stub_vaddr, u64 slot_vaddr,
* U/I/S/B/J + RVC instruction-immediate byte encoders. */
const RelocDesc* rv_reloc_desc(RelocKind);
int rv_reloc_apply_insn(Compiler*, RelocKind, u8*, u64, i64, u64);
+void rv_jit_tls_le_relax(Compiler*, RelocKind, u8*, u64, u64);
const LinkArchDesc link_arch_rv64 = {
.plt0_size = RV64_PLT0_SIZE,
@@ -126,6 +127,7 @@ const LinkArchDesc link_arch_rv64 = {
.emit_iplt_stub = rv64_emit_iplt_stub,
.reloc_desc = rv_reloc_desc,
.reloc_apply_insn = rv_reloc_apply_insn,
+ .jit_tls_le_relax = rv_jit_tls_le_relax,
};
/* RV32 link descriptor: identical to rv64 (PLT0/entry/stub byte sizes,
diff --git a/src/arch/riscv/reloc.c b/src/arch/riscv/reloc.c
@@ -32,9 +32,9 @@ static const RelocDescRow rv_rows[] = {
{R_RV_PCREL_LO12_S, {4, 0}},
{R_RV_GOT_HI20, {4, RELOC_USES_GOT}},
{R_RV_TLS_GOT_HI20, {4, RELOC_IS_TLS_GOT}},
- {R_RV_TPREL_HI20, {4, 0}},
- {R_RV_TPREL_LO12_I, {4, 0}},
- {R_RV_TPREL_LO12_S, {4, 0}},
+ {R_RV_TPREL_HI20, {4, RELOC_IS_TLS_LE}},
+ {R_RV_TPREL_LO12_I, {4, RELOC_IS_TLS_LE}},
+ {R_RV_TPREL_LO12_S, {4, RELOC_IS_TLS_LE}},
{R_RV_CALL, {8, RELOC_IS_BRANCH}},
{R_PLT32, {4, RELOC_IS_BRANCH}},
{R_RV_RVC_BRANCH, {2, 0}},
@@ -262,3 +262,43 @@ int rv_reloc_apply_insn(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A,
return 0;
}
}
+
+/* In-process JIT TLS Local-Exec relaxation (LinkArchDesc.jit_tls_le_relax).
+ * Codegen emits, per access:
+ *   lui  t, %tprel_hi(var)        R_RV_TPREL_HI20      <- `site`
+ *   add  t, tp, t                 (no reloc)
+ *   addi rd, t, %tprel_lo(var)    R_RV_TPREL_LO12_I
+ * Single-threaded JIT: address the in-image storage PC-relative, dropping the
+ * tp add:
+ *   auipc t, %pcrel_hi(&var) ; nop ; addi rd, t, %pcrel_lo(&var)
+ * The HI20 reloc drives the whole rewrite; the LO12_I/LO12_S reloc is then a
+ * no-op. Because LO12_S is accepted, the third word may be an S-type store
+ * (`sw rs2, %tprel_lo(var)(t)`); its low 12 bits are then written in the
+ * S-type split-immediate layout so rs2 is preserved. Any other opcode gets
+ * the I-type layout. `storage` is the runtime address to produce and
+ * `site_pc` the runtime address of the lui. The riscv `tp` ABI register is x4.
+ * NOTE(review): the displacement is not range-checked; one that does not fit
+ * auipc + 12-bit immediate is silently truncated to 32 bits. */
+void rv_jit_tls_le_relax(Compiler* c, RelocKind k, u8* site, u64 storage,
+                         u64 site_pc) {
+  u8* add;
+  u8* acc;
+  u32 rd_tmp, add_w, acc_w, acc_op, hi20, imm12;
+  i64 disp;
+  i32 lo12;
+  if (k == R_RV_TPREL_LO12_I || k == R_RV_TPREL_LO12_S)
+    return; /* handled with HI20 */
+  if (k != R_RV_TPREL_HI20)
+    compiler_panic(c, SRCLOC_NONE, "riscv jit tls: unexpected reloc kind %u",
+                   (unsigned)k);
+  add = site + 4; /* add t, tp, t -> nop */
+  acc = site + 8; /* addi/load/store using %tprel_lo -> %pcrel_lo */
+  if ((rd_u32_le(site) & 0x7fu) != 0x37u) /* lui */
+    compiler_panic(c, SRCLOC_NONE, "riscv jit tls: unexpected access sequence");
+  add_w = rd_u32_le(add);
+  if ((add_w & 0x7fu) != 0x33u || ((add_w >> 15) & 0x1fu) != 4u) /* add ?,tp,? */
+    compiler_panic(c, SRCLOC_NONE, "riscv jit tls: unexpected access sequence");
+  rd_tmp = (rd_u32_le(site) >> 7) & 0x1fu;
+  disp = (i64)storage - (i64)site_pc;
+  lo12 = (i32)((u32)disp & 0xfffu);
+  if (lo12 & 0x800) lo12 -= 0x1000; /* sign-extend 12-bit */
+  /* auipc adds hi20 << 12; subtracting the signed lo12 first compensates for
+   * the sign extension the consumer applies to its 12-bit immediate. */
+  hi20 = (u32)(((disp - (i64)lo12) >> 12) & 0xfffffu);
+  wr_u32_le(site, 0x00000017u | (rd_tmp << 7) | (hi20 << 12)); /* auipc */
+  wr_u32_le(add, 0x00000013u);                                 /* nop */
+  acc_w = rd_u32_le(acc);
+  acc_op = acc_w & 0x7fu;
+  imm12 = (u32)lo12 & 0xfffu;
+  if (acc_op == 0x23u || acc_op == 0x27u) {
+    /* S-type (STORE / STORE-FP): imm[11:5] in bits 31:25, imm[4:0] in bits
+     * 11:7. Keep opcode, funct3, rs1 and rs2. */
+    wr_u32_le(acc, (acc_w & 0x01fff07fu) | ((imm12 >> 5) << 25) |
+                       ((imm12 & 0x1fu) << 7));
+  } else {
+    /* I-type (addi / loads): imm[11:0] in bits 31:20. */
+    wr_u32_le(acc, (acc_w & 0x000fffffu) | (imm12 << 20));
+  }
+}
diff --git a/src/arch/x64/link.c b/src/arch/x64/link.c
@@ -61,6 +61,7 @@ static u32 x64_emit_iplt_stub(u8* dst, u64 stub_vaddr, u64 slot_vaddr,
* through the .reloc_desc / .reloc_apply_insn hooks. */
const RelocDesc* x64_reloc_desc(RelocKind);
int x64_reloc_apply_insn(Compiler*, RelocKind, u8*, u64, i64, u64);
+void x64_jit_tls_le_relax(Compiler*, RelocKind, u8*, u64, u64);
/* PE/COFF IAT stub for x86_64 (6 B):
*
@@ -88,6 +89,7 @@ const LinkArchDesc link_arch_x64 = {
.reloc_desc = x64_reloc_desc,
.reloc_apply_insn = x64_reloc_apply_insn,
+ .jit_tls_le_relax = x64_jit_tls_le_relax,
.tls_variant_ii = 1,
};
diff --git a/src/arch/x64/reloc.c b/src/arch/x64/reloc.c
@@ -27,7 +27,7 @@ static const RelocDescRow x64_rows[] = {
{R_X64_REX_GOTPCRELX, {4, RELOC_USES_GOT}},
{R_X64_GOTPC32, {4, 0}},
{R_X64_GOTTPOFF, {4, RELOC_IS_TLS_GOT}},
- {R_X64_TPOFF32, {4, 0}},
+ {R_X64_TPOFF32, {4, RELOC_IS_TLS_LE}},
{R_X64_GLOB_DAT, {8, 0}},
{R_X64_JUMP_SLOT, {8, 0}},
{R_X64_RELATIVE, {8, 0}},
@@ -57,3 +57,41 @@ int x64_reloc_apply_insn(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A,
return 0;
}
}
+
+/* In-process JIT TLS Local-Exec relaxation (LinkArchDesc.jit_tls_le_relax).
+ * Codegen emits, per access:
+ *   64 REX.W 8B modrm sib disp32   mov rd, fs:[0]       (fixed 9 bytes)
+ *   REX.W 8D modrm [sib] disp32    lea rd,[rd+tpoff]    (7 or 8 bytes; `site`
+ *                                  points at this disp32, R_X64_TPOFF32)
+ * Single-threaded JIT: nop the whole block and end it with a 7-byte
+ * `lea rd,[rip+disp32]` whose disp32 lands on `site`, so rd holds `storage`
+ * without an fs read. `site` is the writable alias of the disp32 and
+ * `site_pc` its runtime address. Panics before patching anything on an
+ * unexpected reloc kind, an unrecognized sequence, or a `storage` outside the
+ * signed 32-bit reach of the rewritten lea (previously truncated silently). */
+void x64_jit_tls_le_relax(Compiler* c, RelocKind k, u8* site, u64 storage,
+                          u64 site_pc) {
+  u8* block_end = site + 4; /* disp32 ends the lea */
+  u8* mov = NULL;
+  u8* lea;
+  u8* p;
+  u32 rd, lea_len;
+  i64 disp;
+  if (k != R_X64_TPOFF32)
+    compiler_panic(c, SRCLOC_NONE, "x64 jit tls: unexpected reloc kind %u",
+                   (unsigned)k);
+  /* The 9-byte fs-mov ends where the lea (7 or 8 bytes) begins; probe the
+   * 7-byte layout first, then the 8-byte (SIB) one. */
+  for (lea_len = 7u; !mov && lea_len <= 8u; ++lea_len) {
+    p = block_end - lea_len - 9;
+    if (p[0] == 0x64u && p[2] == 0x8Bu) mov = p;
+  }
+  if (!mov)
+    compiler_panic(c, SRCLOC_NONE, "x64 jit tls: unexpected access sequence");
+  /* rip-relative displacement is taken from the end of the lea, i.e. the
+   * runtime address just past the disp32. It must fit a signed 32-bit field. */
+  disp = (i64)storage - (i64)(site_pc + 4u);
+  if (disp < -(i64)0x80000000 || disp > (i64)0x7fffffff)
+    compiler_panic(c, SRCLOC_NONE,
+                   "x64 jit tls: storage out of rip-relative range");
+  /* Destination register: modrm.reg of the mov, extended by REX.R. */
+  rd = ((u32)(mov[3] >> 3) & 7u) | ((mov[1] & 0x04u) ? 8u : 0u);
+  for (p = mov; p < block_end; ++p) *p = 0x90u; /* nop the block */
+  lea = block_end - 7;
+  lea[0] = (u8)(0x48u | ((rd >= 8u) ? 0x04u : 0x00u)); /* REX.W (+REX.R) */
+  lea[1] = 0x8Du;                                      /* lea */
+  lea[2] = (u8)(((rd & 7u) << 3) | 5u); /* mod=00 reg=rd rm=101 (rip) */
+  wr_u32_le(lea + 3, (u32)disp);
+}
diff --git a/src/link/link_arch.h b/src/link/link_arch.h
@@ -98,6 +98,20 @@ typedef struct LinkArchDesc {
int (*reloc_apply_insn)(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A,
u64 P);
+  /* In-process JIT only: relax an ELF Local-Exec TLS access (a reloc whose
+   * RelocDesc carries RELOC_IS_TLS_LE) to ordinary in-image addressing. The
+   * JIT is single-threaded, so the image's own .tdata/.tbss is the variable's
+   * one instance; the per-arch tp-based idiom (mrs tpidr_el0 / add tp /
+   * mov fs:0) is rewritten in place to address that storage directly,
+   * dropping the thread-pointer read (which would alias into the host's TLS).
+   * `site` is the write-alias bytes at the reloc; `storage` the runtime
+   * address of the variable's in-image storage; `site_pc` the runtime address
+   * of the reloc site. The hook is called for every RELOC_IS_TLS_LE reloc.
+   * Codegen emits the idiom per-access and contiguous, so the primary
+   * (HI/offset) reloc rewrites the whole idiom and the hook must return
+   * without patching when handed the LO12 half. NULL on arches with no ELF
+   * Local-Exec TLS. */
+ void (*jit_tls_le_relax)(Compiler* c, RelocKind k, u8* site, u64 storage,
+ u64 site_pc);
+
/* TLS variant: 1 = variant II (x86-64, tpoff = X - tls_memsz_rounded);
* 0 = variant I (AArch64/RISC-V, tpoff = (X - tls_vaddr) + tcb_bias).
* Consulted by the ELF linker when applying R_TPOFF64. */
diff --git a/src/link/link_jit.c b/src/link/link_jit.c
@@ -21,6 +21,7 @@
#include "core/slice.h"
#include "core/util.h"
#include "link/link.h"
+#include "link/link_arch.h"
#include "link/link_internal.h"
#include "link/link_reloc_desc.h"
#include "obj/obj.h"
@@ -102,35 +103,12 @@ struct KitJit {
#define JIT_APPEND_RW_SLACK (16ull * 1024ull * 1024ull)
#define JIT_APPEND_TLS_SLACK (4ull * 1024ull * 1024ull)
-/* AArch64 ELF ABI: TP points 16 bytes before the TLS image; TLSLE
- * encodes (target_offset_in_image + 16).
- *
- * RISC-V psABI normally points TP at the start of the TLS image, but
- * kit's freestanding start.c (and the JIT harness) places a 16-byte
- * TCB ahead of .tdata and biases TP accordingly so a single TPREL
- * convention works for both arches. Mirrors src/obj/elf/link.c's
- * TLS_TCB_SIZE comment. */
-#define JIT_TLS_TCB_SIZE 16ull
-
-static int reloc_is_tlsle(RelocKind k) {
- return k == R_AARCH64_TLSLE_ADD_TPREL_HI12 ||
- k == R_AARCH64_TLSLE_ADD_TPREL_LO12_NC || k == R_RV_TPREL_HI20 ||
- k == R_RV_TPREL_LO12_I || k == R_RV_TPREL_LO12_S;
-}
-
/* RISC-V PCREL_LO12_I/S target a local "anchor" symbol whose vaddr is
* the address of the paired AUIPC's PCREL_HI20 (or GOT_HI20) site.
* Defined below vaddr_to_runtime. */
static i64 jit_rv_pcrel_lo12_disp(LinkImage* img, KitExecMemRegion* segs,
u64 auipc_image_vaddr);
-/* x86_64 SysV TLS variant II: %fs points at the TCB immediately after
- * the static TLS image, so a symbol at image offset X is addressed as
- * X - tls_memsz. */
-static int reloc_is_x64_tlsle(RelocKind k) {
- return k == R_X64_TPOFF32 || k == R_TPOFF64;
-}
-
static int perms_for(u32 secflags) {
int p = KIT_PROT_READ;
if (secflags & SF_EXEC) p |= KIT_PROT_EXEC;
@@ -381,12 +359,24 @@ KitJit* kit_jit_from_image(LinkImage* img) {
const LinkSymbol* tgt = LinkSyms_at(&img->syms, r->target - 1);
u64 S, P;
u8* P_bytes;
- if (reloc_is_tlsle(r->kind)) {
- /* TLSLE/TPREL: S is the TP-relative offset of the target. Both
- * vaddrs are image-relative, so the runtime alias drops
- * out and we work in image-space. */
- S = (tgt->vaddr - img->tls_vaddr) + JIT_TLS_TCB_SIZE;
- } else if (r->kind == R_RV_PCREL_LO12_I || r->kind == R_RV_PCREL_LO12_S) {
+ /* ELF Local-Exec TLS -> in-image addressing (single-threaded JIT). The
+ * per-arch idiom rewrite lives behind LinkArchDesc.jit_tls_le_relax; this
+ * loop stays arch-neutral, classifying via the RELOC_IS_TLS_LE flag. */
+ if (reloc_kind_is_tls_le(c, r->kind)) {
+ const LinkArchDesc* d = link_arch_desc_for(c);
+ u64 storage = (tgt->kind == SK_ABS)
+ ? tgt->vaddr + (u64)r->addend
+ : (u64)vaddr_to_runtime(img, segs, tgt->vaddr) +
+ (u64)r->addend;
+ u8* bytes = (u8*)vaddr_to_write(img, segs, r->write_vaddr);
+ u64 site_pc = (u64)vaddr_to_runtime(img, segs, r->write_vaddr);
+ if (!d || !d->jit_tls_le_relax || !bytes)
+ compiler_panic(c, SRCLOC_NONE,
+ "kit_jit: target has no TLS Local-Exec relaxation");
+ d->jit_tls_le_relax(c, r->kind, bytes, storage, site_pc);
+ continue;
+ }
+ if (r->kind == R_RV_PCREL_LO12_I || r->kind == R_RV_PCREL_LO12_S) {
/* RISC-V PCREL_LO12: target.vaddr is the paired AUIPC site
* (a local anchor symbol). Recompute the AUIPC's runtime
* displacement and feed it as S to the LO12_I/S apply path so
@@ -399,9 +389,6 @@ KitJit* kit_jit_from_image(LinkImage* img) {
link_reloc_apply(c, alias, P_bytes, (u64)disp, 0,
(u64)vaddr_to_runtime(img, segs, r->write_vaddr));
continue;
- } else if (reloc_is_x64_tlsle(r->kind)) {
- i64 off = (i64)(tgt->vaddr - img->tls_vaddr) - (i64)img->tls_memsz;
- S = (u64)off;
} else if (tgt->kind == SK_ABS) {
/* extern resolver result OR true absolute symbol — vaddr
* already holds the runtime address. */
@@ -715,9 +702,24 @@ static void jit_apply_one_reloc(KitJit* jit, const LinkRelocApply* r) {
u64 S;
u64 P;
u8* P_bytes;
- if (reloc_is_tlsle(r->kind)) {
- S = (tgt->vaddr - img->tls_vaddr) + JIT_TLS_TCB_SIZE;
- } else if (r->kind == R_RV_PCREL_LO12_I || r->kind == R_RV_PCREL_LO12_S) {
+ /* ELF Local-Exec TLS -> in-image addressing (single-threaded JIT); arch
+ * idiom rewrite behind LinkArchDesc.jit_tls_le_relax (see kit_jit_from_image
+ * for the mirror of this in the initial reloc pass). */
+ if (reloc_kind_is_tls_le(jit->c, r->kind)) {
+ const LinkArchDesc* d = link_arch_desc_for(jit->c);
+ u64 storage = (tgt->kind == SK_ABS)
+ ? tgt->vaddr + (u64)r->addend
+ : (u64)vaddr_to_runtime(img, jit->segs, tgt->vaddr) +
+ (u64)r->addend;
+ u8* bytes = (u8*)vaddr_to_write(img, jit->segs, r->write_vaddr);
+ u64 site_pc = (u64)vaddr_to_runtime(img, jit->segs, r->write_vaddr);
+ if (!d || !d->jit_tls_le_relax || !bytes)
+ compiler_panic(jit->c, SRCLOC_NONE,
+ "kit_jit_append_obj: no TLS Local-Exec relaxation");
+ d->jit_tls_le_relax(jit->c, r->kind, bytes, storage, site_pc);
+ return;
+ }
+ if (r->kind == R_RV_PCREL_LO12_I || r->kind == R_RV_PCREL_LO12_S) {
i64 disp = jit_rv_pcrel_lo12_disp(img, jit->segs, tgt->vaddr);
RelocKind alias =
(r->kind == R_RV_PCREL_LO12_I) ? R_RV_LO12_I : R_RV_LO12_S;
@@ -728,10 +730,8 @@ static void jit_apply_one_reloc(KitJit* jit, const LinkRelocApply* r) {
link_reloc_apply(jit->c, alias, P_bytes, (u64)disp, 0,
(u64)vaddr_to_runtime(img, jit->segs, r->write_vaddr));
return;
- } else if (reloc_is_x64_tlsle(r->kind)) {
- i64 off = (i64)(tgt->vaddr - img->tls_vaddr) - (i64)img->tls_memsz;
- S = (u64)off;
- } else if (tgt->kind == SK_ABS) {
+ }
+ if (tgt->kind == SK_ABS) {
S = tgt->vaddr;
} else {
S = (u64)vaddr_to_runtime(img, jit->segs, tgt->vaddr);
diff --git a/src/link/link_reloc_desc.h b/src/link/link_reloc_desc.h
@@ -40,6 +40,13 @@ static inline int reloc_kind_is_tls_got(const Compiler* c, RelocKind k) {
return d && (d->flags & RELOC_IS_TLS_GOT) ? 1 : 0;
}
+/* Returns 1 when `k` has a RelocDesc flagged RELOC_IS_TLS_LE (an ELF
+ * Local-Exec, tp-relative TLS access), 0 when it has no descriptor or lacks
+ * the flag. The in-process JIT hands such relocs to
+ * LinkArchDesc.jit_tls_le_relax for relaxation to in-image addressing. */
+static inline int reloc_kind_is_tls_le(const Compiler* c, RelocKind k) {
+  const RelocDesc* desc = reloc_desc(c, k);
+  if (!desc) return 0;
+  return (desc->flags & RELOC_IS_TLS_LE) != 0;
+}
+
/* A direct GOT-load instruction reloc (the Mach-O linker's notion): non-TLS
* GOT load. Distinct from reloc_kind_uses_got, which also counts TLS-IE. */
static inline int reloc_kind_is_got_load(const Compiler* c, RelocKind k) {
diff --git a/src/obj/reloc.h b/src/obj/reloc.h
@@ -23,6 +23,9 @@ typedef enum RelocDescFlag {
RELOC_DIRECT_PAGE = 1u << 4, /* Mach-O ADRP-direct (non-GOT) page / pageoff */
RELOC_MARKER = 1u << 5, /* no bytes patched (RELAX / TPREL_ADD) */
RELOC_WIDTH_DYN = 1u << 6, /* width read from the bytes at apply (ULEB128) */
+ RELOC_IS_TLS_LE = 1u << 7, /* ELF Local-Exec TLS access (tp-relative idiom);
+ * the in-process JIT relaxes it to in-image
+ * addressing via LinkArchDesc.jit_tls_le_relax */
} RelocDescFlag;
typedef struct RelocDesc {