commit 1c1cdd6474b2490108ebf52f932fe4d0405370c9
parent d2ab4e9a9375513bde35c2c4ecb85ed3b33cf6d7
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 11 May 2026 15:56:16 -0700
obj: emit Mach-O TLV descriptors so cfree-compiled _Thread_local works
cfree-as-compiler couldn't produce a Mach-O object for any
`_Thread_local` source: `aa_tls_addr_of` emitted ELF TLSLE relocs that
`macho_emit` then rejected, and parse never built the
`__DATA,__thread_vars` descriptor that TLVP relocs target. Only
clang-compiled inputs worked through Path E.
Routes TLS emission through two new `obj/` interfaces so the format
split lives in one module and the frontend/codegen stay
format-agnostic:
- `obj_define_tls` (new `src/obj/obj_tls.c`) — frontend hands in the
SK_TLS symbol plus data buffer and pointer-init relocs. ELF places
the symbol directly in `.tdata`/`.tbss`. Mach-O places the data
under a private `<name>$tlv$init` and defines the user-visible
symbol on a 24-byte descriptor in `__thread_vars`.
- `obj_format_tls_via_descriptor` — predicate; `aa_tls_addr_of` uses
it to pick TLSLE vs the TLVP `adrp/ldr/ldr/blr/mov` sequence (x0/x1
scratch — the AArch64 regalloc only allocates x19-x28).
- `macho_emit.section_flags_for` maps `SF_TLS` + sectname to
`S_THREAD_LOCAL_{REGULAR,ZEROFILL,VARIABLES}`; the BSS override no
longer collapses the TLV-zerofill type to plain `S_ZEROFILL`.
- `_tlv_bootstrap` undef cached on `ObjBuilder` so multiple
`_Thread_local`s in one TU share one symbol.
Verified: `cfree cc -c` → Apple `ld` + dyld → exit 0; same `.o`
through `cfree_link_exe` + libSystem.tbd + codesign → exit 0.
`make test-elf` 37/37, `make test` 1232 pass, `make test-link
CFREE_TEST_OBJ=macho` 102/102.
`cfree run` on macOS still segfaults on TLS — the JIT linker doesn't
process TLVP / `_tlv_bootstrap`. `doc/MACHO.md` rewritten to lead
with that gap (and a fix sketch); resolved-item changelog removed.
Diffstat:
7 files changed, 359 insertions(+), 180 deletions(-)
diff --git a/doc/MACHO.md b/doc/MACHO.md
@@ -1,169 +1,89 @@
-# MACHO — Mach-O linker open issues
+# MACHO — Mach-O open issues
-This doc is the running ledger of Mach-O-specific issues uncovered while
-bringing `test-link` Path E green on `aa64-macho`. Items listed here are the
-known gaps in `link_emit_macho` (and its read-side / layout dependencies) that
-need work before the suite is fully green.
+Running ledger of Mach-O-specific gaps still open against the
+`aa64-macho` lane. Resolved items have been pruned from this doc;
+`git log` is the historical record.
-Current state (2026-05-11): Path E on `aa64-macho` is **102/102 pass**
-(36_tls_basic was the last open regression — see §4 for the resolution).
-`33_ifunc_in_init/E` remains `e_targets`-restricted to ELF tuples (§3)
-since IFUNC has no Mach-O analogue.
+State (2026-05-11):
-Path J on `aa64-macho` is also 102/102 — `36_tls_basic` joins the
-IFUNC trio on the `j_targets`-excluded list, since `_tlv_bootstrap`
-is a libSystem symbol with no JIT-lane equivalent (§4).
+ make test-elf # 37/37
+ make test-link # 122/122 (ELF baseline)
+ make test-link CFREE_TEST_OBJ=macho # 102/102 (Path E + J)
-ELF (`make test-elf`, `make test-link`) is unaffected — every change
-described here is either Mach-O-only or guarded on `target.obj ==
-CFREE_OBJ_MACHO`.
+What still doesn't work:
----
+1. **`cfree run` on macOS with `_Thread_local`** — segfaults at the
+ first TLV access. §1 below.
+2. **`36_tls_basic/J` on `aa64-macho`** — same lane, kept on the
+ `j_targets` excludelist. §1.
+3. **`33_ifunc_in_init/E` on `aa64-macho`** — IFUNC has no Mach-O
+ representation; permanent `e_targets` exclusion, not a fix target.
+4. **Section coalescing in `link_macho.plan_layout`** — cosmetic, see
+ §2.
-## 4. TLV (thread-local variables) — RESOLVED
-
-Adds `ARM64_RELOC_TLVP_LOAD_PAGE21` / `PAGEOFF12` support and
-section/binding plumbing for `__DATA,__thread_vars` /
-`__DATA,__thread_data` / `__DATA,__thread_bss`. Path E on `aa64-macho`
-now passes `36_tls_basic`; Path J is `j_targets`-excluded because the
-JIT lane cannot resolve `_tlv_bootstrap` (libSystem-only).
-
-**What landed:**
-
-- New `RelocKind`s `R_AARCH64_TLVP_LOAD_PAGE21` /
- `R_AARCH64_TLVP_LOAD_PAGEOFF12` (`src/obj/obj.h`), with translator
- entries in `macho_reloc_aarch64.c` and a name in `pipeline.c`.
-- `link_reloc.c` encodes PAGE21 as ADRP-form and PAGEOFF12 as a 64-bit
- LDR `lo12` (scale 3). `reloc_width` in `link_layout.c` lists both at
- width 4.
-- `link_macho.c`:
- - `collect_tlv` pass mirrors the internal-GOT pass: one
- `MachTlv` slot per unique descriptor symbol referenced via a TLVP
- reloc.
- - `plan_layout` synthesizes `__DATA,__thread_ptrs`
- (`S_THREAD_LOCAL_VARIABLE_POINTERS`) after the user's TLV input
- sections.
- - TLV input sections retain their `S_THREAD_LOCAL_*` section type
- bits (`__thread_vars` → S_THREAD_LOCAL_VARIABLES, `__thread_data`
- → REGULAR, `__thread_bss` → ZEROFILL), keyed off `SF_TLS` + sectname.
- - `__thread_vars` is forced to align 8 (clang emits it with align 1,
- relying on layout to land it on 8 — dyld's chained-fixup walk
- requires 8-byte boundaries for the descriptor's pointer fields).
- - `apply_relocs` routes `R_AARCH64_TLVP_LOAD_PAGE21/PAGEOFF12` through
- the `__thread_ptrs` slot regardless of whether the descriptor
- target is in-image or imported.
- - `apply_relocs` special-cases `R_ABS64` whose patch site is in a
- `S_THREAD_LOCAL_VARIABLES` section: writes the literal TLV offset
- (`target_vaddr - tls_image_vaddr`) with no chained-fixup entry.
- Matches Apple `ld`'s descriptor[2] format — dyld interprets that
- word as a per-thread offset, not an absolute address.
- - Chained-fixup REBASE (internal) / BIND (imported) per `__thread_ptrs`
- slot, parallel to the existing `__got` per-slot fixup pass.
- - `build_chained_fixups` recognizes the synthetic `__thread_ptrs`
- region inside `__DATA` for slot-byte writeback.
-
-**Test:** `test/link/cases/36_tls_basic` — a single `_Thread_local int`
-read. Matrix:
-
-- `aa64-elf`, `x64-elf`, `rv64-elf` — R + E pass (J passes on host
- arch, skip elsewhere).
-- `aa64-macho` — **R + E pass**, **J excluded** via `j_targets`.
-
-**Root cause of the Path E abort (resolved).** The mach-header
-`flags` word omitted `MH_HAS_TLV_DESCRIPTORS` (0x00800000). Without
-that flag, dyld processes chained fixups normally (so descriptor[0]
-binds to `__tlv_bootstrap`), but then skips the second TLV-setup pass
-that scans each `S_THREAD_LOCAL_VARIABLES` section to allocate a
-pthread_key into descriptor[1] and rewrite descriptor[0] to a
-per-descriptor thunk. With no pthread_key registered, dyld replaces
-descriptor[0] with `_tlv_bootstrap_error` defensively, so the first
-TLV access aborts. Apple's `ld` sets this flag whenever the image
-contains any `S_THREAD_LOCAL_*` section; we now set it in
-`link_emit_macho` when `x.ntlv > 0`.
-
-The descriptor record itself, the chained-fixup chain (BIND on
-descriptor[0] → REBASE on the `__thread_ptrs` slot), the
-`__thread_vars` align-8, and the literal-offset write into
-descriptor[2] were all already correct — confirmed by byte-diffing
-against `clang -arch arm64 -isysroot $(xcrun --show-sdk-path) tls.c`'s
-output. The only divergence that mattered to dyld was the missing
-header flag.
-
-(Cosmetic divergence that does not matter: Apple's `ld` relaxes the
-`TLVP_LOAD_PAGEOFF12` LDR into an ADD when the descriptor is in-image
-and drops the indirect `__thread_ptrs` slot entirely. Our linker
-keeps the LDR form and routes through a one-pointer `__thread_ptrs`
-section, which dyld accepts.)
-
-**Path J excluded.** `_tlv_bootstrap` is a libSystem symbol; the JIT
-path has no dylib resolution. `36_tls_basic/j_targets` now lists only
-the three ELF tuples, mirroring the IFUNC trio in §3.3. A real fix
-would provide a TLV-bootstrap stub for the JIT lane.
+ELF lanes are the regression guardrail; every Mach-O change must keep
+`make test-elf` and `make test-link` (ELF) green.
---
-## 5. Other gaps surfaced en route
-
-These don't currently break a test but are worth tracking:
-
-- **Section coalescing.** `plan_layout` emits one `__TEXT,__text` MSec
- per input `__text` LinkSection. Mach-O technically wants one section
- per (segname, sectname) within a segment. Tools like `llvm-objdump`
- fail with "bad section index" against the resulting binaries. The
- kernel/dyld don't care — they load by segment vaddr — so binaries
- still execute. Fix: merge equal-named MSecs in `plan_layout` before
- emitting `section_64` records.
-
-- **`read_macho` GOT-pageoff scale.** Resolved (this pass) by inspecting
- the patched instruction at the fixup site to disambiguate
- ARM64_RELOC_PAGEOFF12 between ADD/LDST8/16/32/64/128. Worth keeping
- on the radar if SIMD/FP load forms surface — only the integer LDR
- family is unit-tested by the cg corpus today.
-
-- **`pick_macho_names` static-buffer aliasing.** Fixed. Previously
- every comma-form section name pointed at the same static buffer,
- so all `__TEXT,__*` sections ended up with whichever name the last
- call wrote. Now per-MSec inline storage.
-
-- **Internal GOT slots.** Added a second pass in `collect_imports`
- that materializes `MachImp{internal=1}` entries for every
- GOT_LOAD_PAGE21 / LD64_GOT_LO12_NC reloc targeting an in-image
- symbol. Each gets a `__DATA_CONST,__got` slot plus a chained-fixup
- REBASE entry (or no entry at all for weak-undef → NULL). Required
- because clang on Mach-O routes every extern-global through the GOT
- on AArch64. Companion change: skip `layout_got` on Mach-O so the
- ELF-shape synthesis doesn't rewrite GOT reloc targets behind
- link_macho's back.
-
-- **`link_intern_c_name` / `cfree_jit_lookup` mangling.** Was
- conditionally skipping the leading-`_` prefix when the caller's
- source-level name already started with `_`. That diverged from
- `decl.c`, which prepends unconditionally (so C `_start` becomes
- asm `__start`). Now both helpers always prepend on Mach-O, matching
- Apple `cc` and the doc.
-
-- **AMFI inode caching.** macOS caches "this inode failed AMFI" for
- load-failed binaries, and a subsequent overwrite with `O_TRUNC` keeps
- the same inode — so a re-link of the same path with corrected bytes
- still gets rejected. `test/link/harness/link_exe_runner.c` now
- `unlink()`s before `open(O_CREAT)` so each link gets a fresh inode.
-
-- **`test-link` Path E reporting.** The post-flush loop previously
- printed nothing when `RUN_RC != expected` and no `gc_present` /
- `gc_absent` markers were configured. Mach-O surfaced this — every
- run-time-failing case was silently dropped from the FAIL list.
- Fixed in `test/link/run.sh` so any rc mismatch reports a FAIL.
+## 1. TLV in the JIT lane
+
+`cfree cc -c` → `cfree_link_exe` (or Apple `ld`) → dyld is green for
+`_Thread_local` end-to-end. `cfree run` is not: the JIT linker
+(`src/link/link_jit.c`) handles TLSLE but not TLVP, and there is no
+runtime that fills the role dyld plays for a normal Mach-O image
+(allocate a pthread_key, install a per-descriptor thunk into
+descriptor[0], record the key in descriptor[1]).
+
+Concretely, when codegen emits the Mach-O TLV access sequence
+
+ adrp x0, sym@TLVPPAGE
+ ldr x0, [x0, sym@TLVPPAGEOFF]
+ ldr x1, [x0] ; descriptor[0]
+ blr x1 ; thunk(x0=desc) → x0 = TLV addr
+
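+the JIT applies the TLVP relocs trivially (the descriptor exists in
+the JIT image), but descriptor[0] is whatever `R_ABS64` against
+`__tlv_bootstrap` resolved to — nothing has replaced it with a
+per-descriptor thunk or stored a pthread_key in descriptor[1], so the
+`blr x1` never reaches working TLV storage. Two viable directions: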
+
+- **dlsym `_tlv_bootstrap`.** `driver_dlsym_resolver` can already
+ find it on a macOS host. Open question: dyld's pthread_key /
+ descriptor-rewrite pass only walks `S_THREAD_LOCAL_VARIABLES`
+ sections it owns; JIT-mmap'd memory isn't in that walk, so
+ `_tlv_bootstrap` would run unparameterized. Likely insufficient on
+ its own.
+
+- **cfree-owned thunk.** Allocate one `pthread_key_t` per JIT image,
+ write our thunk's address into every descriptor[0], and have the
+ thunk consult descriptor[1]/[2] to find (or lazy-init) per-thread
+ storage. No libSystem dependency; cleaner for the freestanding
+ goal. This is the recommended fix.
+
+Either way the JIT linker needs a new pass parallel to its
+`reloc_is_tlsle` branch that materializes per-descriptor runtime
+state before the entry call.
---
-## 6. Validation
+## 2. Section coalescing in `plan_layout`
+
+`link_macho.plan_layout` emits one `__TEXT,__text` `MSec` per input
+`__text` `LinkSection`. Mach-O wants one section per
+`(segname, sectname)` within a segment. `llvm-objdump` rejects the
+result ("bad section index"); the kernel and dyld don't care and
+binaries still execute — that's why this hasn't bitten a test.
+
+Fix: merge equal-named MSecs in `plan_layout` before writing
+`section_64` records.
+
+---
-Run the matrix under both tuples; the ELF side is the regression
-guardrail:
+## 3. Cosmetic divergences worth flagging
- make test-elf # 37/37 — unaffected
- make test-link # 122/122 — ELF baseline
- make test-link CFREE_TEST_OBJ=macho # 102/102 — Mach-O (E+J)
+These do not block tests but are documented so that a future "why
+doesn't our output byte-match clang's?" investigation doesn't have to
+re-debug them:
-`33_ifunc_in_init/E` is `e_targets`-excluded on `aa64-macho` (§5).
-`36_tls_basic/J` is `j_targets`-excluded on `aa64-macho` (§4).
+- TLVP_LOAD_PAGEOFF12 LDR relaxation. Apple `ld` relaxes the LDR to
+ an ADD when the descriptor is in-image and drops the indirect
+ `__thread_ptrs` slot entirely. Our linker keeps the LDR + slot.
+ dyld accepts both shapes.
diff --git a/src/arch/aarch64/ops.c b/src/arch/aarch64/ops.c
@@ -282,6 +282,38 @@ static void aa_tls_addr_of(CGTarget* t, Operand dst, ObjSymId sym, i64 addend) {
u32 sec = mc->section_id;
u32 rd = reg_num(dst);
+ if (obj_format_tls_via_descriptor(t->c)) {
+ /* TLV access via per-variable descriptor (Mach-O TLVP). The thunk's
+ * ABI is custom — x0 in/out as descriptor → TLV addr, all other
+ * regs preserved — so we materialize via x0 and copy to `dst` only
+ * when they differ. x0/x1 are scratch here (the regalloc only hands
+ * out x19-x28), and x30 was saved at the prologue.
+ *
+ * adrp x0, sym@TLVPPAGE ; R_AARCH64_TLVP_LOAD_PAGE21
+ * ldr x0, [x0, sym@TLVPPAGEOFF] ; R_AARCH64_TLVP_LOAD_PAGEOFF12
+ * ldr x1, [x0] ; descriptor[0] = thunk pointer
+ * blr x1 ; x0 in/out
+ * mov xdst, x0 ; only if dst != x0
+ *
+ * TLVP relocs do not carry an addend; nonzero addends are applied
+ * after the call as a follow-on ADD/SUB on `dst`. */
+ u32 adrp_pos = mc->pos(mc);
+ aa64_emit32(mc, aa64_adrp_base(/*Rd=*/0));
+ mc->emit_reloc_at(mc, sec, adrp_pos, R_AARCH64_TLVP_LOAD_PAGE21, sym, 0, 0,
+ 0);
+ u32 ldr_pos = mc->pos(mc);
+ aa64_emit32(mc,
+ aa64_ldr_uimm(/*size=*/3, /*Rt=*/0, /*Rn=*/0, /*byte_off=*/0));
+ mc->emit_reloc_at(mc, sec, ldr_pos, R_AARCH64_TLVP_LOAD_PAGEOFF12, sym, 0,
+ 0, 0);
+ aa64_emit32(mc,
+ aa64_ldr_uimm(/*size=*/3, /*Rt=*/1, /*Rn=*/0, /*byte_off=*/0));
+ aa64_emit32(mc, aa64_blr(/*Rn=*/1));
+ if (rd != 0) aa64_emit32(mc, aa64_mov_reg(/*sf=*/1, rd, /*Rm=*/0));
+ if (addend) aa64_emit_addr_adjust(mc, rd, rd, (i32)addend);
+ return;
+ }
+
aa64_emit32(mc, aa64_mrs_tpidr_el0(/*Rt=*/9));
u32 hi_pos = mc->pos(mc);
diff --git a/src/obj/macho_emit.c b/src/obj/macho_emit.c
@@ -163,6 +163,18 @@ static u32 section_flags_for(u16 sec_kind, u16 sec_flags, const char* sectname,
if (sec_kind == SEC_TEXT || (sec_flags & SF_EXEC)) {
f |= S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS;
}
+ if (sec_flags & SF_TLS) {
+ /* Mach-O has three TLV section types. Only __thread_vars is matched
+ * by sectname; data vs bss is decided by sec_kind:
+ * __thread_vars → S_THREAD_LOCAL_VARIABLES (descriptor records)
+ * SEC_BSS → S_THREAD_LOCAL_ZEROFILL (__thread_bss, zero-init data)
+ * otherwise → S_THREAD_LOCAL_REGULAR (__thread_data, initial data)
+ * dyld dispatches its TLV-bootstrap pass off the S_TYPE; the
+ * S_ATTR_* bits don't carry TLV semantics so we just emit the type. */
+ if (sect_len >= 13 && memcmp(sectname, "__thread_vars", 13) == 0)
+ return S_THREAD_LOCAL_VARIABLES;
+ if (sec_kind == SEC_BSS) return S_THREAD_LOCAL_ZEROFILL;
+ return S_THREAD_LOCAL_REGULAR;
+ }
if (sec_kind == SEC_BSS || (sect_len >= 5 && memcmp(sectname, "__bss", 5) == 0)) {
f |= S_ZEROFILL;
}
@@ -265,7 +277,11 @@ void emit_macho(Compiler* c, ObjBuilder* ob, Writer* w) {
m->is_zerofill = 1;
m->size = s->bss_size;
m->obj_bytes = NULL;
- m->flags = (m->flags & ~SECTION_TYPE) | S_ZEROFILL;
+ /* Preserve S_THREAD_LOCAL_ZEROFILL when SF_TLS routed us there;
+ * a regular BSS section gets the plain S_ZEROFILL type. */
+ u32 stype = m->flags & SECTION_TYPE;
+ if (stype != S_THREAD_LOCAL_ZEROFILL)
+ m->flags = (m->flags & ~SECTION_TYPE) | S_ZEROFILL;
} else {
m->is_zerofill = 0;
m->size = s->bytes.total;
diff --git a/src/obj/obj.c b/src/obj/obj.c
@@ -33,6 +33,10 @@ struct CfreeObjBuilder {
* derives a sensible default by arch. */
u32 elf_e_flags;
u8 elf_e_flags_set;
+ /* Cached undef extern `__tlv_bootstrap` (Mach-O on-disk name) used by
+ * obj_define_tls when emitting `_Thread_local` storage on Mach-O.
+ * Lazily materialized on the first TLV emission; OBJ_SYM_NONE otherwise. */
+ ObjSymId tlv_bootstrap_sym;
};
struct ObjSymIter {
@@ -66,6 +70,15 @@ ObjBuilder* obj_new(Compiler* c) {
Compiler* obj_compiler(const ObjBuilder* ob) { return ob ? ob->c : NULL; }
+/* Private accessors for the `_tlv_bootstrap` cache used by obj_define_tls.
+ * Lives in obj.c so the ObjBuilder field doesn't leak through obj.h. */
+/* Read the cached `__tlv_bootstrap` symbol id from the builder.
+ * A NULL builder reads as OBJ_SYM_NONE. */
+ObjSymId obj_tlv_bootstrap_get(const ObjBuilder* ob) {
+  if (!ob) return OBJ_SYM_NONE;
+  return ob->tlv_bootstrap_sym;
+}
+/* Store `id` as the builder's cached `__tlv_bootstrap` symbol.
+ * A NULL builder is ignored. */
+void obj_tlv_bootstrap_set(ObjBuilder* ob, ObjSymId id) {
+  if (!ob) return;
+  ob->tlv_bootstrap_sym = id;
+}
+
void obj_free(ObjBuilder* ob) {
u32 i, n;
if (!ob) return;
diff --git a/src/obj/obj.h b/src/obj/obj.h
@@ -436,6 +436,44 @@ Sym obj_secname_preinit_array(Compiler*);
Sym obj_secname_tdata(Compiler*);
Sym obj_secname_tbss(Compiler*);
+/* ---- thread-local storage emission ---------------------------------
+ *
+ * The frontend collects a `_Thread_local` definition's bytes (or marks
+ * it BSS), alignment, and any pointer-init relocs, then calls
+ * obj_define_tls to materialize the storage and bind the user-visible
+ * symbol. The obj layer owns the format split:
+ *
+ * ELF : `sym` is defined directly in `.tdata` / `.tbss`; the
+ * supplied relocs are applied at the same section/offset.
+ *
+ * Mach-O: the data lives under a private `<name>$tlv$init` symbol in
+ * `__DATA,__thread_data` / `__DATA,__thread_bss`; `sym` is
+ * defined onto a 24-byte TLV *descriptor* in
+ * `__DATA,__thread_vars` whose three slots are
+ * [_tlv_bootstrap, 0, &init]. dyld rewrites slot[0] to a
+ * per-descriptor thunk and fills slot[1] with a pthread_key
+ * during image-load; the compiler's TLVP_LOAD_PAGE21 /
+ * PAGEOFF12 codegen sequence targets the descriptor.
+ *
+ * The `_tlv_bootstrap` undef extern is cached on the ObjBuilder so a
+ * second TLV var in the same TU shares one symbol entry. */
+/* One relocation inside a TLS initializer. `offset` is relative to the
+ * caller's data buffer; obj_define_tls adds the base at which the bytes
+ * were placed in the storage section before handing it to obj_reloc. */
+typedef struct ObjTlsReloc {
+ u32 offset; /* byte offset of the patched field within the data buffer */
+ RelocKind kind; /* forwarded to obj_reloc; the parser passes R_ABS64 or R_ABS32 */
+ ObjSymId target; /* symbol the relocation refers to */
+ i64 addend; /* forwarded to obj_reloc unchanged */
+} ObjTlsReloc;
+
+/* Define thread-local storage for `sym`.
+ *   data, size        initializer bytes and the object's size in bytes.
+ *   has_nonzero_init  when 0, or when `data` is NULL, the object is
+ *                     reserved in the zero-fill TLS section and `relocs`
+ *                     are not emitted.
+ *   align             requested alignment; 0 is treated as 1.
+ *   relocs, nrelocs   pointer-init relocations, with offsets relative to
+ *                     `data`; emitted only on the initialized path. */
+void obj_define_tls(Compiler*, ObjBuilder*, ObjSymId sym, const u8* data,
+ u32 size, int has_nonzero_init, u32 align,
+ const ObjTlsReloc* relocs, u32 nrelocs);
+
+/* True when reads of `_Thread_local` storage go through a per-variable
+ * descriptor + thunk call rather than a direct TP-relative offset.
+ * Mach-O: yes (TLVP_LOAD_PAGE21 + thunk in descriptor[0]).
+ * ELF: no (Local-Exec / Initial-Exec: `mrs tpidr_el0` + tprel offset). */
+int obj_format_tls_via_descriptor(const Compiler*);
+
/* ---- format-aware codegen policy ----
*
* Backends consult these predicates instead of branching on
diff --git a/src/obj/obj_tls.c b/src/obj/obj_tls.c
@@ -0,0 +1,169 @@
+/* Format-aware thread-local storage emission.
+ *
+ * `_Thread_local` storage has the same source-level shape across object
+ * formats (a global with a per-thread instance), but the on-disk
+ * representation diverges sharply:
+ *
+ * ELF : one symbol in `.tdata` / `.tbss`; access via direct
+ * TP-relative offset (TLSLE relocs).
+ * Mach-O: storage and access are split. The bytes live under a
+ * private `<name>$tlv$init` symbol in `__DATA,__thread_data`
+ * / `__DATA,__thread_bss`. The user-visible symbol points
+ * at a 24-byte *descriptor* in `__DATA,__thread_vars`:
+ * +0 : ptr to `_tlv_bootstrap` (BIND; dyld rewrites to a
+ * per-descriptor thunk after allocating a pthread key)
+ * +8 : pthread key (0 on disk; filled by dyld)
+ * +16 : offset of the data symbol (R_ABS64 in the object; link_macho
+ * writes the literal (target_vaddr - tls_image_vaddr))
+ * Access from compiled code is an indirect call through the
+ * descriptor's slot[0]; the TLVP_LOAD_PAGE21/PAGEOFF12 reloc
+ * pair targets the descriptor symbol, not the data.
+ *
+ * Centralizing the split here keeps the frontend (parse_init.c) and the
+ * codegen (per-arch ops.c) format-agnostic — they pass the canonical
+ * SK_TLS symbol plus byte buffer to `obj_define_tls`, and consult
+ * `obj_format_tls_via_descriptor` when choosing an access sequence.
+ *
+ * The `_tlv_bootstrap` undef extern is cached on ObjBuilder so multiple
+ * TLV vars in one TU share one entry; the linker dedupes across TUs by
+ * name. */
+
+#include "obj/obj.h"
+
+#include <string.h>
+
+#include "core/core.h"
+#include "core/heap.h"
+#include "core/pool.h"
+
+/* ObjBuilder is opaque outside obj.c; obj_tls.c reaches the bootstrap
+ * cache via these accessors defined in obj.c. Declared here to avoid
+ * exposing the field in obj.h's public ObjBuilder layout. */
+ObjSymId obj_tlv_bootstrap_get(const ObjBuilder*);
+void obj_tlv_bootstrap_set(ObjBuilder*, ObjSymId);
+
+/* Nonzero when the target object format is Mach-O, i.e. when TLS reads
+ * go through a per-variable descriptor rather than a TP-relative offset. */
+int obj_format_tls_via_descriptor(const Compiler* c) {
+  if (c->target.obj == CFREE_OBJ_MACHO) return 1;
+  return 0;
+}
+
+/* ELF flavour: bind `sym` directly onto its storage.
+ *
+ * With no initializer bytes (`data` NULL) or an all-zero initializer the
+ * object is reserved in the zero-fill TLS section and no relocs are
+ * emitted. Otherwise the bytes are copied into the initialized TLS
+ * section and every reloc is re-based onto the copied bytes. */
+static void define_tls_elf(ObjBuilder* ob, Compiler* c, ObjSymId sym,
+                           const u8* data, u32 size, int has_nonzero_init,
+                           u32 align, const ObjTlsReloc* relocs, u32 nrelocs) {
+  u32 al = 1u;
+  if (align) al = align;
+
+  if (!data || !has_nonzero_init) {
+    ObjSecId bss = obj_section_ex(ob, obj_secname_tbss(c), SEC_BSS,
+                                  SSEM_NOBITS, SF_ALLOC | SF_WRITE | SF_TLS,
+                                  al, 0, OBJ_SEC_NONE, 0);
+    u32 at = obj_align_to(ob, bss, al);
+    obj_reserve_bss(ob, bss, at + size, al);
+    obj_symbol_define(ob, sym, bss, at, size);
+    return;
+  }
+
+  ObjSecId tdata = obj_section(ob, obj_secname_tdata(c), SEC_DATA,
+                               SF_ALLOC | SF_WRITE | SF_TLS, al);
+  u32 at = obj_align_to(ob, tdata, al);
+  u8* bytes = obj_reserve(ob, tdata, size);
+  if (bytes) memcpy(bytes, data, size);
+  obj_symbol_define(ob, sym, tdata, at, size);
+
+  /* Reloc offsets are relative to `data`; shift them by the placement. */
+  for (u32 k = 0; k < nrelocs; ++k) {
+    const ObjTlsReloc* r = &relocs[k];
+    obj_reloc(ob, tdata, at + r->offset, r->kind, r->target, r->addend);
+  }
+}
+
+/* Return the builder's undefined global reference to `__tlv_bootstrap`,
+ * creating the symbol and caching its id on first use so later calls
+ * reuse the same entry. */
+static ObjSymId tlv_bootstrap(ObjBuilder* ob, Compiler* c) {
+  ObjSymId cached = obj_tlv_bootstrap_get(ob);
+  if (cached == OBJ_SYM_NONE) {
+    /* The interned string is the on-disk spelling: Mach-O adds a leading
+     * underscore, so source-level `_tlv_bootstrap` is `__tlv_bootstrap`. */
+    Sym on_disk = pool_intern_cstr(c->global, "__tlv_bootstrap");
+    cached = obj_symbol(ob, on_disk, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0);
+    obj_tlv_bootstrap_set(ob, cached);
+  }
+  return cached;
+}
+
+/* Create the private storage symbol `<desc_name>$tlv$init`.
+ *
+ * The name is assembled in a temporary heap buffer, interned into the
+ * global pool, and the buffer is released before returning. The result
+ * is a local SK_TLS symbol with no section yet; the caller defines it. */
+static ObjSymId mint_init_sym(ObjBuilder* ob, Compiler* c, Sym desc_name) {
+  size_t base_len = 0;
+  const char* base = pool_str(c->global, desc_name, &base_len);
+  static const char suffix[] = "$tlv$init";
+  const size_t suffix_len = sizeof(suffix) - 1u;
+  const size_t total = base_len + suffix_len;
+
+  Heap* heap = (Heap*)c->env->heap;
+  char* name = (char*)heap->alloc(heap, total + 1u, 1);
+  if (!name) {
+    compiler_panic(c, (SrcLoc){0, 0, 0}, "obj_define_tls: oom interning init name");
+  }
+
+  if (base_len) memcpy(name, base, base_len);
+  /* sizeof(suffix) bytes: the suffix plus its terminating NUL. */
+  memcpy(name + base_len, suffix, suffix_len + 1u);
+
+  Sym interned = pool_intern(c->global, name, (u32)total);
+  heap->free(heap, name, total + 1u);
+  return obj_symbol(ob, interned, SB_LOCAL, SK_TLS, OBJ_SEC_NONE, 0, 0);
+}
+
+/* Mach-O flavour: split storage from the user-visible symbol.
+ *
+ *  1. The object's bytes go under a freshly minted `<name>$tlv$init`
+ *     symbol, in the initialized TLS section when there is a nonzero
+ *     initializer and in the zero-fill TLS section otherwise. Relocs are
+ *     emitted only on the initialized path.
+ *  2. `sym` is defined on a zeroed 24-byte record in
+ *     `__DATA,__thread_vars`, with an R_ABS64 against `__tlv_bootstrap`
+ *     at +0 and an R_ABS64 against the storage symbol at +16. */
+static void define_tls_macho(ObjBuilder* ob, Compiler* c, ObjSymId sym,
+                             const u8* data, u32 size, int has_nonzero_init,
+                             u32 align,
+                             const ObjTlsReloc* relocs, u32 nrelocs) {
+  /* Descriptor record geometry. */
+  const u32 desc_align = 8u;
+  const u32 desc_size = 24u;
+  const u32 slot_thunk = 0u;  /* +0  : bootstrap / thunk pointer */
+  const u32 slot_init = 16u;  /* +16 : reference to the storage symbol */
+
+  const ObjSym* user = obj_symbol_get(ob, sym);
+  if (!user) {
+    compiler_panic(c, (SrcLoc){0, 0, 0}, "obj_define_tls: descriptor sym not found");
+  }
+  ObjSymId init_sym = mint_init_sym(ob, c, user->name);
+
+  /* Storage. The sections carry SF_TLS; macho_emit's section_flags_for
+   * turns that into the matching S_THREAD_LOCAL_* section type. */
+  u32 al = 1u;
+  if (align) al = align;
+  if (data && has_nonzero_init) {
+    ObjSecId tdata = obj_section(ob, obj_secname_tdata(c), SEC_DATA,
+                                 SF_ALLOC | SF_WRITE | SF_TLS, al);
+    u32 at = obj_align_to(ob, tdata, al);
+    u8* bytes = obj_reserve(ob, tdata, size);
+    if (bytes) memcpy(bytes, data, size);
+    obj_symbol_define(ob, init_sym, tdata, at, size);
+    for (u32 k = 0; k < nrelocs; ++k) {
+      const ObjTlsReloc* r = &relocs[k];
+      obj_reloc(ob, tdata, at + r->offset, r->kind, r->target, r->addend);
+    }
+  } else {
+    ObjSecId bss = obj_section_ex(ob, obj_secname_tbss(c), SEC_BSS,
+                                  SSEM_NOBITS, SF_ALLOC | SF_WRITE | SF_TLS,
+                                  al, 0, OBJ_SEC_NONE, 0);
+    u32 at = obj_align_to(ob, bss, al);
+    obj_reserve_bss(ob, bss, at + size, al);
+    obj_symbol_define(ob, init_sym, bss, at, size);
+  }
+
+  /* Descriptor. Code-side TLVP relocs name `sym`, so `sym` must resolve
+   * to this record rather than to the storage bytes. */
+  Sym vars_name = pool_intern_cstr(c->global, "__DATA,__thread_vars");
+  ObjSecId vars = obj_section(ob, vars_name, SEC_DATA,
+                              SF_ALLOC | SF_WRITE | SF_TLS, desc_align);
+  u32 desc_at = obj_align_to(ob, vars, desc_align);
+  u8* desc = obj_reserve(ob, vars, desc_size);
+  if (desc) memset(desc, 0, desc_size);
+  obj_symbol_define(ob, sym, vars, desc_at, desc_size);
+
+  ObjSymId boot = tlv_bootstrap(ob, c);
+  obj_reloc(ob, vars, desc_at + slot_thunk, R_ABS64, boot, 0);
+  obj_reloc(ob, vars, desc_at + slot_init, R_ABS64, init_sym, 0);
+}
+
+/* Public entry point: materialize TLS storage for `sym` in whichever
+ * shape the target object format needs. Descriptor-based formats
+ * (Mach-O) take the descriptor path; everything else takes the ELF
+ * path. All arguments are forwarded untouched. */
+void obj_define_tls(Compiler* c, ObjBuilder* ob, ObjSymId sym, const u8* data,
+                    u32 size, int has_nonzero_init, u32 align,
+                    const ObjTlsReloc* relocs, u32 nrelocs) {
+  if (!obj_format_tls_via_descriptor(c)) {
+    define_tls_elf(ob, c, sym, data, size, has_nonzero_init, align, relocs,
+                   nrelocs);
+  } else {
+    define_tls_macho(ob, c, sym, data, size, has_nonzero_init, align, relocs,
+                     nrelocs);
+  }
+}
diff --git a/src/parse/parse_init.c b/src/parse/parse_init.c
@@ -729,32 +729,23 @@ void define_static_object(Parser* p, ObjSymId sym, const Type* var_ty,
}
if (is_tls) {
- Sym sname;
- ObjSecId sec;
- u32 a = align ? align : 1u;
- u32 base;
- if (!has_init || !has_nonzero) {
- sname = obj_secname_tbss(p->c);
- sec = obj_section_ex(ob, sname, SEC_BSS, SSEM_NOBITS,
- SF_ALLOC | SF_WRITE | SF_TLS, a, 0, OBJ_SEC_NONE, 0);
- base = obj_align_to(ob, sec, a);
- obj_reserve_bss(ob, sec, base + size, a);
- obj_symbol_define(ob, sym, sec, base, size);
- return;
- }
- sname = obj_secname_tdata(p->c);
- sec = obj_section(ob, sname, SEC_DATA, SF_ALLOC | SF_WRITE | SF_TLS, a);
- base = obj_align_to(ob, sec, a);
- {
- u8* dst = obj_reserve(ob, sec, size);
- if (dst) memcpy(dst, buf, size);
- }
- obj_symbol_define(ob, sym, sec, base, size);
- for (u32 i = 0; i < p->static_relocs_len; ++i) {
- RelocKind rk = (p->static_relocs[i].size == 8) ? R_ABS64 : R_ABS32;
- obj_reloc(ob, sec, base + p->static_relocs[i].offset, rk,
- p->static_relocs[i].target, p->static_relocs[i].addend);
+ /* Format split (ELF .tdata/.tbss vs Mach-O TLV descriptor) lives in
+ * obj_define_tls. Translate the parser's static_relocs[] (size=4|8)
+ * into the obj-layer's RelocKind so the caller stays format-agnostic. */
+ ObjTlsReloc* tr = NULL;
+ u32 nr = p->static_relocs_len;
+ if (nr) {
+ tr = arena_array(p->c->tu, ObjTlsReloc, nr);
+ for (u32 i = 0; i < nr; ++i) {
+ tr[i].offset = p->static_relocs[i].offset;
+ tr[i].kind =
+ (p->static_relocs[i].size == 8) ? R_ABS64 : R_ABS32;
+ tr[i].target = p->static_relocs[i].target;
+ tr[i].addend = p->static_relocs[i].addend;
+ }
}
+ obj_define_tls(p->c, ob, sym, buf, size, has_nonzero,
+ align ? align : 1u, tr, nr);
p->static_relocs_len = 0;
(void)loc;
return;