commit 98ca5ca79027bd63e9609ddcfc45d7ddcb020709
parent be98de5e51866601a18064d691b78e7be4d03fda
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 29 May 2026 13:18:21 -0700
aa64: emit Windows/COFF TLS Local-Exec access sequence
aa_tls_addr_of panicked 'unsupported TLS object format' for any non-ELF,
non-descriptor target, so the aarch64-windows runtime could not build
(coro.c uses _Thread_local) and test-coff failed at the compile step.
Add aa_tls_addr_of_win mirroring x64_tls_addr_of_win64: load the TEB TLS
array pointer from x18+0x58, index it by _tls_index, then apply the
section-relative offset of the variable via the COFF SECREL_{HIGH,LOW}12A
relocations. The reloc/linker/TLS-directory infrastructure already existed;
only the codegen emission was missing.
&_tls_index is materialized with ADRP+ADD (ADD_ABS_LO12_NC) rather than an
LDR :lo12: form on purpose: the COFF reader collapses LDST32->LDST64 width,
which would mis-scale an LDR :lo12: at link time.
Also record the full native-arch completeness audit in
doc/NATIVE_ARCH_COMPLETENESS.md.
Verified: test-coff green; objdump confirms the 7-instruction sequence and
the expected _tls_index + SECREL relocations.
Diffstat:
2 files changed, 148 insertions(+), 0 deletions(-)
diff --git a/doc/NATIVE_ARCH_COMPLETENESS.md b/doc/NATIVE_ARCH_COMPLETENESS.md
@@ -0,0 +1,86 @@
+# Native-arch completeness: asm / disasm / link-reloc / dwarf
+
+Goal: aa64, x64, rv64 each have complete **asm**, **disasm**, **link/reloc**,
+and **dwarf** support across the OS support matrix:
+
+| arch | OSes | object formats |
+|------|----------------------------|--------------------|
+| aa64 | Linux, Windows, Mac | ELF, COFF/PE, Mach-O |
+| x64 | Linux, Windows | ELF, COFF/PE |
+| rv64 | Linux | ELF |
+
+Status from the 2026-05-29 audit (baseline tests + a 40-agent static audit with
+adversarial verification of every gap). Only the items below are *verified real*
+and *in scope*; seven other candidate gaps were checked and rejected
+(e.g. rv64 omitting the four Mach-O `LinkArchDesc` classification hooks is N/A —
+no rv64 Mach-O; aa64 ELF TLS being Local-Exec-only is correct for the static
+whole-module link model).
+
+## Tier 0 — correctness blockers (broken today, in scope)
+
+- [x] **aa64 Windows/COFF TLS codegen** — `aa_tls_addr_of` (`src/arch/aa64/native.c:1787`)
+ panicked `unsupported TLS object format` for COFF; this *failed `make test-coff`*
+ (the aarch64-windows runtime `coro.c` uses `_Thread_local`). The COFF SECREL
+ reloc + TLS-directory + `_tls_index` infra already existed; only codegen was
+ missing. Fixed by `aa_tls_addr_of_win`, mirroring x64 (`x64_tls_addr_of_win64`, `src/arch/x64/native.c:2538`).
+- [ ] **rv64 TLS-IE reloc unhandled → hard link failure** — under `-fPIE` (the
+ default) rv64 emits `R_RV_TLS_GOT_HI20`, which the linker cannot lay out or
+ apply (`link: unsupported reloc kind 80`). Fix: emit Local-Exec (`R_RV_TPREL_*`)
+ like aa64/x64 (`src/arch/rv64/native.c:2134`).
+- [ ] **rv64 assembler emits no relocations** — `assemble_one`
+ (`src/arch/rv64/asm.c:292`) routes every branch/jump/call/lui/auipc immediate
+ through `asm_driver_parse_const`, which panics on any symbol. `beq a0,a1,label`
+ / `j label` / `call f` cannot be assembled. Mirror aa64/x64 symbolic-branch path.
+
+## Tier 1 — correctness bugs (wrong output, in scope)
+
+- [ ] **x64 `.eh_frame` wrong DWARF reg for RBP** — passes `X64_RBP` (HW enc 5 =
+ DWARF RDI) into CFI instead of DWARF reg 6 (`src/arch/x64/native.c:1696`).
+ Corrupts frame-pointer unwinding on x64-Linux. Needs HW→DWARF reg map.
+
+## Tier 2 — codegen emits but disasm/asm can't handle (round-trip violations)
+
+- [ ] **aa64 disasm: FP/SIMD data-processing family undecodable** → `.inst`
+ (no FP-DP rows in `src/arch/aa64/isa.c`). Any aa64 float code mis-renders in
+ objdump + JIT debugger on all 3 OSes.
+- [ ] **x64 disasm: SSE `movd/movq` (66 0F 6E/7E), `xorps/xorpd` (0F 57) missing**
+ → 1-byte `.byte` fallback desyncs the whole stream (`src/arch/x64/isa.c:250`).
+- [ ] **aa64 asm: no FP-scalar instrs** (shares the isa.c table with the disasm gap).
+- [ ] **aa64 asm: no byte/half loads/stores** (`ldrb/strb/ldrh/strh/ldrsb/ldrsh/ldrsw`);
+ also no pre/post-index parsing (`src/arch/aa64/asm.c:938`).
+- [ ] **aa64 asm: no atomics / exclusive / bitfield / clz / rev** — codegen emits
+ these (`aa_ldxr/aa_stxr/aa_ldar/aa_sbfm/aa_ubfm/aa_clz/aa_rbit/aa_rev`) but
+ neither asm nor disasm handle them.
+- [ ] **x64 asm: memory operands only `disp(%base)`** — no SIB index/scale, no
+ `(%rip)` (`src/arch/x64/asm.c:193`). Disasm of cg output not reassemblable.
+- [ ] **x64 asm: ALU reg→mem / imm→mem store forms** unsupported (`src/arch/x64/asm.c:701,793`).
+- [ ] **x64 dwarf: no named params/locals** — `frame_slot_debug_loc = NULL`
+ (`src/arch/x64/native.c:3746`). gdb/lldb can't print locals. (DBG_TODO.md:153)
+- [ ] **rv64 dwarf: no named params/locals** — same NULL hook (`src/arch/rv64/native.c:3240`).
+- [ ] **x64 dwarf: step-out can't recover RA** — `cfree_dwarf_unwind_step` has no
+ memory provider, and x64 has no link-register fallback. Needs a mem-reading
+ unwind variant.
+
+## Tier 3 — minor correctness / robustness
+
+- [ ] rv64 `needs_jit_call_stub` NULL — no far-call stub for >2GiB SK_ABS in JIT.
+- [ ] CFI offset rules pinned to function-end PC, not post-prologue (one-shot
+ `cfi_set_next_pc_offset`; shared producer `src/arch/mc.c`). All archs.
+- [ ] aa64 asm: `mov Rd,#bitmask-imm` via ORR alias (matches GNU as).
+- [ ] rv64 asm: `call/tail/la/lla` pseudos + multi-word `li`.
+- [ ] shared asm: `.comm/.lcomm` mint no symbol; `.uleb128/.sleb128` emit no bytes
+ (silent miscompile) — `src/asm/asm.c:902`.
+- [ ] asm: no relocation-operator operand syntax (`@PLT/@GOTPCREL/:lo12:/%hi/%pcrel_hi`)
+ on any arch (large; overlaps the rv64 + x64 asm items).
+- [ ] rv64 link: `R_RV_SET_ULEB128/SUB_ULEB128` mapped on read but unsized/unapplied
+ (blocks ingesting external rv64 objects with ULEB128 diff relocs).
+
+## Tier 4 — test coverage (no behavior change; lock in the above)
+
+- [ ] x64 decode/ISA test (test-isa builds only aa64+rv64).
+- [ ] x64 encode corpus never runs under default `make test` (gated on `CFREE_TEST_ARCH`).
+- [ ] rv64 asm/codegen/link corpus only via opt-in targets; add no-exec default lanes.
+- [ ] x64 ELF linker reloc application not run by default.
+- [ ] `test/debug/cfi_unit.c` (.eh_frame roundtrip, aa64+rv64) wired to no target.
+- [ ] add x64 case to `cfi_unit.c` (would catch the RBP bug).
+- [ ] aa64/x64 FP decode test (would have caught the FP disasm holes).
diff --git a/src/arch/aa64/native.c b/src/arch/aa64/native.c
@@ -63,6 +63,15 @@ enum {
AA_TAIL_WORDS = 32u,
};
+/* Constants for the Windows/AArch64 TLS Local-Exec sequence. The TEB pointer
+ * lives in the reserved platform register x18 (never allocated; see
+ * AA_PHYS_INT_RESERVED(18)); the thread's TLS-array pointer
+ * (TEB.ThreadLocalStoragePointer) sits at TEB+0x58, as on Win64/x86-64. */
+enum {
+ AA_WIN_TEB_REG = 18u, /* x18: base register of the TEB load */
+ AA_WIN_TEB_TLS_PTR_OFF = 0x58u, /* offset of the TLS-array pointer in the TEB */
+};
+
/* ============================================================================
* AAPCS64 frame layout
*
@@ -1763,6 +1772,55 @@ static void aa_store_native(NativeTarget* t, NativeAddr addr, NativeLoc src,
aa_emit_mem(aa_of(t), 0, src, addr, mem);
}
+/* Windows/AArch64 TLS Local-Exec (PE-COFF); mirrors x64_tls_addr_of_win64.
+ * Emits 7 instructions that leave &sym (+ addend) in dst's register rd:
+ * ldr rd, [x18, #0x58] ; TEB.ThreadLocalStoragePointer
+ * adrp x16, _tls_index ; PAGEBASE_REL21
+ * add x16, x16, :lo12:_tls_index ; PAGEOFFSET_12A
+ * ldr w16, [x16] ; module's TLS index
+ * ldr rd, [rd, x16, lsl #3] ; this module's TLS block base
+ * add rd, rd, #:secrel_hi12:sym ; SECREL_HIGH12A (sh=1)
+ * add rd, rd, #:secrel_lo12:sym ; SECREL_LOW12A (sh=0)
+ * &_tls_index is built with ADRP+ADD (not LDR :lo12:) on purpose: the COFF
+ * reader collapses LDST32→LDST64 width, so an LDR :lo12: form would be
+ * mis-scaled at link time; ADD_ABS_LO12_NC carries no width and round-trips
+ * cleanly. AA_TMP0 (x16) is the reserved scratch; rd must not be x16/x17/x18. */
+static void aa_tls_addr_of_win(NativeTarget* t, NativeLoc dst, ObjSymId sym,
+ i64 addend) {
+ MCEmitter* mc = t->mc;
+ u32 sec = mc->section_id; /* section passed to every emit_reloc_at below */
+ u32 rd = loc_reg(dst); /* destination reg; also carries intermediates */
+ u32 pos; /* emitter position captured just before a relocated instruction */
+ Sym idx_name = pool_intern_slice(t->c->global, SLICE_LIT("_tls_index"));
+ ObjSymId idx_sym = obj_symbol_find(t->obj, idx_name);
+ if (idx_sym == 0) /* id 0 is treated as "not found": add an undefined global */
+ idx_sym = obj_symbol(t->obj, idx_name, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0,
+ 0);
+ /* (1) rd = TEB.ThreadLocalStoragePointer, loaded relative to x18. */
+ aa_emit32(mc, aa_ldr_uimm(3, rd, AA_WIN_TEB_REG, AA_WIN_TEB_TLS_PTR_OFF));
+ /* (2)+(3) x16 = &_tls_index: ADRP + ADD with zero imms, one reloc each. */
+ pos = mc->pos(mc);
+ aa_emit32(mc, aa64_adrp(AA_TMP0, 0, 0));
+ mc->emit_reloc_at(mc, sec, pos, R_AARCH64_ADR_PREL_PG_HI21, idx_sym, 0, 0, 0);
+ pos = mc->pos(mc);
+ aa_emit32(mc, aa64_add_imm(1, AA_TMP0, AA_TMP0, 0, 0));
+ mc->emit_reloc_at(mc, sec, pos, R_AARCH64_ADD_ABS_LO12_NC, idx_sym, 0, 0, 0);
+ /* (4) w16 = _tls_index (the loaded value); x16 is reused as the index. */
+ aa_emit32(mc, aa_ldr_uimm(2, AA_TMP0, AA_TMP0, 0));
+ /* (5) rd = TLS array slot for this module: ldr rd, [rd, x16, lsl #3]. */
+ aa_emit32(mc, aa_ldst_regoff_v(3, 0, 1, rd, rd, AA_TMP0, 1));
+ /* (6) rd += :secrel_hi12:sym (ADD with sh=1; linker patches imm12). */
+ pos = mc->pos(mc);
+ aa_emit32(mc, aa64_add_imm(1, rd, rd, 0, 1));
+ mc->emit_reloc_at(mc, sec, pos, R_COFF_AARCH64_SECREL_HIGH12A, sym, addend, 1,
+ 0); /* NOTE(review): trailing 1,0 vs 0,0 above; meaning not visible here */
+ /* (7) rd += :secrel_lo12:sym (ADD with sh=0; same symbol and addend). */
+ pos = mc->pos(mc);
+ aa_emit32(mc, aa64_add_imm(1, rd, rd, 0, 0));
+ mc->emit_reloc_at(mc, sec, pos, R_COFF_AARCH64_SECREL_LOW12A, sym, addend, 1,
+ 0);
+}
+
static void aa_tls_addr_of(NativeTarget* t, NativeLoc dst, ObjSymId sym,
i64 addend) {
AANativeTarget* a = aa_of(t);
@@ -1784,6 +1842,10 @@ static void aa_tls_addr_of(NativeTarget* t, NativeLoc dst, ObjSymId sym,
if (rd != 0) aa_emit32(mc, aa64_mov_reg(1, rd, 0));
return;
}
+ if (t->c->target.os == CFREE_OS_WINDOWS) {
+ aa_tls_addr_of_win(t, dst, sym, addend);
+ return;
+ }
if (t->c->target.obj != CFREE_OBJ_ELF) {
aa_panic(a, "unsupported TLS object format");
}