commit 0493f0042030ecca28f7d0d67039372cf165a4b8
parent 65545dc2cc2be1f6fb2de2149e2702a267aa7960
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 5 Jun 2026 19:04:57 -0700
Partition the relocation byte-patcher per-arch (RELOC WS-C)
Move the per-arch instruction-immediate relocation byte encoders out of the
format-neutral src/obj/reloc_apply.c into each backend's reloc.c, reached
through a new LinkArchDesc.reloc_apply_insn hook, while keeping
link_reloc_apply as the single public byte-patcher entry. Adding an arch's
relocation is now arch-local: one RelocDesc row (WS-B) + one reloc_apply_insn
arm + one wire-translator entry, no generic src/link or src/obj edit.
- src/arch/{aa64,x64,riscv}/reloc.c: *_reloc_apply_insn — the imm19/imm26/
ADRP-page (aa64), U/I/S/B/J + RVC scatter & +0x800 bias (rv), and rel8 (x64)
encoders, moved verbatim. Wired into each LinkArchDesc in link.c.
- src/obj/reloc_apply.c: reloc_apply_neutral() keeps the arch-neutral data-word
+ ULEB128 arms (R_ABS/REL/PC/TPOFF writes, x64 GOT/dynamic slots, RISC-V data
ADD/SUB/SET). Pure obj-core, no link/arch dependency.
- src/link/link_reloc_apply.c (new): link_reloc_apply() dispatcher, neutral-
then-arch. Lives in src/link (not obj-core) because resolving the per-arch
slice needs link_arch_desc_for() — same boundary reason WS-B placed
reloc_desc() in src/link. The dispatcher enumerates no kinds:
`rg "case R_(AARCH64|X64|RV)_" src/link` is empty.
R_PLT32's apply is the RISC-V AUIPC+JALR pair (x64 emits R_X64_PLT32 on read,
never canonical R_PLT32), so it lives in the rv hook beside R_RV_CALL — not
neutral, despite its neutral name. x64 owns only R_X64_PC8; the wider x64
GOT/PLT/TPOFF data slots stay neutral.
Migration guard: test/link/reloc_apply_test.c (test-link-reloc-apply, 50 frozen
pre-WS-C byte checks across aa64/x64/rv). Verified byte-identical: full
link/elf/macho/ar/asm/isa/opt/coff/smoke matrix + bootstrap-debug (stage2 ==
stage3). reloc_uleb128_unit's c=NULL path still works (neutral never touches the
compiler).
Also refresh test/smoke/rv64_tls_link.sh's reloc grep (RV_TPREL_HI20 ->
R_RISCV_TPREL_HI20): WS-E.3 switched ELF reloc records to binutils spellings but
missed this script, and R_RISCV_TPREL_HI20 doesn't contain the old substring.
Diffstat:
15 files changed, 809 insertions(+), 437 deletions(-)
diff --git a/doc/plan/RELOC.md b/doc/plan/RELOC.md
@@ -1,6 +1,6 @@
# Relocation-layer genericization (planned work)
-## Status — 2026-06-05 — WS-B (descriptor table) + WS-E.2/E.3 (residual gates) landed; only the WS-C byte-patcher partition remains
+## Status — 2026-06-05 — WS-B (descriptor table) + WS-C (byte-patcher partition) + WS-E.2/E.3 (residual gates) landed; only the optional WS-A enum collapse remains
This roadmap makes the **canonical-`RelocKind` half** of the relocation subsystem
as modular as the wire half already is. The goal is the project's standing
@@ -56,7 +56,7 @@ parallel tables the compiler cannot keep in sync:
| Attribute | Lives in | Status |
|-----------|----------|--------|
-| how to patch the bytes | `link_reloc_apply()` `src/obj/reloc_apply.c:83` (switch, ~77 arms) | **open** — WS-C |
+| how to patch the bytes | per-arch `src/arch/<arch>/reloc.c` (`*_reloc_apply_insn`) + neutral `reloc_apply_neutral()` `src/obj/reloc_apply.c`; dispatched by `link_reloc_apply()` `src/link/link_reloc_apply.c` | **landed** — WS-C |
| byte width | `RelocDesc.width` (per-arch `src/arch/<arch>/reloc.c` + neutral `src/obj/reloc.c`) | **landed** — WS-B |
| uses GOT / is TLS-GOT | `RelocDesc.flags` `RELOC_USES_GOT`/`RELOC_IS_TLS_GOT` | **landed** — WS-B |
| branch / got-load / tlvp / direct-page | `RelocDesc.flags` `RELOC_IS_BRANCH`/`USES_GOT`/`IS_TLVP`/`DIRECT_PAGE` | **landed** — WS-B |
@@ -90,13 +90,22 @@ ISA knowledge — still sit in the format-neutral `src/obj/reloc_apply.c`.
## The end state (ownership)
```
-src/obj/reloc.c / reloc_apply.c neutral core: byte encoders for the
- arch-independent data-word kinds (R_ABS*,
- R_REL*, R_PC*, R_TPOFF*, R_GOT32, R_PLT32) and
- the single public link_reloc_apply() dispatcher.
-src/arch/<arch>/reloc.c (NEW) that arch's RelocDesc rows (width + class flags)
- AND its instruction-immediate byte encoders,
- reached via LinkArchDesc.
+src/obj/reloc_apply.c neutral core: reloc_apply_neutral() — byte encoders
+ for the arch-independent data-word kinds (R_ABS*,
+ R_REL*, R_PC*, R_TPOFF*, the x64 GOT/dynamic data
+ slots, the RISC-V data ADD/SUB/SET arithmetic) + the
+ ULEB128 codec. Pure obj-core, no link/arch dep.
+src/link/link_reloc_apply.c (NEW) the single public link_reloc_apply() dispatcher:
+ neutral-then-arch. Housed in link (not obj-core)
+ because resolving the per-arch slice needs
+ link_arch_desc_for() — same boundary call as WS-B's
+ reloc_desc() dispatcher.
+src/arch/<arch>/reloc.c that arch's RelocDesc rows (width + class flags, WS-B)
+ AND its instruction-immediate byte encoders
+ (*_reloc_apply_insn, WS-C), reached via
+ LinkArchDesc.reloc_apply_insn. (R_PLT32's apply is the
+ RISC-V AUIPC+JALR pair, so it lives in the rv hook with
+ R_RV_CALL — not neutral, despite its neutral name.)
src/obj/<fmt>/reloc_<arch>.c UNCHANGED — the per-(arch,fmt) wire translators,
incl. the reloc_name spellings (already landed).
src/obj/coff/reloc.c COFF-specific kinds' RelocDesc rows (format, not arch).
@@ -226,9 +235,29 @@ deletion touches; byte-identity catches any width drift).
---
-## WS-C — Partition the byte-patcher per-arch behind the single entry (addresses **D**)
-
-**Problem.** `src/obj/reloc_apply.c` lives in the format-neutral obj layer but
+## WS-C — Partition the byte-patcher per-arch behind the single entry (addresses **D**) — *LANDED*
+
+**Status (landed).** The instruction-immediate byte encoders moved into each
+backend as `*_reloc_apply_insn` (`src/arch/{aa64,x64,riscv}/reloc.c`), reached
+through a new `LinkArchDesc.reloc_apply_insn` hook (`src/link/link_arch.h`,
+wired in each arch's `link.c`). The format-neutral data-word arms (R_ABS/REL/PC/
+TPOFF writes, x64 GOT/dynamic slots, the RISC-V data ADD/SUB/SET arithmetic, and
+the ULEB128 codec) stay in obj-core as `reloc_apply_neutral()`
+(`src/obj/reloc_apply.c`), which has no link/arch dependency. The single public
+entry `link_reloc_apply()` moved to `src/link/link_reloc_apply.c` (neutral-then-
+arch dispatch) — *not* obj-core, because resolving the per-arch slice needs
+`link_arch_desc_for()`, the same boundary reason WS-B placed `reloc_desc()` in
+`src/link`. The dispatcher enumerates no kinds (`rg "case R_(AARCH64|X64|RV)_"
+src/link` is empty). x64 owns only `R_X64_PC8`; the wider x64 GOT/PLT/TPOFF data
+slots remained neutral. `R_PLT32` is applied as the RISC-V AUIPC+JALR pair so it
+lives in the rv hook beside `R_RV_CALL` (x64 never emits canonical `R_PLT32` — it
+emits `R_X64_PLT32` via `reloc_from`). Migration guard:
+`test/link/reloc_apply_test.c` (`test-link-reloc-apply`) — frozen pre-WS-C
+patched bytes for every instruction-immediate kind across aa64/x64/rv (50
+checks). The reloc_uleb128 c=NULL path still works (neutral never touches the
+compiler). Full link/elf/macho/ar/asm/isa/opt/coff/smoke matrix + bootstrap pass.
+
+**Problem (original).** `src/obj/reloc_apply.c` lives in the format-neutral obj layer but
encodes pure ISA knowledge — AArch64 imm19/imm26/ADRP page math, RISC-V U/I/S/B/J
immediate scatter and the 0x800 HI20 bias, x64 field writes. Adding an arch edits
this shared file; the encoders belong in the backends, beside that arch's MC emitter
@@ -300,9 +329,11 @@ wherever `use_rela_iplt` is true); deeper coverage is the FreeBSD VM lane
exhaustiveness test, deleting both generic switches and the duplicating `is_*`
hooks. This is now the highest-value open item (the identity switches are already
gone). Fold WS-A's value-class collapse in here since it touches the same enum/arms.
-2. **WS-C** — encoder partition; gated behind WS-B's test, one arch at a time.
-3. **WS-E.2 / WS-E.3** — independent, low priority; land WS-E.3 (golden refresh) on
- its own so the corpus diff is reviewed in isolation.
+2. **WS-C** — **DONE.** Encoder partition behind the single entry, gated by the new
+ `test/link/reloc_apply_test.c` frozen-bytes guard + bootstrap byte-identity.
+3. **WS-E.2 / WS-E.3** — **DONE.** (WS-E.3's binutils-spelling switch also required
+ refreshing `test/smoke/rv64_tls_link.sh`'s reloc grep — `RV_TPREL_HI20` →
+ `R_RISCV_TPREL_HI20` — a stale expectation it had missed.)
**Risk controls.** Every WS is red-green: WS-B's exhaustiveness + width-migration test
is written first and fails until each arch's slice is complete. The **bootstrap** is
@@ -314,21 +345,27 @@ WS (especially WS-C, per-arch) so any regression bisects to one arch's hook.
## Done criteria
-- No file under `src/link/` enumerates `RelocKind` arms: `reloc_width`,
- `reloc_uses_got`, `reloc_is_tls_got`, and the `LinkArchDesc.is_*` hooks are
- deleted; their consumers read the per-arch `RelocDesc`. (`rg "case R_(AARCH64|X64|RV)_"
- src/link` returns nothing.)
-- Every relocation static fact has exactly one source: width + class flags in the
- per-arch `RelocDesc` slice, wire encoding + name in `src/obj/<fmt>/reloc_<arch>.c`.
-- `link_reloc_apply` remains the single public byte-patcher entry; its
- instruction-encoding arms live in `src/arch/<arch>/reloc.c`, the obj layer keeps
- only the arch-neutral data-word arms.
-- Adding a hypothetical new arch's relocation touches only that arch's
- `src/arch/<arch>/reloc.c` and its `src/obj/<fmt>/reloc_<arch>.c` — verified by the
- `test/obj/reloc_desc` exhaustiveness test failing until the new rows exist, and by
- no generic file needing edits.
-- (Optional/low-pri) the `tpoff64_reloc` field is retired by the `R_TPOFF64`
- collapse; the `object_file.c` `reloc_name` gate is removed and the objdump golden
- corpus refreshed; `link_elf_irelative_type` names no literal format.
-- `make bootstrap` (debug + release) reaches the byte-identical fixed point; the full
- link/elf/macho/coff/isa/asm/smoke matrix passes.
+All criteria below are met by WS-B + WS-C, except the optional WS-A enum collapse (still deferred).
+
+- ✓ No file under `src/link/` enumerates `RelocKind` arms: `reloc_width`,
+ `reloc_uses_got`, `reloc_is_tls_got`, the `LinkArchDesc.is_*` hooks, **and the
+ byte-patcher's instruction arms** are gone; consumers read the per-arch
+ `RelocDesc` / call the per-arch `reloc_apply_insn`. (`rg "case R_(AARCH64|X64|RV)_"
+ src/link` returns nothing — the WS-C dispatcher is case-free.)
+- ✓ Every relocation static fact has exactly one source: width + class flags in the
+ per-arch `RelocDesc` slice, wire encoding + name in `src/obj/<fmt>/reloc_<arch>.c`,
+ **and the instruction byte encoder in that arch's `reloc.c` `*_reloc_apply_insn`**.
+- ✓ `link_reloc_apply` remains the single public byte-patcher entry (now in
+ `src/link/link_reloc_apply.c`); its instruction-encoding arms live in
+ `src/arch/<arch>/reloc.c`, the obj layer keeps only the arch-neutral data-word arms
+ (`reloc_apply_neutral`).
+- ✓ Adding a hypothetical new arch's relocation touches only that arch's
+ `src/arch/<arch>/reloc.c` (one `RelocDesc` row + one `reloc_apply_insn` arm) and its
+ `src/obj/<fmt>/reloc_<arch>.c` — guarded by `test/link/reloc_desc_test.c` (rows) and
+ `test/link/reloc_apply_test.c` (bytes); no generic file needs edits.
+- (Optional/low-pri, **still open** — WS-A) the `tpoff64_reloc` field is retired by the
+ `R_TPOFF64` collapse. (The `object_file.c` `reloc_name` gate removal, the objdump golden
+ refresh, and the `link_elf_irelative_type` literal-format removal already landed under WS-E.)
+- ✓ `make bootstrap-debug` reaches the byte-identical fixed point; the full
+ link/elf/macho/coff/isa/asm/opt/smoke matrix passes. (Release bootstrap carries a
+ PRE-EXISTING `.Lkit_jt.0` break unrelated to this work — gate on `bootstrap-debug`.)
diff --git a/mk/test.mk b/mk/test.mk
@@ -95,6 +95,7 @@ TEST_TARGETS = \
test-link \
test-link-reloc-uleb128 \
test-link-reloc-desc \
+ test-link-reloc-apply \
test-macho \
test-native-direct-target \
test-opt \
@@ -161,6 +162,7 @@ DEFAULT_TEST_TARGETS = \
test-asm-symmetry \
test-link-reloc-uleb128 \
test-link-reloc-desc \
+ test-link-reloc-apply \
test-dbg \
test-disasm-complete \
test-macho \
@@ -383,6 +385,15 @@ RELOC_DESC_TEST_BIN = build/test/reloc_desc_test
test-link-reloc-desc: $(RELOC_DESC_TEST_BIN)
$(RELOC_DESC_TEST_BIN)
+# Relocation byte-encoder migration guard (doc/plan/RELOC.md, WS-C): pins
+# link_reloc_apply to the frozen pre-refactor patched bytes for every
+# instruction-immediate kind, so partitioning the encoders per-arch stays
+# byte-identical. Internal-surface, so links the raw lib objects.
+RELOC_APPLY_TEST_BIN = build/test/reloc_apply_test
+
+test-link-reloc-apply: $(RELOC_APPLY_TEST_BIN)
+ $(RELOC_APPLY_TEST_BIN)
+
# test-emu-unit: white-box unit tests for the emulator's INTERNAL units (rv64
# decoder, EmuAddrSpace, Linux syscall handler) that have no public API. Reaches
# internal symbols -> links $(LIB_OBJS) (mirrors test-interp), not the archive.
diff --git a/mk/test_unit.mk b/mk/test_unit.mk
@@ -49,7 +49,8 @@ x64_inline_test_SRC := test/arch/x64_inline_test.c
UNIT_TESTS_INTERNAL := \
dwarf_test debug_roundtrip_unit debug_cfi_unit \
aa64_isa_test rv64_decode_test rv32_decode_test aa64_sweep_gen \
- reloc_uleb128_unit reloc_desc_test emu_rv64_unit_test interp_smoke_test \
+ reloc_uleb128_unit reloc_desc_test reloc_apply_test emu_rv64_unit_test \
+ interp_smoke_test \
rv64_interp_smoke_test abi_classify_test ir_recorder_test \
native_direct_target_test x64_dbg_test cg_ir_lower_test tiny_inline_test
dwarf_test_SRC := test/dwarf/dwarf_test.c
@@ -61,6 +62,7 @@ rv32_decode_test_SRC := test/arch/rv32_decode_test.c
aa64_sweep_gen_SRC := test/arch/aa64_sweep_gen.c
reloc_uleb128_unit_SRC := test/link/reloc_uleb128_unit.c
reloc_desc_test_SRC := test/link/reloc_desc_test.c
+reloc_apply_test_SRC := test/link/reloc_apply_test.c
emu_rv64_unit_test_SRC := test/emu/rv64_vm_unit_test.c
interp_smoke_test_SRC := test/interp/interp_smoke_test.c
rv64_interp_smoke_test_SRC := test/emu/rv64_interp_smoke_test.c
diff --git a/src/arch/aa64/link.c b/src/arch/aa64/link.c
@@ -172,9 +172,11 @@ void aa64_emit_macho_stub(u8* out, u64 stub_vaddr, u64 got_slot_vaddr) {
wr_u32_le(out + 8, aa64_br(AA64_PLT_SCRATCH_X16));
}
-/* Width + classification rows for AArch64's relocation kinds; defined in
- * src/arch/aa64/reloc.c and consulted through the .reloc_desc hook. */
+/* Width + classification rows + instruction-immediate byte encoders for
+ * AArch64's relocation kinds; defined in src/arch/aa64/reloc.c and consulted
+ * through the .reloc_desc / .reloc_apply_insn hooks. */
const RelocDesc* aa64_reloc_desc(RelocKind);
+int aa64_reloc_apply_insn(Compiler*, RelocKind, u8*, u64, i64, u64);
/* AArch64 __chkstk for PE/COFF: probes `x15 * 16` bytes of stack one page at a
* time, then returns. Mirrors the LLVM compiler-rt implementation (chkstk.S in
@@ -201,6 +203,7 @@ const LinkArchDesc link_arch_aa64 = {
.emit_iplt_stub = aa64_emit_iplt_stub,
.reloc_desc = aa64_reloc_desc,
+ .reloc_apply_insn = aa64_reloc_apply_insn,
/* AAPCS64 variant I: GOT TLS-IE slots hold (X - tls_vaddr) + TCB. */
.tpoff64_reloc = R_AARCH64_TPOFF64,
diff --git a/src/arch/aa64/reloc.c b/src/arch/aa64/reloc.c
@@ -13,6 +13,8 @@
#include "obj/reloc.h"
+#include "core/bytes.h"
+
static const RelocDescRow aa64_rows[] = {
{R_AARCH64_ABS16, {2, 0}},
{R_AARCH64_PREL16, {2, 0}},
@@ -48,3 +50,175 @@ const RelocDesc* aa64_reloc_desc(RelocKind k) {
return reloc_desc_row_find(aa64_rows,
(u32)(sizeof aa64_rows / sizeof aa64_rows[0]), k);
}
+
+/* AArch64 instruction-immediate byte encoders (WS-C). Moved verbatim from the
+ * format-neutral byte-patcher; reached via LinkArchDesc.reloc_apply_insn for
+ * the instruction-embedded kinds. Encoding references: ARM ARMv8-A "ELF for
+ * the ARM 64-bit Architecture (AArch64)" §5.7.
+ *
+ * Parameters:
+ * c        used only as the compiler_panic() context when a check fails.
+ * k        relocation kind; selects which immediate field is rewritten.
+ * P_bytes  the instruction word to patch, read and written as one 4-byte
+ *          little-endian value (rd_u32_le / wr_u32_le).
+ * S, A, P  combined as S + A for the LO12 / TPREL kinds and as S + A - P for
+ *          the pc-relative kinds; the ADRP kinds subtract 4KiB page bases.
+ *
+ * Every arm is a read-modify-write: only the immediate field is replaced and
+ * the other instruction bits are kept. Misaligned or out-of-range values are
+ * reported through compiler_panic(). Returns 1 if it owns `k` (the word was
+ * patched), 0 for any other kind (nothing is read or written). */
+int aa64_reloc_apply_insn(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A,
+ u64 P) {
+ switch (k) {
+ case R_AARCH64_CONDBR19:
+ case R_AARCH64_LD_PREL_LO19: {
+ /* B.cond / CB(N)Z / LDR (literal) — imm19 in 4-byte units,
+ * signed, at bits [23:5]. Range: ±1MiB. */
+ i64 disp = (i64)S + A - (i64)P;
+ u32 instr;
+ u32 imm19;
+ if (disp & 3)
+ compiler_panic(c, SRCLOC_NONE,
+ "link: imm19 reloc misaligned displacement");
+ if (disp < -(i64)(1 << 20) || disp >= (i64)(1 << 20))
+ compiler_panic(c, SRCLOC_NONE,
+ "link: imm19 reloc out of range (need ±1MiB)");
+ imm19 = (u32)((disp >> 2) & 0x7ffffu);
+ instr = rd_u32_le(P_bytes);
+ instr = (instr & ~(0x7ffffu << 5)) | (imm19 << 5);
+ wr_u32_le(P_bytes, instr);
+ return 1;
+ }
+ case R_AARCH64_TSTBR14: {
+ /* TBZ/TBNZ — imm14 in 4-byte units, signed, at bits [18:5].
+ * Range: ±32KiB. */
+ i64 disp = (i64)S + A - (i64)P;
+ u32 instr;
+ u32 imm14;
+ if (disp & 3)
+ compiler_panic(c, SRCLOC_NONE, "link: TSTBR14 misaligned displacement");
+ if (disp < -(i64)(1 << 15) || disp >= (i64)(1 << 15))
+ compiler_panic(c, SRCLOC_NONE,
+ "link: TSTBR14 out of range (need ±32KiB)");
+ imm14 = (u32)((disp >> 2) & 0x3fffu);
+ instr = rd_u32_le(P_bytes);
+ instr = (instr & ~(0x3fffu << 5)) | (imm14 << 5);
+ wr_u32_le(P_bytes, instr);
+ return 1;
+ }
+ case R_AARCH64_ADR_PREL_LO21: {
+ /* ADR — byte-granularity imm21, encoded as immlo[30:29] +
+ * immhi[23:5]. No 12-bit shift (unlike ADRP). Range: ±1MiB.
+ * No alignment check: the displacement is in bytes. The
+ * 0x9f00001f mask clears exactly those two fields (bits 30:29
+ * and 23:5) before the new immediate is ORed in. */
+ i64 disp = (i64)S + A - (i64)P;
+ u32 instr;
+ u32 immlo, immhi;
+ if (disp < -(i64)(1 << 20) || disp >= (i64)(1 << 20))
+ compiler_panic(c, SRCLOC_NONE,
+ "link: ADR_PREL_LO21 out of range (need ±1MiB)");
+ immlo = (u32)(disp & 0x3u);
+ immhi = (u32)((disp >> 2) & 0x7ffffu);
+ instr = rd_u32_le(P_bytes);
+ instr = (instr & 0x9f00001fu) | (immlo << 29) | (immhi << 5);
+ wr_u32_le(P_bytes, instr);
+ return 1;
+ }
+ case R_AARCH64_JUMP26:
+ case R_AARCH64_CALL26: {
+ /* B/BL imm26 — branch displacement in 4-byte units, signed.
+ * Clear bits [25:0] of the existing instruction and OR in the
+ * new imm26. Range check: ±128MiB. The diagnostics below name
+ * CALL26 for both kinds handled by this arm. */
+ i64 disp = (i64)S + A - (i64)P;
+ u32 instr;
+ u32 imm26;
+ if (disp & 3)
+ compiler_panic(c, SRCLOC_NONE, "link: CALL26 misaligned displacement");
+ if (disp < -(i64)(1 << 27) || disp >= (i64)(1 << 27))
+ compiler_panic(c, SRCLOC_NONE,
+ "link: CALL26 out of range (need ±128MiB)");
+ imm26 = (u32)((disp >> 2) & 0x3ffffffu);
+ instr = rd_u32_le(P_bytes);
+ instr = (instr & 0xfc000000u) | imm26;
+ wr_u32_le(P_bytes, instr);
+ return 1;
+ }
+ case R_AARCH64_TLVP_LOAD_PAGE21:
+ case R_AARCH64_ADR_GOT_PAGE:
+ case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
+ case R_AARCH64_ADR_PREL_PG_HI21:
+ case R_AARCH64_ADR_PREL_PG_HI21_NC: {
+ /* ADRP — page-relative imm21, encoded as immlo[30:29] +
+ * immhi[23:5]. Effective immediate is (S+A) page minus P page,
+ * shifted right by 12, sign-extended to 33 bits. _NC variant
+ * skips the range check (compiler asserts it can't overflow,
+ * e.g. when paired with explicit page bracketing). The
+ * diagnostic names ADR_PREL_PG_HI21 for every kind handled by
+ * this arm. */
+ i64 page_s = ((i64)S + A) & ~(i64)0xfff;
+ i64 page_p = (i64)P & ~(i64)0xfff;
+ i64 disp = page_s - page_p;
+ i64 imm21 = disp >> 12;
+ u32 instr;
+ u32 immlo, immhi;
+ if (k != R_AARCH64_ADR_PREL_PG_HI21_NC &&
+ (imm21 < -(i64)(1 << 20) || imm21 >= (i64)(1 << 20)))
+ compiler_panic(c, SRCLOC_NONE,
+ "link: ADR_PREL_PG_HI21 out of range (need ±4GiB)");
+ immlo = (u32)(imm21 & 0x3u);
+ immhi = (u32)((imm21 >> 2) & 0x7ffffu);
+ instr = rd_u32_le(P_bytes);
+ instr = (instr & 0x9f00001fu) | (immlo << 29) | (immhi << 5);
+ wr_u32_le(P_bytes, instr);
+ return 1;
+ }
+ case R_AARCH64_ADD_ABS_LO12_NC: {
+ /* ADD (immediate) imm12 at bits [21:10]. NC = no overflow check. */
+ u64 v = ((u64)S + (u64)A) & 0xfffu;
+ u32 instr = rd_u32_le(P_bytes);
+ instr = (instr & ~(0xfffu << 10)) | ((u32)v << 10);
+ wr_u32_le(P_bytes, instr);
+ return 1;
+ }
+ case R_AARCH64_TLSLE_ADD_TPREL_HI12:
+ case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: {
+ /* AArch64 TLS local-exec. Caller passes S already as the
+ * TP-relative offset (target's image offset minus the TLS
+ * image base, plus the 16-byte AArch64 TCB). HI12 takes
+ * bits 23:12, LO12_NC takes bits 11:0; both encoded as
+ * imm12 at instruction bits [21:10] of an ADD (immediate).
+ * The HI12 form's instruction carries LSL #12 in its opcode,
+ * so bits 11:0 of the operand naturally land at scale 4096.
+ * NOTE(review): HI12 is masked to 12 bits with no overflow
+ * check, so an offset of 16MiB or more is silently truncated —
+ * confirm that is intended. */
+ u64 v = (u64)((i64)S + A);
+ u32 imm12 = (k == R_AARCH64_TLSLE_ADD_TPREL_HI12)
+ ? (u32)((v >> 12) & 0xfffu)
+ : (u32)(v & 0xfffu);
+ u32 instr = rd_u32_le(P_bytes);
+ instr = (instr & ~(0xfffu << 10)) | (imm12 << 10);
+ wr_u32_le(P_bytes, instr);
+ return 1;
+ }
+ case R_AARCH64_LDST8_ABS_LO12_NC:
+ case R_AARCH64_LDST16_ABS_LO12_NC:
+ case R_AARCH64_LDST32_ABS_LO12_NC:
+ case R_AARCH64_LDST64_ABS_LO12_NC:
+ case R_AARCH64_LDST128_ABS_LO12_NC:
+ case R_AARCH64_LD64_GOT_LO12_NC:
+ case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
+ case R_AARCH64_TLVP_LOAD_PAGEOFF12: {
+ /* LDR/STR with imm12 at bits [21:10]; the imm is scaled by the
+ * access size, so we right-shift the low 12 bits of (S+A) by
+ * the size scale before encoding. NC = no overflow check.
+ *
+ * LD64_GOT_LO12_NC has the same encoding as LDST64_ABS_LO12_NC;
+ * the linker has already redirected `S` to the GOT slot.
+ *
+ * `shift` is log2 of the access size in bytes (0..4); any kind
+ * not matched explicitly below falls through to 4, which in this
+ * arm is only LDST128. Low bits that the shift would drop are
+ * rejected as misaligned; the panic text prints the access
+ * width in bits (8 << shift). */
+ u32 shift = (k == R_AARCH64_LDST8_ABS_LO12_NC) ? 0u
+ : (k == R_AARCH64_LDST16_ABS_LO12_NC) ? 1u
+ : (k == R_AARCH64_LDST32_ABS_LO12_NC) ? 2u
+ : (k == R_AARCH64_LDST64_ABS_LO12_NC ||
+ k == R_AARCH64_LD64_GOT_LO12_NC ||
+ k == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC ||
+ k == R_AARCH64_TLVP_LOAD_PAGEOFF12)
+ ? 3u
+ : 4u;
+ u64 lo12 = ((u64)S + (u64)A) & 0xfffu;
+ u64 imm12 = lo12 >> shift;
+ u32 instr = rd_u32_le(P_bytes);
+ if (lo12 & ((1u << shift) - 1u))
+ compiler_panic(c, SRCLOC_NONE,
+ "link: LDST%u_ABS_LO12_NC misaligned address "
+ "(kind=%u S=0x%llx A=%lld P=0x%llx)",
+ 1u << (3 + shift), (unsigned)k, (unsigned long long)S,
+ (long long)A, (unsigned long long)P);
+ instr = (instr & ~(0xfffu << 10)) | ((u32)(imm12 & 0xfffu) << 10);
+ wr_u32_le(P_bytes, instr);
+ return 1;
+ }
+ default:
+ return 0;
+ }
+}
diff --git a/src/arch/riscv/link.c b/src/arch/riscv/link.c
@@ -110,8 +110,10 @@ static u32 rv32_emit_iplt_stub(u8* dst, u64 stub_vaddr, u64 slot_vaddr,
* .reloc_desc hook. R_RV_CALL / R_PLT32 carry RELOC_IS_BRANCH: a direct
* AUIPC+JALR reaches only ±2GiB, so a too-far target (e.g. a JIT-resolved
* host libc symbol) routes through the call-stub pass, the same safety net
- * aa64 and x64 wire. */
+ * aa64 and x64 wire. rv_reloc_apply_insn (same file) holds the matching
+ * U/I/S/B/J + RVC instruction-immediate byte encoders. */
const RelocDesc* rv_reloc_desc(RelocKind);
+int rv_reloc_apply_insn(Compiler*, RelocKind, u8*, u64, i64, u64);
const LinkArchDesc link_arch_rv64 = {
.plt0_size = RV64_PLT0_SIZE,
@@ -123,6 +125,7 @@ const LinkArchDesc link_arch_rv64 = {
.emit_plt_entry = rv64_emit_plt_entry,
.emit_iplt_stub = rv64_emit_iplt_stub,
.reloc_desc = rv_reloc_desc,
+ .reloc_apply_insn = rv_reloc_apply_insn,
/* RISC-V variant I shares the internal raw-64-bit variant-I tpoff with
* AArch64 ((X - tls_vaddr) + TCB); there is no R_RV_TPOFF64. */
.tpoff64_reloc = R_AARCH64_TPOFF64,
@@ -142,6 +145,7 @@ const LinkArchDesc link_arch_rv32 = {
.emit_plt_entry = rv32_emit_plt_entry,
.emit_iplt_stub = rv32_emit_iplt_stub,
.reloc_desc = rv_reloc_desc,
+ .reloc_apply_insn = rv_reloc_apply_insn,
/* See rv64: shares R_AARCH64_TPOFF64 (variant-I internal tpoff). */
.tpoff64_reloc = R_AARCH64_TPOFF64,
};
diff --git a/src/arch/riscv/reloc.c b/src/arch/riscv/reloc.c
@@ -19,6 +19,8 @@
#include "obj/reloc.h"
+#include "core/bytes.h"
+
static const RelocDescRow rv_rows[] = {
{R_RV_HI20, {4, 0}},
{R_RV_LO12_I, {4, 0}},
@@ -60,3 +62,206 @@ const RelocDesc* rv_reloc_desc(RelocKind k) {
return reloc_desc_row_find(rv_rows, (u32)(sizeof rv_rows / sizeof rv_rows[0]),
k);
}
+
+/* RISC-V instruction-immediate byte encoders (WS-C), shared by rv64 and rv32.
+ * Moved verbatim from the format-neutral byte-patcher; reached via
+ * LinkArchDesc.reloc_apply_insn. Encoding references: "RISC-V ELF psABI" §3
+ * and "The RISC-V Instruction Set Manual, Volume I" Ch.19. The +0x800 bias on
+ * HI20 / CALL compensates the sign-extension of the paired 12-bit immediate.
+ * The data-word ADD/SUB/SET arms and the ULEB128 codec are arch-neutral byte
+ * writes and stay in the obj-core neutral path.
+ *
+ * Parameters:
+ * c        used only as the compiler_panic() context when a check fails.
+ * k        relocation kind; selects which immediate layout is rewritten.
+ * P_bytes  bytes patched in place: one 4-byte little-endian word for the
+ *          U/I/S/B/J kinds, two consecutive words (AUIPC, then JALR at +4)
+ *          for R_RV_CALL / R_PLT32, one 2-byte halfword for the RVC kinds.
+ * S, A, P  combined as S + A for the HI20 / LO12 (incl. TPREL) kinds and as
+ *          S + A - P for the pc-relative kinds.
+ *
+ * Only the immediate bits are replaced; the remaining instruction bits are
+ * kept. The branch / jump / call arms report misaligned or out-of-range
+ * displacements through compiler_panic(); the HI20 / LO12 arms perform no
+ * range check. R_RV_RELAX and R_RV_TPREL_ADD are accepted without touching
+ * any byte. Returns 1 if it owns `k`, 0 for any other kind. */
+int rv_reloc_apply_insn(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A,
+ u64 P) {
+ switch (k) {
+ case R_RV_HI20:
+ case R_RV_TPREL_HI20: {
+ /* U-type (LUI/AUIPC) imm[31:12] = high 20 bits of (S + A + 0x800).
+ * The 0x800 bias compensates the sign-extension of the paired
+ * 12-bit ADDI/load/store immediate, so HI20 + signext12(LO12)
+ * reconstructs the full value.
+ * NOTE(review): there is no range check here — bits above 31 of
+ * the biased value are dropped by the 0xfffff mask; confirm
+ * callers guarantee a 32-bit-representable value. */
+ i64 v = (i64)S + A;
+ u32 hi20 = (u32)(((u64)(v + 0x800)) >> 12) & 0xfffffu;
+ u32 instr = rd_u32_le(P_bytes);
+ instr = (instr & 0x00000fffu) | (hi20 << 12);
+ wr_u32_le(P_bytes, instr);
+ return 1;
+ }
+ case R_RV_PCREL_HI20:
+ case R_RV_GOT_HI20:
+ case R_RV_TLS_GOT_HI20: {
+ /* AUIPC pc-relative HI20: same encoding as HI20 but the
+ * displacement is (S + A) - P. The paired PCREL_LO12 reloc at
+ * the ADDI/load below recovers the low 12 bits of the same
+ * displacement via a lookup keyed on this AUIPC's site vaddr.
+ * GOT_HI20 collapses to PCREL_HI20 in static-link with no
+ * indirection: the symbol resolves to its own address.
+ * NOTE(review): no PCREL_LO12 arm exists in this function —
+ * presumably the caller resolves the pairing and applies it
+ * through the LO12_I / LO12_S arms; confirm. As with HI20,
+ * the displacement is not range-checked. */
+ i64 disp = (i64)S + A - (i64)P;
+ u32 hi20 = (u32)(((u64)(disp + 0x800)) >> 12) & 0xfffffu;
+ u32 instr = rd_u32_le(P_bytes);
+ instr = (instr & 0x00000fffu) | (hi20 << 12);
+ wr_u32_le(P_bytes, instr);
+ return 1;
+ }
+ case R_RV_LO12_I:
+ case R_RV_TPREL_LO12_I: {
+ /* I-type imm[11:0] in instruction bits [31:20]. Low 12 bits of
+ * (S + A); the sign-extension at execute time pairs with HI20's
+ * 0x800 bias to reconstruct the full address. */
+ u64 v = (u64)((i64)S + A);
+ u32 lo12 = (u32)(v & 0xfffu);
+ u32 instr = rd_u32_le(P_bytes);
+ instr = (instr & 0x000fffffu) | (lo12 << 20);
+ wr_u32_le(P_bytes, instr);
+ return 1;
+ }
+ case R_RV_LO12_S:
+ case R_RV_TPREL_LO12_S: {
+ /* S-type imm[11:5] in bits [31:25], imm[4:0] in bits [11:7].
+ * The 0x01fff07f mask keeps bits [24:12] and [6:0], i.e.
+ * everything except the two immediate fields. */
+ u64 v = (u64)((i64)S + A);
+ u32 lo12 = (u32)(v & 0xfffu);
+ u32 instr = rd_u32_le(P_bytes);
+ instr = (instr & 0x01fff07fu) | ((lo12 & 0xfe0u) << 20) |
+ ((lo12 & 0x1fu) << 7);
+ wr_u32_le(P_bytes, instr);
+ return 1;
+ }
+ case R_RV_BRANCH: {
+ /* B-type 12-bit signed displacement in 2-byte units (13-bit
+ * range). imm[12] in bit 31, imm[10:5] in 30:25, imm[4:1] in
+ * 11:8, imm[11] in bit 7. `b` holds displacement bits [12:1];
+ * the explicit bit-11 / bit-12 terms repeat bits the 0x1ffe
+ * mask already selects. */
+ i64 disp = (i64)S + A - (i64)P;
+ u32 instr;
+ u32 b;
+ if (disp & 1)
+ compiler_panic(c, SRCLOC_NONE,
+ "link: RV BRANCH misaligned displacement");
+ if (disp < -(i64)(1 << 12) || disp >= (i64)(1 << 12))
+ compiler_panic(c, SRCLOC_NONE,
+ "link: RV BRANCH out of range (need ±4KiB)");
+ b = (u32)((u64)disp & 0x1ffeu) | ((u32)(((u64)disp >> 11) & 1u) << 11) |
+ ((u32)(((u64)disp >> 12) & 1u) << 12);
+ instr = rd_u32_le(P_bytes);
+ instr &= 0x01fff07fu;
+ instr |= ((b >> 12) & 1u) << 31;
+ instr |= ((b >> 5) & 0x3fu) << 25;
+ instr |= ((b >> 1) & 0xfu) << 8;
+ instr |= ((b >> 11) & 1u) << 7;
+ wr_u32_le(P_bytes, instr);
+ return 1;
+ }
+ case R_RV_JAL: {
+ /* J-type 20-bit signed displacement in 2-byte units (21-bit
+ * range). imm[20] in bit 31, imm[10:1] in 30:21, imm[11] in bit
+ * 20, imm[19:12] in bits 19:12. `b` holds displacement bits
+ * [20:1]; the explicit bit-11 / bit-20 terms repeat bits the
+ * 0x1ffffe mask already selects. */
+ i64 disp = (i64)S + A - (i64)P;
+ u32 instr;
+ u32 b;
+ if (disp & 1)
+ compiler_panic(c, SRCLOC_NONE, "link: RV JAL misaligned displacement");
+ if (disp < -(i64)(1 << 20) || disp >= (i64)(1 << 20))
+ compiler_panic(c, SRCLOC_NONE,
+ "link: RV JAL out of range (need ±1MiB)");
+ b = (u32)((u64)disp & 0x1ffffeu) | ((u32)(((u64)disp >> 11) & 1u) << 11) |
+ ((u32)(((u64)disp >> 20) & 1u) << 20);
+ instr = rd_u32_le(P_bytes);
+ instr &= 0x00000fffu;
+ instr |= ((b >> 20) & 1u) << 31;
+ instr |= ((b >> 1) & 0x3ffu) << 21;
+ instr |= ((b >> 11) & 1u) << 20;
+ instr |= ((b >> 12) & 0xffu) << 12;
+ wr_u32_le(P_bytes, instr);
+ return 1;
+ }
+ case R_RV_CALL:
+ case R_PLT32: {
+ /* AUIPC + JALR pair encoding the same 32-bit signed PC-relative
+ * displacement. AUIPC at P, JALR at P+4. The 0x800 bias on the
+ * AUIPC immediate compensates JALR's signed 12-bit imm so that
+ * (auipc_imm << 12) + signext12(jalr_imm) == disp.
+ *
+ * R_PLT32 is the kit-canonical RelocKind that
+ * elf_riscv64_reloc_from(R_RISCV_CALL_PLT) maps to; static-link
+ * with no PLT collapses CALL_PLT to a direct CALL (no
+ * indirection).
+ *
+ * Both words are read before the range check and written only
+ * after it.
+ * NOTE(review): the check admits disp in [2^31 - 0x800, 2^31),
+ * for which disp + 0x800 no longer fits in 32 signed bits and
+ * hi20 wraps to 0x80000. That wrap looks harmless with 32-bit
+ * addresses, but on RV64 AUIPC sign-extends its immediate —
+ * confirm whether the top 2KiB of the range must be rejected
+ * there. */
+ i64 disp = (i64)S + A - (i64)P;
+ u32 hi20 = (u32)(((u64)(disp + 0x800)) >> 12) & 0xfffffu;
+ u32 lo12 = (u32)((u64)disp & 0xfffu);
+ u32 auipc = rd_u32_le(P_bytes);
+ u32 jalr = rd_u32_le(P_bytes + 4);
+ if (disp < -(i64)(1ll << 31) || disp >= (i64)(1ll << 31))
+ compiler_panic(c, SRCLOC_NONE,
+ "link: RV CALL out of range (need ±2GiB)");
+ auipc = (auipc & 0x00000fffu) | (hi20 << 12);
+ jalr = (jalr & 0x000fffffu) | (lo12 << 20);
+ wr_u32_le(P_bytes, auipc);
+ wr_u32_le(P_bytes + 4, jalr);
+ return 1;
+ }
+ case R_RV_RVC_BRANCH: {
+ /* CB-type 8-bit signed displacement in 2-byte units (9-bit
+ * range). c.beqz / c.bnez. Encoding (16-bit instruction):
+ * bit 12 = imm[8]
+ * bits 11:10 = imm[4:3]
+ * bits 9:7 = rs1' (untouched)
+ * bits 6:5 = imm[7:6]
+ * bits 4:3 = imm[2:1]
+ * bit 2 = imm[5]
+ * The halfword is assembled from and stored back to the two
+ * bytes little-endian; the 0xe383 mask keeps bits [15:13],
+ * [9:7] and [1:0]. */
+ i64 disp = (i64)S + A - (i64)P;
+ u16 instr = (u16)(P_bytes[0] | ((u16)P_bytes[1] << 8));
+ u32 b;
+ if (disp & 1)
+ compiler_panic(c, SRCLOC_NONE,
+ "link: RV RVC_BRANCH misaligned displacement");
+ if (disp < -(i64)(1 << 8) || disp >= (i64)(1 << 8))
+ compiler_panic(c, SRCLOC_NONE,
+ "link: RV RVC_BRANCH out of range (need ±256B)");
+ b = (u32)((u64)disp & 0x1feu);
+ instr = (u16)(instr & 0xe383u);
+ instr = (u16)(instr | (((b >> 8) & 1u) << 12));
+ instr = (u16)(instr | (((b >> 3) & 3u) << 10));
+ instr = (u16)(instr | (((b >> 6) & 3u) << 5));
+ instr = (u16)(instr | (((b >> 1) & 3u) << 3));
+ instr = (u16)(instr | (((b >> 5) & 1u) << 2));
+ P_bytes[0] = (u8)(instr & 0xffu);
+ P_bytes[1] = (u8)((instr >> 8) & 0xffu);
+ return 1;
+ }
+ case R_RV_RVC_JUMP: {
+ /* CJ-type 11-bit signed displacement in 2-byte units (12-bit
+ * range). c.j / c.jal. Encoding bits in the 16-bit instruction:
+ * 12=imm[11], 11=imm[4], 10:9=imm[9:8], 8=imm[10],
+ * 7=imm[6], 6=imm[7], 5:3=imm[3:1], 2=imm[5].
+ * The 0xe003 mask keeps bits [15:13] and [1:0]; every other
+ * bit belongs to the immediate. */
+ i64 disp = (i64)S + A - (i64)P;
+ u16 instr = (u16)(P_bytes[0] | ((u16)P_bytes[1] << 8));
+ u32 b;
+ if (disp & 1)
+ compiler_panic(c, SRCLOC_NONE,
+ "link: RV RVC_JUMP misaligned displacement");
+ if (disp < -(i64)(1 << 11) || disp >= (i64)(1 << 11))
+ compiler_panic(c, SRCLOC_NONE,
+ "link: RV RVC_JUMP out of range (need ±2KiB)");
+ b = (u32)((u64)disp & 0xffeu);
+ instr = (u16)(instr & 0xe003u);
+ instr = (u16)(instr | (((b >> 11) & 1u) << 12));
+ instr = (u16)(instr | (((b >> 4) & 1u) << 11));
+ instr = (u16)(instr | (((b >> 8) & 3u) << 9));
+ instr = (u16)(instr | (((b >> 10) & 1u) << 8));
+ instr = (u16)(instr | (((b >> 6) & 1u) << 7));
+ instr = (u16)(instr | (((b >> 7) & 1u) << 6));
+ instr = (u16)(instr | (((b >> 1) & 7u) << 3));
+ instr = (u16)(instr | (((b >> 5) & 1u) << 2));
+ P_bytes[0] = (u8)(instr & 0xffu);
+ P_bytes[1] = (u8)((instr >> 8) & 0xffu);
+ return 1;
+ }
+ case R_RV_RELAX:
+ case R_RV_TPREL_ADD:
+ /* Marker relocs only — RELAX permits the prior reloc to be
+ * compressed, TPREL_ADD annotates a TLS thread-pointer ADD that
+ * the linker may fold during relaxation. We don't relax, so
+ * both are no-ops. Returning 1 still claims the kind, so the
+ * caller sees it as handled. */
+ return 1;
+ default:
+ return 0;
+ }
+}
diff --git a/src/arch/x64/link.c b/src/arch/x64/link.c
@@ -56,9 +56,11 @@ static u32 x64_emit_iplt_stub(u8* dst, u64 stub_vaddr, u64 slot_vaddr,
return 0;
}
-/* Width + classification rows for x86-64's relocation kinds; defined in
- * src/arch/x64/reloc.c and consulted through the .reloc_desc hook. */
+/* Width + classification rows + instruction-immediate byte encoders for
+ * x86-64's relocation kinds; defined in src/arch/x64/reloc.c and consulted
+ * through the .reloc_desc / .reloc_apply_insn hooks. */
const RelocDesc* x64_reloc_desc(RelocKind);
+int x64_reloc_apply_insn(Compiler*, RelocKind, u8*, u64, i64, u64);
/* PE/COFF IAT stub for x86_64 (6 B):
*
@@ -85,6 +87,7 @@ const LinkArchDesc link_arch_x64 = {
.emit_iplt_stub = x64_emit_iplt_stub,
.reloc_desc = x64_reloc_desc,
+ .reloc_apply_insn = x64_reloc_apply_insn,
/* x86_64 variant II: GOT TLS-IE slots hold (X - tls_memsz). */
.tpoff64_reloc = R_X64_TPOFF64,
diff --git a/src/arch/x64/reloc.c b/src/arch/x64/reloc.c
@@ -15,6 +15,8 @@
#include "obj/reloc.h"
+#include "core/bytes.h"
+
static const RelocDescRow x64_rows[] = {
{R_X64_PC8, {1, 0}},
{R_X64_32S, {4, 0}},
@@ -36,3 +38,23 @@ const RelocDesc* x64_reloc_desc(RelocKind k) {
return reloc_desc_row_find(x64_rows,
(u32)(sizeof x64_rows / sizeof x64_rows[0]), k);
}
+
+/* x86-64 instruction-embedded relocation encoder (WS-C), reached through
+ * LinkArchDesc.reloc_apply_insn.
+ *
+ * R_X64_PC8 is the only kind owned here: the pc-relative rel8 displacement
+ * S + A - P, stored in the single byte at P_bytes[0]; a value outside
+ * [-128, 127] panics. The wider GOT/PLT/TPOFF kinds are plain little-endian
+ * data words and stay in the obj-core neutral path.
+ *
+ * Returns 1 if it owns `k`, 0 for every other kind. */
+int x64_reloc_apply_insn(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A,
+                         u64 P) {
+  i64 rel;
+  if (k != R_X64_PC8) return 0;
+  rel = (i64)S + A - (i64)P;
+  if (!(rel >= -128 && rel <= 127))
+    compiler_panic(c, SRCLOC_NONE, "link: X64_PC8 out of range");
+  P_bytes[0] = (u8)((u64)rel & 0xffu);
+  return 1;
+}
diff --git a/src/link/link_arch.h b/src/link/link_arch.h
@@ -85,6 +85,19 @@ typedef struct LinkArchDesc {
* reloc_width / reloc_uses_got / reloc_is_tls_got switches. */
const RelocDesc* (*reloc_desc)(RelocKind);
+ /* This arch's instruction-immediate relocation byte encoders: patch the
+ * `width` bytes at P_bytes for an instruction-embedded kind (imm19/imm26/
+ * ADRP-page on AArch64; U/I/S/B/J + RVC immediate scatter on RISC-V; the
+ * pc-relative rel8 on x86-64). Returns 1 if it owns and applied `k`, 0 if
+ * `k` is not one of this arch's instruction kinds (the dispatcher then
+ * panics "unsupported reloc kind"). The arch-neutral data-word kinds
+ * (the R_ABS / R_REL / R_PC / R_TPOFF writes, the RISC-V data ADD/SUB/SET
+ * arithmetic, and the ULEB128 codec) are handled by reloc_apply_neutral() in
+ * the obj core before this hook is consulted — see link_reloc_apply
+ * (src/link/link_reloc_apply.c). */
+ int (*reloc_apply_insn)(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A,
+ u64 P);
+
/* ---- TLS Initial-Exec GOT slot fill ----
* The internal raw-64-bit local-exec tpoff written into a TLS GOT slot
* (link_emit_internal_tpoff64): x86_64 uses R_X64_TPOFF64 (variant II,
diff --git a/src/link/link_reloc_apply.c b/src/link/link_reloc_apply.c
@@ -0,0 +1,29 @@
+/* The single public relocation byte-patcher entry.
+ *
+ * link_reloc_apply() is the "one encoder, three loaders" boundary (doc/OBJ.md):
+ * the static linker, JIT linker, assembler, and emulator guest loader all call
+ * it unchanged. It first tries the arch-neutral data-word encoders in obj-core
+ * (reloc_apply_neutral), then routes instruction-immediate kinds to the owning
+ * backend via LinkArchDesc.reloc_apply_insn, panicking only if neither claims
+ * the kind.
+ *
+ * It lives in src/link (not obj-core) because resolving the per-arch slice
+ * needs link_arch_desc_for() — housing the dispatcher in obj-core would invert
+ * the obj->link boundary, exactly as for WS-B's reloc_desc(). The neutral
+ * encoders it delegates to stay pure obj-core. See doc/plan/RELOC.md (WS-C).
+ */
+
+#include "core/core.h"
+#include "link/link_arch.h"
+#include "obj/reloc_apply.h"
+
+void link_reloc_apply(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A,
+                      u64 P) {
+  const LinkArchDesc* arch;
+  if (reloc_apply_neutral(c, k, P_bytes, S, A, P)) return; /* neutral kind */
+  arch = link_arch_desc_for(c);
+  if (!arch || !arch->reloc_apply_insn ||
+      !arch->reloc_apply_insn(c, k, P_bytes, S, A, P))
+    compiler_panic(c, SRCLOC_NONE, "link: unsupported reloc kind %u",
+                   (unsigned)k);
+}
diff --git a/src/obj/reloc_apply.c b/src/obj/reloc_apply.c
@@ -1,18 +1,21 @@
-/* Per-arch relocation byte application.
+/* Arch-neutral relocation byte application (the obj-core half).
*
- * Pure function: takes the resolved final addresses (S, P) and the
- * addend (A), and patches `width` bytes at the relocation site.
- * Callers (static linker, JIT linker, emulator dynamic loader) compute
- * loader/linker policy first; this routine sees only final values.
+ * reloc_apply_neutral() patches the kinds whose byte encoding is a plain
+ * little-endian data word — absolute / pc-relative writes, the x86-64 GOT/PLT/
+ * dynamic data slots, the RISC-V data ADD/SUB/SET arithmetic, and the ULEB128
+ * codec — i.e. everything that carries NO instruction-field knowledge. It is
+ * pure obj-core: no link or arch dependency, so it stays usable by every
+ * loader (static linker, JIT linker, assembler, emulator) without pulling in
+ * the link layer.
*
- * Encoding references:
- * AArch64: ARM ARMv8-A "ELF for the ARM 64-bit Architecture (AArch64)"
- * §5.7 (relocation types).
- * RISC-V: "RISC-V ELF psABI specification" §3 (relocation types) and
- * "The RISC-V Instruction Set Manual, Volume I" Chapter 19
- * (instruction encodings). Reloc semantics live behind the
- * R_RV_* RelocKind values; LO12_S sits at the S-type imm
- * slots, LO12_I at I-type, and BRANCH/JAL at B/J-type. */
+ * The instruction-immediate encoders (AArch64 imm19/imm26/ADRP page math;
+ * RISC-V U/I/S/B/J + RVC scatter and the 0x800 HI20 bias; x86-64 rel8) live in
+ * each backend's src/arch/<arch>/reloc.c and are reached through
+ * LinkArchDesc.reloc_apply_insn. The single public byte-patcher entry,
+ * link_reloc_apply(), dispatches neutral-then-arch from src/link/
+ * link_reloc_apply.c — housed in the link layer because resolving the per-arch
+ * slice needs link_arch_desc_for() (same boundary call as WS-B's reloc_desc()).
+ * See doc/plan/RELOC.md (WS-C). */
#include "obj/reloc_apply.h"
@@ -80,8 +83,8 @@ static void reloc_uleb128_write_fixed(u8* p, u64 v, u32 width) {
}
}
-void link_reloc_apply(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A,
- u64 P) {
+int reloc_apply_neutral(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A,
+ u64 P) {
switch (k) {
case R_ABS32:
case R_X64_32S:
@@ -92,7 +95,7 @@ void link_reloc_apply(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A,
* At the byte level the encoding is identical. */
u64 v = S + (u64)A;
wr_u32_le(P_bytes, (u32)(v & 0xffffffffu));
- return;
+ return 1;
}
case R_ABS64:
case R_X64_TPOFF64:
@@ -103,7 +106,7 @@ void link_reloc_apply(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A,
* loader would otherwise do the same fixup at load time. */
u64 v = S + (u64)A;
wr_u64_le(P_bytes, v);
- return;
+ return 1;
}
case R_X64_GLOB_DAT:
case R_X64_JUMP_SLOT: {
@@ -111,13 +114,13 @@ void link_reloc_apply(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A,
* paths we write the resolved symbol value (S) into the GOT/PLT
* slot. Addend is unused per the x86_64 psABI. */
wr_u64_le(P_bytes, S);
- return;
+ return 1;
}
case R_X64_COPY:
compiler_panic(c, SRCLOC_NONE,
"link: R_X64_COPY belongs in dynamic loader, "
"not static link");
- return;
+ return 1;
case R_REL32:
case R_PC32:
case R_X64_PLT32:
@@ -139,14 +142,7 @@ void link_reloc_apply(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A,
* displacement collapses to a plain 32-bit PC-relative call. */
i64 v = (i64)S + A - (i64)P;
wr_u32_le(P_bytes, (u32)((u64)v & 0xffffffffu));
- return;
- }
- case R_X64_PC8: {
- i64 v = (i64)S + A - (i64)P;
- if (v < -128 || v > 127)
- compiler_panic(c, SRCLOC_NONE, "link: X64_PC8 out of range");
- P_bytes[0] = (u8)((u64)v & 0xffu);
- return;
+ return 1;
}
case R_REL64:
case R_PC64: {
@@ -155,429 +151,81 @@ void link_reloc_apply(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A,
* the arm64 kernel image_size header field). */
i64 v = (i64)S + A - (i64)P;
wr_u64_le(P_bytes, (u64)v);
- return;
+ return 1;
}
case R_AARCH64_ABS16: {
u64 v = S + (u64)A;
wr_u16_le(P_bytes, (u16)(v & 0xffffu));
- return;
+ return 1;
}
case R_AARCH64_PREL16: {
i64 v = (i64)S + A - (i64)P;
wr_u16_le(P_bytes, (u16)((u64)v & 0xffffu));
- return;
- }
- case R_AARCH64_CONDBR19:
- case R_AARCH64_LD_PREL_LO19: {
- /* B.cond / CB(N)Z / LDR (literal) — imm19 in 4-byte units,
- * signed, at bits [23:5]. Range: ±1MiB. */
- i64 disp = (i64)S + A - (i64)P;
- u32 instr;
- u32 imm19;
- if (disp & 3)
- compiler_panic(c, SRCLOC_NONE,
- "link: imm19 reloc misaligned displacement");
- if (disp < -(i64)(1 << 20) || disp >= (i64)(1 << 20))
- compiler_panic(c, SRCLOC_NONE,
- "link: imm19 reloc out of range (need ±1MiB)");
- imm19 = (u32)((disp >> 2) & 0x7ffffu);
- instr = rd_u32_le(P_bytes);
- instr = (instr & ~(0x7ffffu << 5)) | (imm19 << 5);
- wr_u32_le(P_bytes, instr);
- return;
- }
- case R_AARCH64_TSTBR14: {
- /* TBZ/TBNZ — imm14 in 4-byte units, signed, at bits [18:5].
- * Range: ±32KiB. */
- i64 disp = (i64)S + A - (i64)P;
- u32 instr;
- u32 imm14;
- if (disp & 3)
- compiler_panic(c, SRCLOC_NONE, "link: TSTBR14 misaligned displacement");
- if (disp < -(i64)(1 << 15) || disp >= (i64)(1 << 15))
- compiler_panic(c, SRCLOC_NONE,
- "link: TSTBR14 out of range (need ±32KiB)");
- imm14 = (u32)((disp >> 2) & 0x3fffu);
- instr = rd_u32_le(P_bytes);
- instr = (instr & ~(0x3fffu << 5)) | (imm14 << 5);
- wr_u32_le(P_bytes, instr);
- return;
- }
- case R_AARCH64_ADR_PREL_LO21: {
- /* ADR — byte-granularity imm21, encoded as immlo[30:29] +
- * immhi[23:5]. No 12-bit shift (unlike ADRP). Range: ±1MiB. */
- i64 disp = (i64)S + A - (i64)P;
- u32 instr;
- u32 immlo, immhi;
- if (disp < -(i64)(1 << 20) || disp >= (i64)(1 << 20))
- compiler_panic(c, SRCLOC_NONE,
- "link: ADR_PREL_LO21 out of range (need ±1MiB)");
- immlo = (u32)(disp & 0x3u);
- immhi = (u32)((disp >> 2) & 0x7ffffu);
- instr = rd_u32_le(P_bytes);
- instr = (instr & 0x9f00001fu) | (immlo << 29) | (immhi << 5);
- wr_u32_le(P_bytes, instr);
- return;
- }
- case R_AARCH64_JUMP26:
- case R_AARCH64_CALL26: {
- /* B/BL imm26 — branch displacement in 4-byte units, signed.
- * Clear bits [25:0] of the existing instruction and OR in the
- * new imm26. Range check: ±128MiB. */
- i64 disp = (i64)S + A - (i64)P;
- u32 instr;
- u32 imm26;
- if (disp & 3)
- compiler_panic(c, SRCLOC_NONE, "link: CALL26 misaligned displacement");
- if (disp < -(i64)(1 << 27) || disp >= (i64)(1 << 27))
- compiler_panic(c, SRCLOC_NONE,
- "link: CALL26 out of range (need ±128MiB)");
- imm26 = (u32)((disp >> 2) & 0x3ffffffu);
- instr = rd_u32_le(P_bytes);
- instr = (instr & 0xfc000000u) | imm26;
- wr_u32_le(P_bytes, instr);
- return;
- }
- case R_AARCH64_TLVP_LOAD_PAGE21:
- case R_AARCH64_ADR_GOT_PAGE:
- case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
- case R_AARCH64_ADR_PREL_PG_HI21:
- case R_AARCH64_ADR_PREL_PG_HI21_NC: {
- /* ADRP — page-relative imm21, encoded as immlo[30:29] +
- * immhi[23:5]. Effective immediate is (S+A) page minus P page,
- * shifted right by 12, sign-extended to 33 bits. _NC variant
- * skips the range check (compiler asserts it can't overflow,
- * e.g. when paired with explicit page bracketing). */
- i64 page_s = ((i64)S + A) & ~(i64)0xfff;
- i64 page_p = (i64)P & ~(i64)0xfff;
- i64 disp = page_s - page_p;
- i64 imm21 = disp >> 12;
- u32 instr;
- u32 immlo, immhi;
- if (k != R_AARCH64_ADR_PREL_PG_HI21_NC &&
- (imm21 < -(i64)(1 << 20) || imm21 >= (i64)(1 << 20)))
- compiler_panic(c, SRCLOC_NONE,
- "link: ADR_PREL_PG_HI21 out of range (need ±4GiB)");
- immlo = (u32)(imm21 & 0x3u);
- immhi = (u32)((imm21 >> 2) & 0x7ffffu);
- instr = rd_u32_le(P_bytes);
- instr = (instr & 0x9f00001fu) | (immlo << 29) | (immhi << 5);
- wr_u32_le(P_bytes, instr);
- return;
- }
- case R_AARCH64_ADD_ABS_LO12_NC: {
- /* ADD (immediate) imm12 at bits [21:10]. NC = no overflow check. */
- u64 v = ((u64)S + (u64)A) & 0xfffu;
- u32 instr = rd_u32_le(P_bytes);
- instr = (instr & ~(0xfffu << 10)) | ((u32)v << 10);
- wr_u32_le(P_bytes, instr);
- return;
- }
- case R_AARCH64_TLSLE_ADD_TPREL_HI12:
- case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: {
- /* AArch64 TLS local-exec. Caller passes S already as the
- * TP-relative offset (target's image offset minus the TLS
- * image base, plus the 16-byte AArch64 TCB). HI12 takes
- * bits 23:12, LO12_NC takes bits 11:0; both encoded as
- * imm12 at instruction bits [21:10] of an ADD (immediate).
- * The HI12 form's instruction carries LSL #12 in its opcode,
- * so bits 11:0 of the operand naturally land at scale 4096. */
- u64 v = (u64)((i64)S + A);
- u32 imm12 = (k == R_AARCH64_TLSLE_ADD_TPREL_HI12)
- ? (u32)((v >> 12) & 0xfffu)
- : (u32)(v & 0xfffu);
- u32 instr = rd_u32_le(P_bytes);
- instr = (instr & ~(0xfffu << 10)) | (imm12 << 10);
- wr_u32_le(P_bytes, instr);
- return;
- }
- case R_AARCH64_LDST8_ABS_LO12_NC:
- case R_AARCH64_LDST16_ABS_LO12_NC:
- case R_AARCH64_LDST32_ABS_LO12_NC:
- case R_AARCH64_LDST64_ABS_LO12_NC:
- case R_AARCH64_LDST128_ABS_LO12_NC:
- case R_AARCH64_LD64_GOT_LO12_NC:
- case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
- case R_AARCH64_TLVP_LOAD_PAGEOFF12: {
- /* LDR/STR with imm12 at bits [21:10]; the imm is scaled by the
- * access size, so we right-shift the low 12 bits of (S+A) by
- * the size scale before encoding. NC = no overflow check.
- *
- * LD64_GOT_LO12_NC has the same encoding as LDST64_ABS_LO12_NC;
- * the linker has already redirected `S` to the GOT slot. */
- u32 shift = (k == R_AARCH64_LDST8_ABS_LO12_NC) ? 0u
- : (k == R_AARCH64_LDST16_ABS_LO12_NC) ? 1u
- : (k == R_AARCH64_LDST32_ABS_LO12_NC) ? 2u
- : (k == R_AARCH64_LDST64_ABS_LO12_NC ||
- k == R_AARCH64_LD64_GOT_LO12_NC ||
- k == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC ||
- k == R_AARCH64_TLVP_LOAD_PAGEOFF12)
- ? 3u
- : 4u;
- u64 lo12 = ((u64)S + (u64)A) & 0xfffu;
- u64 imm12 = lo12 >> shift;
- u32 instr = rd_u32_le(P_bytes);
- if (lo12 & ((1u << shift) - 1u))
- compiler_panic(c, SRCLOC_NONE,
- "link: LDST%u_ABS_LO12_NC misaligned address "
- "(kind=%u S=0x%llx A=%lld P=0x%llx)",
- 1u << (3 + shift), (unsigned)k, (unsigned long long)S,
- (long long)A, (unsigned long long)P);
- instr = (instr & ~(0xfffu << 10)) | ((u32)(imm12 & 0xfffu) << 10);
- wr_u32_le(P_bytes, instr);
- return;
- }
- case R_RV_HI20:
- case R_RV_TPREL_HI20: {
- /* U-type (LUI/AUIPC) imm[31:12] = high 20 bits of (S + A + 0x800).
- * The 0x800 bias compensates the sign-extension of the paired
- * 12-bit ADDI/load/store immediate, so HI20 + signext12(LO12)
- * reconstructs the full value. */
- i64 v = (i64)S + A;
- u32 hi20 = (u32)(((u64)(v + 0x800)) >> 12) & 0xfffffu;
- u32 instr = rd_u32_le(P_bytes);
- instr = (instr & 0x00000fffu) | (hi20 << 12);
- wr_u32_le(P_bytes, instr);
- return;
- }
- case R_RV_PCREL_HI20:
- case R_RV_GOT_HI20:
- case R_RV_TLS_GOT_HI20: {
- /* AUIPC pc-relative HI20: same encoding as HI20 but the
- * displacement is (S + A) - P. The paired PCREL_LO12 reloc at
- * the ADDI/load below recovers the low 12 bits of the same
- * displacement via a lookup keyed on this AUIPC's site vaddr.
- * GOT_HI20 collapses to PCREL_HI20 in static-link with no
- * indirection: the symbol resolves to its own address. */
- i64 disp = (i64)S + A - (i64)P;
- u32 hi20 = (u32)(((u64)(disp + 0x800)) >> 12) & 0xfffffu;
- u32 instr = rd_u32_le(P_bytes);
- instr = (instr & 0x00000fffu) | (hi20 << 12);
- wr_u32_le(P_bytes, instr);
- return;
- }
- case R_RV_LO12_I:
- case R_RV_TPREL_LO12_I: {
- /* I-type imm[11:0] in instruction bits [31:20]. Low 12 bits of
- * (S + A); the sign-extension at execute time pairs with HI20's
- * 0x800 bias to reconstruct the full address. */
- u64 v = (u64)((i64)S + A);
- u32 lo12 = (u32)(v & 0xfffu);
- u32 instr = rd_u32_le(P_bytes);
- instr = (instr & 0x000fffffu) | (lo12 << 20);
- wr_u32_le(P_bytes, instr);
- return;
- }
- case R_RV_LO12_S:
- case R_RV_TPREL_LO12_S: {
- /* S-type imm[11:5] in bits [31:25], imm[4:0] in bits [11:7]. */
- u64 v = (u64)((i64)S + A);
- u32 lo12 = (u32)(v & 0xfffu);
- u32 instr = rd_u32_le(P_bytes);
- instr = (instr & 0x01fff07fu) | ((lo12 & 0xfe0u) << 20) |
- ((lo12 & 0x1fu) << 7);
- wr_u32_le(P_bytes, instr);
- return;
- }
- case R_RV_BRANCH: {
- /* B-type 12-bit signed displacement in 2-byte units (13-bit
- * range). imm[12] in bit 31, imm[10:5] in 30:25, imm[4:1] in
- * 11:8, imm[11] in bit 7. */
- i64 disp = (i64)S + A - (i64)P;
- u32 instr;
- u32 b;
- if (disp & 1)
- compiler_panic(c, SRCLOC_NONE,
- "link: RV BRANCH misaligned displacement");
- if (disp < -(i64)(1 << 12) || disp >= (i64)(1 << 12))
- compiler_panic(c, SRCLOC_NONE,
- "link: RV BRANCH out of range (need ±4KiB)");
- b = (u32)((u64)disp & 0x1ffeu) | ((u32)(((u64)disp >> 11) & 1u) << 11) |
- ((u32)(((u64)disp >> 12) & 1u) << 12);
- instr = rd_u32_le(P_bytes);
- instr &= 0x01fff07fu;
- instr |= ((b >> 12) & 1u) << 31;
- instr |= ((b >> 5) & 0x3fu) << 25;
- instr |= ((b >> 1) & 0xfu) << 8;
- instr |= ((b >> 11) & 1u) << 7;
- wr_u32_le(P_bytes, instr);
- return;
- }
- case R_RV_JAL: {
- /* J-type 20-bit signed displacement in 2-byte units (21-bit
- * range). imm[20] in bit 31, imm[10:1] in 30:21, imm[11] in bit
- * 20, imm[19:12] in bits 19:12. */
- i64 disp = (i64)S + A - (i64)P;
- u32 instr;
- u32 b;
- if (disp & 1)
- compiler_panic(c, SRCLOC_NONE, "link: RV JAL misaligned displacement");
- if (disp < -(i64)(1 << 20) || disp >= (i64)(1 << 20))
- compiler_panic(c, SRCLOC_NONE,
- "link: RV JAL out of range (need ±1MiB)");
- b = (u32)((u64)disp & 0x1ffffeu) | ((u32)(((u64)disp >> 11) & 1u) << 11) |
- ((u32)(((u64)disp >> 20) & 1u) << 20);
- instr = rd_u32_le(P_bytes);
- instr &= 0x00000fffu;
- instr |= ((b >> 20) & 1u) << 31;
- instr |= ((b >> 1) & 0x3ffu) << 21;
- instr |= ((b >> 11) & 1u) << 20;
- instr |= ((b >> 12) & 0xffu) << 12;
- wr_u32_le(P_bytes, instr);
- return;
- }
- case R_RV_CALL:
- case R_PLT32: {
- /* AUIPC + JALR pair encoding the same 32-bit signed PC-relative
- * displacement. AUIPC at P, JALR at P+4. The 0x800 bias on the
- * AUIPC immediate compensates JALR's signed 12-bit imm so that
- * (auipc_imm << 12) + signext12(jalr_imm) == disp.
- *
- * R_PLT32 is the kit-canonical RelocKind that
- * elf_riscv64_reloc_from(R_RISCV_CALL_PLT) maps to; static-link
- * with no PLT collapses CALL_PLT to a direct CALL (no
- * indirection). */
- i64 disp = (i64)S + A - (i64)P;
- u32 hi20 = (u32)(((u64)(disp + 0x800)) >> 12) & 0xfffffu;
- u32 lo12 = (u32)((u64)disp & 0xfffu);
- u32 auipc = rd_u32_le(P_bytes);
- u32 jalr = rd_u32_le(P_bytes + 4);
- if (disp < -(i64)(1ll << 31) || disp >= (i64)(1ll << 31))
- compiler_panic(c, SRCLOC_NONE,
- "link: RV CALL out of range (need ±2GiB)");
- auipc = (auipc & 0x00000fffu) | (hi20 << 12);
- jalr = (jalr & 0x000fffffu) | (lo12 << 20);
- wr_u32_le(P_bytes, auipc);
- wr_u32_le(P_bytes + 4, jalr);
- return;
- }
- case R_RV_RVC_BRANCH: {
- /* CB-type 8-bit signed displacement in 2-byte units (9-bit
- * range). c.beqz / c.bnez. Encoding (16-bit instruction):
- * bit 12 = imm[8]
- * bits 11:10 = imm[4:3]
- * bits 9:7 = rs1' (untouched)
- * bits 6:5 = imm[7:6]
- * bits 4:3 = imm[2:1]
- * bit 2 = imm[5] */
- i64 disp = (i64)S + A - (i64)P;
- u16 instr = (u16)(P_bytes[0] | ((u16)P_bytes[1] << 8));
- u32 b;
- if (disp & 1)
- compiler_panic(c, SRCLOC_NONE,
- "link: RV RVC_BRANCH misaligned displacement");
- if (disp < -(i64)(1 << 8) || disp >= (i64)(1 << 8))
- compiler_panic(c, SRCLOC_NONE,
- "link: RV RVC_BRANCH out of range (need ±256B)");
- b = (u32)((u64)disp & 0x1feu);
- instr = (u16)(instr & 0xe383u);
- instr = (u16)(instr | (((b >> 8) & 1u) << 12));
- instr = (u16)(instr | (((b >> 3) & 3u) << 10));
- instr = (u16)(instr | (((b >> 6) & 3u) << 5));
- instr = (u16)(instr | (((b >> 1) & 3u) << 3));
- instr = (u16)(instr | (((b >> 5) & 1u) << 2));
- P_bytes[0] = (u8)(instr & 0xffu);
- P_bytes[1] = (u8)((instr >> 8) & 0xffu);
- return;
- }
- case R_RV_RVC_JUMP: {
- /* CJ-type 11-bit signed displacement in 2-byte units (12-bit
- * range). c.j / c.jal. Encoding bits in the 16-bit instruction:
- * 12=imm[11], 11=imm[4], 10:9=imm[9:8], 8=imm[10],
- * 7=imm[6], 6=imm[7], 5:3=imm[3:1], 2=imm[5]. */
- i64 disp = (i64)S + A - (i64)P;
- u16 instr = (u16)(P_bytes[0] | ((u16)P_bytes[1] << 8));
- u32 b;
- if (disp & 1)
- compiler_panic(c, SRCLOC_NONE,
- "link: RV RVC_JUMP misaligned displacement");
- if (disp < -(i64)(1 << 11) || disp >= (i64)(1 << 11))
- compiler_panic(c, SRCLOC_NONE,
- "link: RV RVC_JUMP out of range (need ±2KiB)");
- b = (u32)((u64)disp & 0xffeu);
- instr = (u16)(instr & 0xe003u);
- instr = (u16)(instr | (((b >> 11) & 1u) << 12));
- instr = (u16)(instr | (((b >> 4) & 1u) << 11));
- instr = (u16)(instr | (((b >> 8) & 3u) << 9));
- instr = (u16)(instr | (((b >> 10) & 1u) << 8));
- instr = (u16)(instr | (((b >> 6) & 1u) << 7));
- instr = (u16)(instr | (((b >> 7) & 1u) << 6));
- instr = (u16)(instr | (((b >> 1) & 7u) << 3));
- instr = (u16)(instr | (((b >> 5) & 1u) << 2));
- P_bytes[0] = (u8)(instr & 0xffu);
- P_bytes[1] = (u8)((instr >> 8) & 0xffu);
- return;
+ return 1;
}
- case R_RV_RELAX:
- case R_RV_TPREL_ADD:
- /* Marker relocs only — RELAX permits the prior reloc to be
- * compressed, TPREL_ADD annotates a TLS thread-pointer ADD that
- * the linker may fold during relaxation. We don't relax, so
- * both are no-ops. */
- return;
case R_RV_ADD8: {
/* word8 += S + A. Used (paired with a SUB8 against another sym
* at the same site) to encode symbol differences. */
u8 cur = P_bytes[0];
P_bytes[0] = (u8)(cur + (u8)((S + (u64)A) & 0xffu));
- return;
+ return 1;
}
case R_RV_SUB8: {
u8 cur = P_bytes[0];
P_bytes[0] = (u8)(cur - (u8)((S + (u64)A) & 0xffu));
- return;
+ return 1;
}
case R_RV_ADD16: {
u16 cur = rd_u16_le(P_bytes);
wr_u16_le(P_bytes, (u16)(cur + (u16)((S + (u64)A) & 0xffffu)));
- return;
+ return 1;
}
case R_RV_SUB16: {
u16 cur = rd_u16_le(P_bytes);
wr_u16_le(P_bytes, (u16)(cur - (u16)((S + (u64)A) & 0xffffu)));
- return;
+ return 1;
}
case R_RV_ADD32: {
u32 cur = rd_u32_le(P_bytes);
wr_u32_le(P_bytes, (u32)(cur + (u32)((S + (u64)A) & 0xffffffffu)));
- return;
+ return 1;
}
case R_RV_SUB32: {
u32 cur = rd_u32_le(P_bytes);
wr_u32_le(P_bytes, (u32)(cur - (u32)((S + (u64)A) & 0xffffffffu)));
- return;
+ return 1;
}
case R_RV_ADD64: {
u64 cur = rd_u64_le(P_bytes);
wr_u64_le(P_bytes, cur + S + (u64)A);
- return;
+ return 1;
}
case R_RV_SUB64: {
u64 cur = rd_u64_le(P_bytes);
wr_u64_le(P_bytes, cur - S - (u64)A);
- return;
+ return 1;
}
case R_RV_SUB6: {
/* Bottom 6 bits of byte = (byte - (S + A)) & 0x3f. */
u8 cur = P_bytes[0];
u8 v = (u8)((cur & 0x3fu) - (u8)((S + (u64)A) & 0x3fu));
P_bytes[0] = (u8)((cur & 0xc0u) | (v & 0x3fu));
- return;
+ return 1;
}
case R_RV_SET6: {
u8 cur = P_bytes[0];
P_bytes[0] = (u8)((cur & 0xc0u) | (u8)((S + (u64)A) & 0x3fu));
- return;
+ return 1;
}
case R_RV_SET8:
P_bytes[0] = (u8)((S + (u64)A) & 0xffu);
- return;
+ return 1;
case R_RV_SET16:
wr_u16_le(P_bytes, (u16)((S + (u64)A) & 0xffffu));
- return;
+ return 1;
case R_RV_SET32:
wr_u32_le(P_bytes, (u32)((S + (u64)A) & 0xffffffffu));
- return;
+ return 1;
case R_RV_SET_ULEB128: {
/* Variable-length ULEB128 field set to (S + A). These come as a
* PAIR at the same offset (RISC-V psABI): SET_ULEB128 sets the
@@ -588,7 +236,7 @@ void link_reloc_apply(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A,
u32 width = reloc_uleb128_len(P_bytes);
u64 v = S + (u64)A;
reloc_uleb128_write_fixed(P_bytes, v, width);
- return;
+ return 1;
}
case R_RV_SUB_ULEB128: {
/* field -= (S + A), preserving the original ULEB128 width. The
@@ -598,10 +246,9 @@ void link_reloc_apply(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A,
u64 cur = reloc_uleb128_read(P_bytes);
u64 v = cur - (S + (u64)A);
reloc_uleb128_write_fixed(P_bytes, v, width);
- return;
+ return 1;
}
default:
- compiler_panic(c, SRCLOC_NONE, "link: unsupported reloc kind %u",
- (unsigned)k);
+ return 0; /* not an arch-neutral kind — caller tries the arch hook */
}
}
diff --git a/src/obj/reloc_apply.h b/src/obj/reloc_apply.h
@@ -4,6 +4,20 @@
#include "core/core.h"
#include "obj/obj.h"
+/* The single public byte-patcher entry: patch the relocation site at P_bytes
+ * given the resolved addresses (S, P) and addend (A). Shared verbatim by the
+ * static linker, JIT linker, assembler, and emulator guest loader ("one
+ * encoder, three loaders", doc/OBJ.md). Defined in src/link/link_reloc_apply.c
+ * — it dispatches arch-neutral kinds to reloc_apply_neutral() (below) and
+ * instruction-immediate kinds to the per-arch LinkArchDesc.reloc_apply_insn,
+ * the latter needing the link layer's link_arch_desc_for(). See
+ * doc/plan/RELOC.md (WS-C). */
void link_reloc_apply(Compiler*, RelocKind, u8* P_bytes, u64 S, i64 A, u64 P);
+/* Apply an arch-neutral (data-word / ULEB128) relocation kind. Returns 1 if
+ * `k` is a neutral kind it patched, 0 otherwise (the dispatcher then consults
+ * the per-arch instruction encoders). Pure obj-core: no link/arch dependency,
+ * so it never touches `c` except to panic on the dynamic-only R_X64_COPY. */
+int reloc_apply_neutral(Compiler*, RelocKind, u8* P_bytes, u64 S, i64 A, u64 P);
+
#endif
diff --git a/test/link/reloc_apply_test.c b/test/link/reloc_apply_test.c
@@ -0,0 +1,208 @@
+/* Relocation byte-encoder migration guard (doc/plan/RELOC.md, WS-C).
+ *
+ * WS-C moves the per-arch *instruction-immediate* byte encoders out of the
+ * format-neutral src/obj/reloc_apply.c into each backend's
+ * src/arch/<arch>/reloc.c (reached via LinkArchDesc.reloc_apply_insn), while
+ * link_reloc_apply stays the single public byte-patcher entry. The move must
+ * be byte-identical: the dispatcher routes each kind to the same encoder it
+ * had before, producing the same patched bytes.
+ *
+ * This test pins that. For one representative, in-range relocation of every
+ * instruction-embedded kind (plus a sampling of the arch-neutral data-word
+ * kinds the obj core keeps), it drives link_reloc_apply and asserts the
+ * patched bytes equal a frozen golden. The goldens were captured from the
+ * pre-WS-C implementation; do not "improve" them — they are the spec the
+ * partitioned encoders must match. A drift in any encoder turns this red and
+ * bisects to the exact kind (hence arch).
+ *
+ * Run with KIT_RELOC_APPLY_CAPTURE=1 in the environment to re-emit the golden
+ * column (used once to seed it from the frozen implementation).
+ *
+ * Exit 0 = pass; non-zero = fail. */
+
+#include "obj/reloc_apply.h"
+
+#include <kit/cg.h>
+#include <kit/core.h>
+
+#include "core/core.h"
+#include "lib/kit_unit.h"
+#include "obj/obj.h"
+
+static KitUnit g_u;
+#define EXPECT(cond, ...) CU_EXPECT(&g_u, cond, __VA_ARGS__)
+
+typedef struct ApplyCase { /* one frozen-golden relocation check (kCases row) */
+ const char* name; /* label printed in failure and capture output */
+ KitArchKind arch; /* selects the compiler handed to link_reloc_apply */
+ RelocKind kind; /* relocation kind under test */
+ u64 in; /* initial site bytes, little-endian packed */
+ u32 nbytes; /* bytes the encoder touches / we compare (site buffer is 8) */
+ u64 S; /* resolved address S, passed through to link_reloc_apply */
+ i64 A; /* addend A, passed through to link_reloc_apply */
+ u64 P; /* resolved address P, passed through to link_reloc_apply */
+ u64 want; /* golden patched bytes, little-endian packed */
+} ApplyCase;
+
+/* Positional rows: {name, arch, kind, in, nbytes, S, A, P, want}. Covers the
+ * instruction-immediate encoders (the WS-C movers) plus a sampling of the
+ * neutral data-word arms; inputs are in-range so no range check panics. */
+static const ApplyCase kCases[] = {
+ /* ---- AArch64 instruction-immediate encoders ---- */
+ {"aa64 CONDBR19", KIT_ARCH_ARM_64, R_AARCH64_CONDBR19, 0x54000000u, 4,
+ 0x2000, 0, 0x1000, 0x54008000u},
+ {"aa64 LD_PREL_LO19", KIT_ARCH_ARM_64, R_AARCH64_LD_PREL_LO19, 0x58000000u,
+ 4, 0x2000, 0, 0x1000, 0x58008000u},
+ {"aa64 TSTBR14", KIT_ARCH_ARM_64, R_AARCH64_TSTBR14, 0x36000000u, 4, 0x1100,
+ 0, 0x1000, 0x36000800u},
+ {"aa64 ADR_PREL_LO21", KIT_ARCH_ARM_64, R_AARCH64_ADR_PREL_LO21,
+ 0x10000000u, 4, 0x1234, 0, 0x1000, 0x100011a0u},
+ {"aa64 JUMP26", KIT_ARCH_ARM_64, R_AARCH64_JUMP26, 0x14000000u, 4, 0x5000,
+ 0, 0x1000, 0x14001000u},
+ {"aa64 CALL26", KIT_ARCH_ARM_64, R_AARCH64_CALL26, 0x94000000u, 4, 0x5000,
+ 0, 0x1000, 0x94001000u},
+ {"aa64 ADR_PREL_PG_HI21", KIT_ARCH_ARM_64, R_AARCH64_ADR_PREL_PG_HI21,
+ 0x90000000u, 4, 0x100000, 0, 0x1000, 0xf00007e0u},
+ {"aa64 ADR_PREL_PG_HI21_NC", KIT_ARCH_ARM_64, R_AARCH64_ADR_PREL_PG_HI21_NC,
+ 0x90000000u, 4, 0x100000, 0, 0x1000, 0xf00007e0u},
+ {"aa64 ADR_GOT_PAGE", KIT_ARCH_ARM_64, R_AARCH64_ADR_GOT_PAGE, 0x90000000u,
+ 4, 0x100000, 0, 0x1000, 0xf00007e0u},
+ {"aa64 TLSIE_ADR_GOTTPREL_PAGE21", KIT_ARCH_ARM_64,
+ R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, 0x90000000u, 4, 0x100000, 0, 0x1000,
+ 0xf00007e0u},
+ {"aa64 TLVP_LOAD_PAGE21", KIT_ARCH_ARM_64, R_AARCH64_TLVP_LOAD_PAGE21,
+ 0x90000000u, 4, 0x100000, 0, 0x1000, 0xf00007e0u},
+ {"aa64 ADD_ABS_LO12_NC", KIT_ARCH_ARM_64, R_AARCH64_ADD_ABS_LO12_NC,
+ 0x91000000u, 4, 0x1abc, 0, 0, 0x912af000u},
+ {"aa64 TLSLE_ADD_TPREL_HI12", KIT_ARCH_ARM_64,
+ R_AARCH64_TLSLE_ADD_TPREL_HI12, 0x91000000u, 4, 0x12345, 0, 0,
+ 0x91004800u},
+ {"aa64 TLSLE_ADD_TPREL_LO12_NC", KIT_ARCH_ARM_64,
+ R_AARCH64_TLSLE_ADD_TPREL_LO12_NC, 0x91000000u, 4, 0x12345, 0, 0,
+ 0x910d1400u},
+ {"aa64 LDST8_ABS_LO12_NC", KIT_ARCH_ARM_64, R_AARCH64_LDST8_ABS_LO12_NC,
+ 0xf9400000u, 4, 0x1100, 0, 0, 0xf9440000u},
+ {"aa64 LDST16_ABS_LO12_NC", KIT_ARCH_ARM_64, R_AARCH64_LDST16_ABS_LO12_NC,
+ 0xf9400000u, 4, 0x1100, 0, 0, 0xf9420000u},
+ {"aa64 LDST32_ABS_LO12_NC", KIT_ARCH_ARM_64, R_AARCH64_LDST32_ABS_LO12_NC,
+ 0xf9400000u, 4, 0x1100, 0, 0, 0xf9410000u},
+ {"aa64 LDST64_ABS_LO12_NC", KIT_ARCH_ARM_64, R_AARCH64_LDST64_ABS_LO12_NC,
+ 0xf9400000u, 4, 0x1100, 0, 0, 0xf9408000u},
+ {"aa64 LDST128_ABS_LO12_NC", KIT_ARCH_ARM_64, R_AARCH64_LDST128_ABS_LO12_NC,
+ 0xf9400000u, 4, 0x1100, 0, 0, 0xf9404000u},
+ {"aa64 LD64_GOT_LO12_NC", KIT_ARCH_ARM_64, R_AARCH64_LD64_GOT_LO12_NC,
+ 0xf9400000u, 4, 0x1100, 0, 0, 0xf9408000u},
+ {"aa64 TLSIE_LD64_GOTTPREL_LO12_NC", KIT_ARCH_ARM_64,
+ R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, 0xf9400000u, 4, 0x1100, 0, 0,
+ 0xf9408000u},
+ {"aa64 TLVP_LOAD_PAGEOFF12", KIT_ARCH_ARM_64, R_AARCH64_TLVP_LOAD_PAGEOFF12,
+ 0xf9400000u, 4, 0x1100, 0, 0, 0xf9408000u},
+ /* ---- AArch64 neutral data words (kept in obj core) ---- */
+ {"aa64 ABS16", KIT_ARCH_ARM_64, R_AARCH64_ABS16, 0x0000u, 2, 0x1234, 0, 0,
+ 0x1234u},
+ {"aa64 PREL16", KIT_ARCH_ARM_64, R_AARCH64_PREL16, 0x0000u, 2, 0x2000, 0,
+ 0x1000, 0x1000u},
+
+ /* ---- x86-64 instruction-immediate + neutral data words ---- */
+ {"x64 PC8", KIT_ARCH_X86_64, R_X64_PC8, 0x00u, 1, 0x1010, -2, 0x1000,
+ 0x0eu},
+ {"x64 ABS32", KIT_ARCH_X86_64, R_ABS32, 0x0u, 4, 0xdeadbeefu, 0, 0,
+ 0xdeadbeefu},
+ {"x64 ABS64", KIT_ARCH_X86_64, R_ABS64, 0x0u, 8, 0x1122334455667788ull, 0,
+ 0, 0x1122334455667788ull},
+ {"x64 PC32", KIT_ARCH_X86_64, R_PC32, 0x0u, 4, 0x2000, -4, 0x1000, 0xffcu},
+ {"x64 TPOFF32", KIT_ARCH_X86_64, R_X64_TPOFF32, 0x0u, 4, 0x12345678u, 0, 0,
+ 0x12345678u},
+ {"x64 GLOB_DAT", KIT_ARCH_X86_64, R_X64_GLOB_DAT, 0x0u, 8, 0xcafebabeu, 0,
+ 0, 0xcafebabeu},
+
+ /* ---- RISC-V instruction-immediate encoders ---- */
+ {"rv HI20", KIT_ARCH_RV64, R_RV_HI20, 0x00000537u, 4, 0x12345, 0, 0,
+ 0x00012537u},
+ {"rv TPREL_HI20", KIT_ARCH_RV64, R_RV_TPREL_HI20, 0x00000537u, 4, 0x12345,
+ 0, 0, 0x00012537u},
+ {"rv PCREL_HI20", KIT_ARCH_RV64, R_RV_PCREL_HI20, 0x00000517u, 4, 0x13345,
+ 0, 0x1000, 0x00012517u},
+ {"rv GOT_HI20", KIT_ARCH_RV64, R_RV_GOT_HI20, 0x00000517u, 4, 0x13345, 0,
+ 0x1000, 0x00012517u},
+ {"rv TLS_GOT_HI20", KIT_ARCH_RV64, R_RV_TLS_GOT_HI20, 0x00000517u, 4,
+ 0x13345, 0, 0x1000, 0x00012517u},
+ {"rv LO12_I", KIT_ARCH_RV64, R_RV_LO12_I, 0x00000013u, 4, 0x12345, 0, 0,
+ 0x34500013u},
+ {"rv TPREL_LO12_I", KIT_ARCH_RV64, R_RV_TPREL_LO12_I, 0x00000013u, 4,
+ 0x12345, 0, 0, 0x34500013u},
+ {"rv LO12_S", KIT_ARCH_RV64, R_RV_LO12_S, 0x00000023u, 4, 0x12345, 0, 0,
+ 0x340002a3u},
+ {"rv TPREL_LO12_S", KIT_ARCH_RV64, R_RV_TPREL_LO12_S, 0x00000023u, 4,
+ 0x12345, 0, 0, 0x340002a3u},
+ {"rv BRANCH", KIT_ARCH_RV64, R_RV_BRANCH, 0x00000063u, 4, 0x1100, 0, 0x1000,
+ 0x10000063u},
+ {"rv JAL", KIT_ARCH_RV64, R_RV_JAL, 0x0000006fu, 4, 0x5100, 0, 0x1000,
+ 0x1000406fu},
+ {"rv CALL", KIT_ARCH_RV64, R_RV_CALL, 0x000080e700000097ull, 8, 0x100000, 0,
+ 0x1000, 0x000080e7000ff097ull},
+ {"rv PLT32", KIT_ARCH_RV64, R_PLT32, 0x000080e700000097ull, 8, 0x100000, 0,
+ 0x1000, 0x000080e7000ff097ull},
+ {"rv RVC_BRANCH", KIT_ARCH_RV64, R_RV_RVC_BRANCH, 0xc001u, 2, 0x1080, 0,
+ 0x1000, 0xc041u},
+ {"rv RVC_JUMP", KIT_ARCH_RV64, R_RV_RVC_JUMP, 0xa001u, 2, 0x1400, 0, 0x1000,
+ 0xa101u},
+ /* ---- RISC-V relaxation markers (no bytes patched) ---- */
+ {"rv RELAX", KIT_ARCH_RV64, R_RV_RELAX, 0xdeadbeefu, 4, 0x1000, 0, 0x2000,
+ 0xdeadbeefu},
+ {"rv TPREL_ADD", KIT_ARCH_RV64, R_RV_TPREL_ADD, 0xdeadbeefu, 4, 0x1000, 0,
+ 0x2000, 0xdeadbeefu},
+ /* ---- RISC-V neutral data-word arms (kept in obj core) ---- */
+ {"rv ADD32", KIT_ARCH_RV64, R_RV_ADD32, 0x00000010u, 4, 0x20, 0, 0,
+ 0x00000030u},
+ {"rv SUB32", KIT_ARCH_RV64, R_RV_SUB32, 0x00000030u, 4, 0x10, 0, 0,
+ 0x00000020u},
+ {"rv SET8", KIT_ARCH_RV64, R_RV_SET8, 0x00u, 1, 0x2a, 0, 0, 0x2au},
+};
+
+/* Return the lazily created Linux/ELF compiler for `arch`, cached per arch
+ * (anything other than aa64/x64 shares the rv64 slot). Exits 2 on failure. */
+static KitCompiler* compiler_for(KitArchKind arch) {
+  static KitCompiler* cache[3]; /* [0]=aa64, [1]=x64, [2]=rv64; zero-init */
+  KitCompiler** slot = &cache[2];
+  KitTargetSpec t;
+  if (arch == KIT_ARCH_ARM_64) slot = &cache[0];
+  else if (arch == KIT_ARCH_X86_64) slot = &cache[1];
+  if (*slot) return *slot;
+  t = kit_unit_target(arch, KIT_OS_LINUX, KIT_OBJ_ELF);
+  if (kit_unit_compiler_new(&g_u, t, slot) != KIT_OK || !*slot) {
+    fprintf(stderr, "compiler_new failed for arch=%d\n", (int)arch);
+    exit(2);
+  }
+  return *slot;
+}
+
+/* Run every kCases row through link_reloc_apply and check (or, when
+ * KIT_RELOC_APPLY_CAPTURE is set, print) the patched bytes. */
+int main(void) {
+  size_t i;
+  int capture = getenv("KIT_RELOC_APPLY_CAPTURE") != NULL;
+  kit_unit_init(&g_u);
+  for (i = 0; i < sizeof kCases / sizeof kCases[0]; ++i) {
+    const ApplyCase* tc = &kCases[i];
+    u8 buf[8] = {0};
+    u64 got = 0;
+    u32 b;
+    /* Unpack by shifting, not memcpy: `in` is documented as little-endian
+     * packed, and memcpy would seed the wrong bytes on a big-endian host. */
+    for (b = 0; b < tc->nbytes; ++b) buf[b] = (u8)(tc->in >> (8u * b));
+    link_reloc_apply(compiler_for(tc->arch), tc->kind, buf, tc->S, tc->A,
+                     tc->P);
+    for (b = 0; b < tc->nbytes; ++b) got |= (u64)buf[b] << (8u * b);
+    if (capture) {
+      printf(" /* %-32s */ 0x%016llxull,\n", tc->name,
+             (unsigned long long)got);
+      continue;
+    }
+    EXPECT(got == tc->want, "%s: got=0x%016llx want=0x%016llx", tc->name,
+           (unsigned long long)got, (unsigned long long)tc->want);
+  }
+  if (capture) return 0;
+  kit_unit_summary(&g_u, "reloc_apply_test");
+  return kit_unit_status(&g_u);
+}
diff --git a/test/smoke/rv64_tls_link.sh b/test/smoke/rv64_tls_link.sh
@@ -7,7 +7,7 @@
# layout/apply for that reloc, so linking failed hard with
# "link: unsupported reloc kind 80" instead of producing a binary. kit
# links whole-module/static, so the fix is to always emit Local-Exec
-# (R_RV_TPREL_HI20/LO12_I), matching the aa64 and x64 backends.
+# (R_RISCV_TPREL_HI20/LO12_I), matching the aa64 and x64 backends.
#
# This is link-only (no execution), so it runs on any host without qemu. It is
# a single synthetic structural case: one lane (L) that compiles two sources,
@@ -64,8 +64,8 @@ EOF
# The extern _Thread_local access must lower to TPREL, never TLS_GOT.
local relocs
relocs="$("$KIT" objdump -r "$KIT_WORK/tls_ie.o" 2>&1)"
- if ! printf '%s\n' "$relocs" | grep -q 'RV_TPREL_HI20'; then
- kit_fail "$KIT_NAME" "expected RV_TPREL_HI20 reloc; got: $relocs"; return
+ if ! printf '%s\n' "$relocs" | grep -q 'R_RISCV_TPREL_HI20'; then
+ kit_fail "$KIT_NAME" "expected R_RISCV_TPREL_HI20 reloc; got: $relocs"; return
fi
if printf '%s\n' "$relocs" | grep -q 'TLS_GOT'; then
kit_fail "$KIT_NAME" "unexpected TLS_GOT reloc (Initial-Exec regressed): $relocs"; return