commit 305d49af9c1d7c268a4db82e443d796923046ed1
parent b6df55cdbea665c89a313a54e17804032f8512c0
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 11 May 2026 05:37:39 -0700
libc: rv64/x64 GOT codegen + PIE link plumbing — full 10-cell test-libc green
Closes the 9-cell Linux matrix (musl-static, musl-dynamic, glibc-dynamic
× x64, aarch64, rv64) plus the existing darwin cell — 70/70 pass.
Codegen:
- arch/rv64: route OPK_GLOBAL through R_RV_GOT_HI20 + R_RV_PCREL_LO12_I
in rv_load/rv_store/rv_addr_of when extern_via_got and the symbol is
imported. Mirrors x64 GOTPCRELX / aa64 ADR_GOT_PAGE shape.
- arch/x64: x64_use_got_for_sym + REX_GOTPCRELX load path for extern
data on ELF PIC/PIE.
- obj/obj_secnames: obj_format_extern_via_got returns true for ELF PIC/PIE
(was Mach-O only).
Linker:
- link_layout: GOT slot recognition for x64 GOTPCREL family + R_RV_GOT_HI20;
pre-resolve synth of __dso_handle, _DYNAMIC, _GLOBAL_OFFSET_TABLE_, and
rv64 __global_pointer$ (RW base + 0x800).
- link_elf: arch-dispatched RELATIVE/GLOB_DAT/JUMP_SLOT dyn-reloc types;
reloc_is_branch26 covers R_X64_PLT32 / R_PLT32 / R_RV_CALL so imported
function calls route through the PLT.
- link_dyn: x64 PLT stub (ff 25 disp32 + nop pad) and rv64 PLT stub
(auipc/ld/jalr) emission; JUMP_SLOT type per arch; refinement of
_DYNAMIC / __dso_handle once dynamic_vaddr is known.
- link_reloc: apply paths for x64 GOTPCREL{,X}/REX_GOTPCRELX/GOTPC32 and
rv64 ADD/SUB/SET{6,8,16,32} + ULEB128 fixups.
Object reader:
- elf/elf_reloc_riscv64: R_RISCV_SET_ULEB128 (60) / R_RISCV_SUB_ULEB128 (61)
for musl rv64 libc.a debug sections.
Hosted shim build:
- Makefile: build/cfree_hosted/linux-*.o with -fPIE -fpic so static and
dynamic Linux cells share one shim.
- rt/Makefile: riscv64-linux variant with LDBL128=1 (rv64 lp64d uses
binary128 long double).
Diffstat:
14 files changed, 475 insertions(+), 23 deletions(-)
diff --git a/Makefile b/Makefile
@@ -106,15 +106,15 @@ hosted-linux-rv64: $(HOSTED_LINUX_RV64_OBJ)
$(HOSTED_LINUX_AARCH64_OBJ): $(HOSTED_LINUX_SRC) $(BIN)
@mkdir -p $(dir $@)
- $(BIN) cc -target aarch64-linux -c $< -o $@
+ $(BIN) cc -target aarch64-linux -fPIE -fpic -c $< -o $@
$(HOSTED_LINUX_X64_OBJ): $(HOSTED_LINUX_SRC) $(BIN)
@mkdir -p $(dir $@)
- $(BIN) cc -target x86_64-linux -c $< -o $@
+ $(BIN) cc -target x86_64-linux -fPIE -fpic -c $< -o $@
$(HOSTED_LINUX_RV64_OBJ): $(HOSTED_LINUX_SRC) $(BIN)
@mkdir -p $(dir $@)
- $(BIN) cc -target riscv64-linux -c $< -o $@
+ $(BIN) cc -target riscv64-linux -fPIE -fpic -c $< -o $@
# Replace the archive (`ar rcs` only adds/updates), so removing a .c file
# also removes its .o from the archive on the next build.
diff --git a/rt/Makefile b/rt/Makefile
@@ -72,6 +72,7 @@ ifeq ($(VARIANT),riscv64-linux)
ABI = lp64
INT128 = 1
CORO = riscv64
+ LDBL128 = 1
ARCH_FLAGS = -mabi=lp64d -march=rv64imafd
endif
ifeq ($(VARIANT),riscv64-elf)
diff --git a/src/api/pipeline.c b/src/api/pipeline.c
@@ -1148,6 +1148,10 @@ static const char* reloc_kind_name(u16 kind) {
return "R_RISCV_SET16";
case R_RV_SET32:
return "R_RISCV_SET32";
+ case R_RV_SET_ULEB128:
+ return "R_RISCV_SET_ULEB128";
+ case R_RV_SUB_ULEB128:
+ return "R_RISCV_SUB_ULEB128";
case R_WASM_FUNCIDX:
return "R_WASM_FUNCTION_INDEX_LEB";
case R_WASM_TABLEIDX:
diff --git a/src/arch/rv64.c b/src/arch/rv64.c
@@ -1071,6 +1071,17 @@ static u32 addr_base(CGTarget* t, Operand addr, i32* out_off, u32 tmp_reg) {
(int)addr.kind);
}
+/* Decides whether a reference to `sym` at this site has to be indirected
+ * through the GOT. That holds only when the target format binds extern
+ * data through GOT indirection (Mach-O always; ELF when compiling
+ * -fPIC/-fPIE) and the symbol is an undefined external, i.e. its
+ * section_id is OBJ_SEC_NONE. A failed symbol lookup counts as "no".
+ * Counterpart of aarch64.c:use_got_for_sym / x64.c:x64_use_got_for_sym. */
+static int rv64_use_got_for_sym(CGTarget* t, ObjSymId sym) {
+  const ObjSym* info;
+  if (!obj_format_extern_via_got(t->c)) return 0;
+  info = obj_symbol_get(t->obj, sym);
+  if (!info) return 0;
+  return info->section_id == OBJ_SEC_NONE ? 1 : 0;
+}
+
/* Anchor symbol management for PCREL_LO12_*. Each AUIPC site gets a
* fresh local sym; the paired LO12 reloc references the anchor. */
static ObjSymId emit_pcrel_anchor(CGTarget* t, u32 sec, u32 auipc_pos) {
@@ -1089,6 +1100,39 @@ static ObjSymId emit_pcrel_anchor(CGTarget* t, u32 sec, u32 auipc_pos) {
return obj_symbol(t->obj, n, SB_LOCAL, SK_OBJ, sec, (u64)auipc_pos, 0);
}
+/* Emit `auipc dst, %got_pcrel_hi(sym) ; ld dst, %pcrel_lo(.)(dst)`,
+ * leaving the runtime address of `sym` (the GOT slot's contents) in
+ * `dst_reg`. Addends are omitted from the GOT relocs — most loaders
+ * disallow nonzero addends on GOT-load fixups — so callers apply any
+ * displacement with a follow-on ADDI/ADD against the loaded base.
+ *
+ * `dst_reg` serves as both the AUIPC destination and the LD base, so no
+ * other register is written. Both relocations are recorded in the
+ * emitter's current section (mc->section_id):
+ *   - R_RV_GOT_HI20 at the AUIPC position, against `sym`;
+ *   - R_RV_PCREL_LO12_I at the LD position, against the local anchor
+ *     symbol that emit_pcrel_anchor creates for the AUIPC position
+ *     (not against `sym` itself). */
+static void emit_got_load_addr(CGTarget* t, u32 dst_reg, ObjSymId sym) {
+ MCEmitter* mc = t->mc;
+ u32 sec = mc->section_id;
+ u32 ap = mc->pos(mc);
+ emit32(mc, rv_auipc(dst_reg, 0));
+ mc->emit_reloc_at(mc, sec, ap, R_RV_GOT_HI20, sym, 0, 0, 0);
+ ObjSymId anchor = emit_pcrel_anchor(t, sec, ap);
+ u32 lp = mc->pos(mc);
+ emit32(mc, rv_ld(dst_reg, dst_reg, 0));
+ mc->emit_reloc_at(mc, sec, lp, R_RV_PCREL_LO12_I, anchor, 0, 0, 0);
+}
+
+/* Compute `rd = base + off` for a signed 32-bit displacement `off`.
+ *
+ *   off == 0             : emits nothing when rd == base, otherwise
+ *                          `addi rd, base, 0` (a register move).
+ *   -2048 <= off <= 2047 : a single `addi rd, base, off` (imm12 range).
+ *   otherwise            : materializes `off` in a scratch register via
+ *                          emit_load_imm, then `add rd, base, scratch`.
+ *
+ * The scratch register is RV_T1, except when `base` is itself RV_T1:
+ * loading the immediate there would overwrite the base before the ADD
+ * reads it, so RV_T0 is used instead. Callers must treat the scratch
+ * register as clobbered on the large-offset path.
+ * Mirrors emit_addr_adjust in aarch64.c. */
+static void emit_addr_adjust(MCEmitter* mc, u32 rd, u32 base, i32 off) {
+  u32 scratch;
+  if (off == 0) {
+    if (rd != base) emit32(mc, rv_addi(rd, base, 0));
+    return;
+  }
+  if (off >= -2048 && off <= 2047) {
+    emit32(mc, rv_addi(rd, base, off));
+    return;
+  }
+  /* Never let the immediate load land on the register holding `base`. */
+  scratch = (base == RV_T1) ? RV_T0 : RV_T1;
+  emit_load_imm(mc, 1, scratch, (i64)off);
+  emit32(mc, rv_add(rd, base, scratch));
+}
+
static void rv_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) {
u32 sz = ma.size ? ma.size : type_byte_size(addr.type);
MCEmitter* mc = t->mc;
@@ -1097,6 +1141,22 @@ static void rv_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) {
u32 sec = mc->section_id;
ObjSymId sym = addr.v.global.sym;
i64 add = addr.v.global.addend;
+ /* Extern-via-GOT path: load &sym from GOT, then load the value at
+ * +addend (addend baked into the data load's imm12; relies on the
+ * common case of `add` fitting ±2047 — larger addends would need a
+ * follow-on ADD). */
+ if (rv64_use_got_for_sym(t, sym)) {
+ emit_got_load_addr(t, RV_T0, sym);
+ i32 ao = (i32)add;
+ if (dst.cls == RC_FP) {
+ if (sz == 8) emit32(mc, rv_fld(reg_num(dst), RV_T0, ao));
+ else emit32(mc, rv_flw(reg_num(dst), RV_T0, ao));
+ } else {
+ int sx = type_is_signed(addr.type);
+ emit32(mc, enc_int_load(sz, sx, reg_num(dst), RV_T0, ao));
+ }
+ return;
+ }
u32 ap = mc->pos(mc);
emit32(mc, rv_auipc(RV_T0, 0));
mc->emit_reloc_at(mc, sec, ap, R_RV_PCREL_HI20, sym, add, 0, 0);
@@ -1144,6 +1204,19 @@ static void rv_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) {
} else {
src_reg = reg_num(src);
}
+ /* Extern-via-GOT path: load &sym from GOT into t0, then store with
+ * addend baked into the imm12 (no reloc on the store). */
+ if (rv64_use_got_for_sym(t, sym)) {
+ emit_got_load_addr(t, RV_T0, sym);
+ i32 ao = (i32)add;
+ if (src_fp) {
+ if (sz == 8) emit32(mc, rv_fsd(src_reg, RV_T0, ao));
+ else emit32(mc, rv_fsw(src_reg, RV_T0, ao));
+ } else {
+ emit32(mc, enc_int_store(sz, src_reg, RV_T0, ao));
+ }
+ return;
+ }
u32 ap = mc->pos(mc);
emit32(mc, rv_auipc(RV_T0, 0));
mc->emit_reloc_at(mc, sec, ap, R_RV_PCREL_HI20, sym, add, 0, 0);
@@ -1204,11 +1277,19 @@ static void rv_addr_of(CGTarget* t, Operand dst, Operand lv) {
return;
}
if (lv.kind == OPK_GLOBAL) {
+ ObjSymId sym = lv.v.global.sym;
+ i64 addend = lv.v.global.addend;
+ /* Extern-via-GOT path: GOT load yields &sym directly; apply any
+ * addend with a follow-on ADDI/ADD (GOT relocs disallow addends). */
+ if (rv64_use_got_for_sym(t, sym)) {
+ emit_got_load_addr(t, rd, sym);
+ if (addend) emit_addr_adjust(mc, rd, rd, (i32)addend);
+ return;
+ }
u32 sec = mc->section_id;
u32 ap = mc->pos(mc);
emit32(mc, rv_auipc(rd, 0));
- mc->emit_reloc_at(mc, sec, ap, R_RV_PCREL_HI20, lv.v.global.sym,
- lv.v.global.addend, 0, 0);
+ mc->emit_reloc_at(mc, sec, ap, R_RV_PCREL_HI20, sym, addend, 0, 0);
ObjSymId anchor = emit_pcrel_anchor(t, sec, ap);
u32 ip = mc->pos(mc);
emit32(mc, rv_addi(rd, rd, 0));
diff --git a/src/arch/x64.c b/src/arch/x64.c
@@ -1195,12 +1195,65 @@ static u32 addr_base(CGTarget* t, Operand addr, i32* out_off) {
(int)addr.kind);
}
-/* Emit `lea rd, [rip + disp32]` and attach an R_X64_PLT32 reloc on the
- * disp32 site. PLT32 is correct for both functions (linker may route
- * through PLT) and data symbols (resolves to the symbol directly when
- * no PLT is needed). Addend -4 because the PC is end-of-instruction. */
+/* Reports whether `sym` has to be addressed through its GOT slot at
+ * this site. The answer is yes only for an undefined external (its
+ * section_id is OBJ_SEC_NONE) on a format/PIC mode that binds extern
+ * data through indirection (Mach-O always; ELF when compiling
+ * -fPIC/-fPIE). A failed symbol lookup yields 0.
+ * Counterpart of aarch64.c:use_got_for_sym. */
+static int x64_use_got_for_sym(CGTarget* t, ObjSymId sym) {
+  if (obj_format_extern_via_got(t->c)) {
+    const ObjSym* entry = obj_symbol_get(t->obj, sym);
+    if (entry && entry->section_id == OBJ_SEC_NONE) return 1;
+  }
+  return 0;
+}
+
+/* Materialize `&sym + addend` into `dst_reg`. For locally-defined or
+ * static-link extern symbols, emit `lea rd, [rip + disp32]` with
+ * R_X64_PLT32 (PLT32 collapses to a plain PC-relative LEA at link time
+ * — the PLT routing only fires when the linker actually needs the
+ * trampoline, i.e. function calls into a DSO). For undef externs in
+ * PIC/PIE we instead emit `mov rd, [rip + disp32]` against a GOT slot
+ * (R_X64_REX_GOTPCRELX) so the loader can resolve the symbol by
+ * patching a single slot rather than touching .text.
+ *
+ * Addend -4 because the PC is end-of-instruction. When routing
+ * through the GOT we omit any extra addend on the reloc (most loaders
+ * disallow nonzero addends on GOT-load fixups); a nonzero `addend` is
+ * instead applied after the load with a follow-up `add rd, imm8` /
+ * `add rd, imm32` on the loaded address. The addend is narrowed to
+ * 32 bits for that add, so it must fit in a signed imm32. */
static void emit_global_lea(CGTarget* t, u32 dst_reg, ObjSymId sym,
i64 addend) {
+ if (x64_use_got_for_sym(t, sym)) {
+ /* mov rd, [rip + disp32] */
+ emit_rex(t->mc, 1, dst_reg, 0, 0);
+ u8 op = 0x8B;
+ t->mc->emit_bytes(t->mc, &op, 1);
+ u8 mr = modrm(0u, (dst_reg & 7u), 5u); /* [RIP + disp32] */
+ t->mc->emit_bytes(t->mc, &mr, 1);
+ u32 disp_pos = t->mc->pos(t->mc);
+ emit_u32le(t->mc, 0);
+ t->mc->emit_reloc_at(t->mc, t->mc->section_id, disp_pos,
+ R_X64_REX_GOTPCRELX, sym, -4, 1, 0);
+ /* Apply any nonzero addend by adjusting the loaded value. */
+ if (addend) {
+ i32 a = (i32)addend;
+ if (a >= -128 && a <= 127) {
+ /* add r/m64, imm8 (REX.W + 0x83 /0 ib) */
+ emit_rex(t->mc, 1, 0, 0, dst_reg);
+ u8 add_op[2] = {0x83, modrm(3u, 0u, (u8)(dst_reg & 7u))};
+ t->mc->emit_bytes(t->mc, add_op, 2);
+ u8 ib = (u8)a;
+ t->mc->emit_bytes(t->mc, &ib, 1);
+ } else {
+ /* add r/m64, imm32 (REX.W + 0x81 /0 id) */
+ emit_rex(t->mc, 1, 0, 0, dst_reg);
+ u8 add_op[2] = {0x81, modrm(3u, 0u, (u8)(dst_reg & 7u))};
+ t->mc->emit_bytes(t->mc, add_op, 2);
+ emit_u32le(t->mc, (u32)a);
+ }
+ }
+ return;
+ }
emit_rex(t->mc, 1, dst_reg, 0, 0);
u8 op = 0x8D;
t->mc->emit_bytes(t->mc, &op, 1);
diff --git a/src/link/link_dyn.c b/src/link/link_dyn.c
@@ -739,7 +739,7 @@ void layout_dyn(Linker* l, LinkImage* img) {
* stp x16, x30, [sp,#-16]! : 0xa9bf7bf0
* nop : 0xD503201F
*/
- {
+ if (l->c->target.arch == CFREE_ARCH_ARM_64) {
u8* plt_b = img->segment_bytes[rx_seg_idx];
/* PLT0: load .got.plt[2] (resolver) into x17 and tail-call. */
u64 plt0_pc = dyn->plt_vaddr + 4u;
@@ -776,6 +776,71 @@ void layout_dyn(Linker* l, LinkImage* img) {
wr_u32_le(p + 8, 0x91000210u | (e_lo12 << 10));
wr_u32_le(p + 12, 0xD61F0220u);
}
+ } else if (l->c->target.arch == CFREE_ARCH_X86_64) {
+ /* x86_64 PLT layout under DF_1_NOW:
+ *
+ * PLT0 (32 B): emitted as the canonical lazy-resolve trampoline
+ * for disassembler readability. Loaders patch every .got.plt
+ * slot from .rela.plt before user code, so PLT0 itself never
+ * runs. We just fill it with NOPs (0x90) — that's
+ * self-documenting and trivially well-formed.
+ *
+ * per-import (16 B), entry i targets .got.plt[3 + i]:
+ * ff 25 disp32 ; jmpq *[rip + disp_to_slot]
+ * 0f 1f 84 00 00 00 00 00 ; 8-byte NOP (Intel "long nop")
+ * 90 90 90 ; pad to 16
+ *
+ * The disp32 PC base is the END of the jmp (entry_vaddr + 6). */
+ u8* plt_b = img->segment_bytes[rx_seg_idx];
+ memset(plt_b, 0x90, (size_t)plt_bytes); /* default NOP pad */
+ u32 ki;
+ for (ki = 0; ki < imports.nfuncs; ++ki) {
+ u64 entry_vaddr = dyn->plt_vaddr + 32u + 16u * (u64)ki;
+ u64 slot_vaddr = dyn->got_plt_vaddr + 8u * (3u + ki);
+ i64 disp = (i64)slot_vaddr - (i64)(entry_vaddr + 6u);
+ u8* p = plt_b + 32u + 16u * (u64)ki;
+ p[0] = 0xff;
+ p[1] = 0x25;
+ wr_u32_le(p + 2, (u32)((u64)disp & 0xffffffffu));
+ /* p[6..15] left as 0x90 from the memset above. */
+ }
+ } else if (l->c->target.arch == CFREE_ARCH_RV64) {
+ /* RISC-V psABI PLT under DF_1_NOW. Each entry resolves through
+ * the GOT slot the loader pre-fills via R_RISCV_JUMP_SLOT:
+ *
+ * auipc t3, %pcrel_hi(slot)
+ * ld t3, %pcrel_lo(.)(t3)
+ * jalr t1, t3 ; tail-call (t1 trashed)
+ * nop ; 16-byte alignment pad
+ *
+ * Encoded as raw u32 instructions. AUIPC operand carries the
+ * usual +0x800 bias so the LO12 in the load sign-extends
+ * correctly. PLT0 is left as canonical nops (32 bytes of
+ * 0x00000013) — never executed under BIND_NOW. */
+ u8* plt_b = img->segment_bytes[rx_seg_idx];
+ u32 ki;
+ u32 i;
+ for (i = 0; i < (u32)plt_bytes; i += 4) wr_u32_le(plt_b + i, 0x00000013u);
+ for (ki = 0; ki < imports.nfuncs; ++ki) {
+ u64 entry_vaddr = dyn->plt_vaddr + 32u + 16u * (u64)ki;
+ u64 slot_vaddr = dyn->got_plt_vaddr + 8u * (3u + ki);
+ i64 disp = (i64)slot_vaddr - (i64)entry_vaddr;
+ u32 hi20 = (u32)(((u64)(disp + 0x800)) >> 12) & 0xfffffu;
+ u32 lo12 = (u32)((u64)disp & 0xfffu);
+ u8* p = plt_b + 32u + 16u * (u64)ki;
+ /* auipc t3, hi20 ; opcode 0x17, rd=t3(28). */
+ wr_u32_le(p + 0, 0x00000e17u | (hi20 << 12));
+ /* ld t3, lo12(t3) ; opcode 0x03, funct3=3, rs1=t3, rd=t3. */
+ wr_u32_le(p + 4, 0x000e3e03u | (lo12 << 20));
+ /* jalr t1, 0(t3) ; opcode 0x67, funct3=0, rs1=t3, rd=t1(6). */
+ wr_u32_le(p + 8, 0x000e0367u);
+ /* nop */
+ wr_u32_le(p + 12, 0x00000013u);
+ }
+ } else {
+ compiler_panic(l->c, no_loc(),
+ "link: PLT emit for arch %u not implemented",
+ (u32)l->c->target.arch);
}
}
@@ -928,7 +993,25 @@ void layout_dyn(Linker* l, LinkImage* img) {
u64 plt_entry_vaddr = dyn->plt_vaddr + 32u + 16u * (u64)ki;
DynRela* r = &dyn->rela_plt[ki];
r->r_offset = slot_vaddr;
- r->r_info = ELF64_R_INFO((u64)dynidx, ELF_R_AARCH64_JUMP_SLOT);
+ {
+ u32 jt;
+ switch (l->c->target.arch) {
+ case CFREE_ARCH_ARM_64:
+ jt = ELF_R_AARCH64_JUMP_SLOT;
+ break;
+ case CFREE_ARCH_X86_64:
+ jt = ELF_R_X86_64_JUMP_SLOT;
+ break;
+ case CFREE_ARCH_RV64:
+ jt = ELF_R_RISCV_JUMP_SLOT;
+ break;
+ default:
+ compiler_panic(l->c, no_loc(),
+ "link: JUMP_SLOT type for arch %u not configured",
+ (u32)l->c->target.arch);
+ }
+ r->r_info = ELF64_R_INFO((u64)dynidx, jt);
+ }
r->r_addend = 0;
/* Serialize into segment bytes (will be re-serialized post-shift). */
u8* p = ro_bytes + rela_plt_off + (u64)ki * 24u;
@@ -966,6 +1049,16 @@ void layout_dyn(Linker* l, LinkImage* img) {
* post-shift vaddrs of every other dyn section and writes one
* DT_* entry per index. */
+ /* Synthesize linker-defined symbols that reference the .dynamic
+ * vaddr. Scrt1.o on Linux loads `_DYNAMIC` via ADRP+ADD, and
+ * libc_nonshared.a's atexit shim takes `__dso_handle` as the
+ * per-image identity (we use the .dynamic vaddr — any stable
+ * per-image address satisfies the contract since the shim only
+ * passes it through to __cxa_atexit, which the program-side glibc
+ * just stashes). */
+ link_define_boundary(l, img, "_DYNAMIC", dyn->dynamic_vaddr);
+ link_define_boundary(l, img, "__dso_handle", dyn->dynamic_vaddr);
+
free_imports(h, &imports);
}
diff --git a/src/link/link_elf.c b/src/link/link_elf.c
@@ -229,8 +229,14 @@ static int reloc_is_x64_tlsle(RelocKind k) {
static int reloc_is_abs(RelocKind k) { return k == R_ABS32 || k == R_ABS64; }
+/* Function-call relocs that may route through the PLT when the target
+ * is imported. aarch64 CALL26/JUMP26, x86_64 PLT32, and risc-v CALL_PLT
+ * (which cfree maps to R_PLT32) all carry the "call this address; if
+ * it's not resolvable here use the PLT trampoline" contract; the apply
+ * pass overwrites S with the PLT entry vaddr in that case.
+ * The "branch26" name predates the x86_64 / risc-v kinds; the predicate
+ * also accepts R_RV_CALL. Returns nonzero when `k` is one of these
+ * call-style kinds, 0 otherwise. */
static int reloc_is_branch26(RelocKind k) {
- return k == R_AARCH64_CALL26 || k == R_AARCH64_JUMP26;
+ return k == R_AARCH64_CALL26 || k == R_AARCH64_JUMP26 ||
+ k == R_X64_PLT32 || k == R_PLT32 || k == R_RV_CALL;
}
static void emit_dyn_record(LinkImage* img, u64 site_vaddr, u32 reloc_type,
@@ -249,13 +255,38 @@ static void emit_dyn_record(LinkImage* img, u64 site_vaddr, u32 reloc_type,
r->r_addend = addend;
}
+/* Selects the dynamic-relocation type number for the active target.
+ * The numbering is arch-specific (aarch64 starts at 1024; x86_64 lives
+ * in the low single-digit range; risc-v has its own encoding), so the
+ * caller supplies one candidate per supported arch and this returns the
+ * one matching img->c->target.arch, which is what lets the loader
+ * recognize our .rela.dyn entries. Any other arch is reported through
+ * compiler_panic. */
+static u32 dyn_reloc_type(LinkImage* img, u32 aarch64, u32 x86_64, u32 rv64) {
+  if (img->c->target.arch == CFREE_ARCH_ARM_64) return aarch64;
+  if (img->c->target.arch == CFREE_ARCH_X86_64) return x86_64;
+  if (img->c->target.arch == CFREE_ARCH_RV64) return rv64;
+  compiler_panic(img->c, no_loc(),
+                 "link: dyn reloc type for arch %u not configured",
+                 (u32)img->c->target.arch);
+}
+
+/* Record a "relative" dynamic relocation at `site_vaddr` carrying
+ * `addend`. The reloc type is the per-arch RELATIVE constant chosen by
+ * dyn_reloc_type; 0 is passed to emit_dyn_record in the position where
+ * emit_globdat_record passes its dynamic symbol index. */
static void emit_relative_record(LinkImage* img, u64 site_vaddr, u64 addend) {
- emit_dyn_record(img, site_vaddr, ELF_R_AARCH64_RELATIVE, 0, (i64)addend);
+ u32 t = dyn_reloc_type(img, ELF_R_AARCH64_RELATIVE, ELF_R_X86_64_RELATIVE,
+ ELF_R_RISCV_RELATIVE);
+ emit_dyn_record(img, site_vaddr, t, 0, (i64)addend);
}
+/* Record a GOT-slot data-import dynamic relocation at `site_vaddr`
+ * against dynamic symbol index `dynidx`, with `addend`. The type is the
+ * per-arch GLOB_DAT constant on aarch64 / x86_64 and R_RISCV_64 on
+ * rv64, selected through dyn_reloc_type. */
static void emit_globdat_record(LinkImage* img, u64 site_vaddr, u32 dynidx,
i64 addend) {
- emit_dyn_record(img, site_vaddr, ELF_R_AARCH64_GLOB_DAT, dynidx, addend);
+ u32 t = dyn_reloc_type(img, ELF_R_AARCH64_GLOB_DAT, ELF_R_X86_64_GLOB_DAT,
+ /* RISC-V dyn loader uses R_RISCV_64 for GOT-slot
+ * data imports — no dedicated GLOB_DAT type. */
+ ELF_R_RISCV_64);
+ emit_dyn_record(img, site_vaddr, t, dynidx, addend);
}
/* RISC-V PCREL_LO12_* references the address of an AUIPC carrying the
@@ -392,9 +423,15 @@ static void apply_all_relocs(LinkImage* img, u64 img_base) {
* fill saves a write. */
continue;
}
- compiler_panic(img->c, no_loc(),
- "link: unhandled reloc kind %u against imported symbol",
- (unsigned)r->kind);
+ {
+ size_t nl = 0;
+ const char* nm =
+ tgt->name ? pool_str(img->c->global, tgt->name, &nl) : "";
+ compiler_panic(
+ img->c, no_loc(),
+ "link: unhandled reloc kind %u against imported symbol '%.*s'",
+ (unsigned)r->kind, (int)nl, nm);
+ }
}
/* PIE: an absolute reloc against a defined non-imported symbol
diff --git a/src/link/link_internal.h b/src/link/link_internal.h
@@ -120,6 +120,14 @@ void link_ingest_archives(struct Linker*);
void layout_dyn(struct Linker*, LinkImage*);
void link_dyn_state_free(LinkImage*);
+/* Define / upsert a synthetic global symbol resolved to `vaddr`.
+ * Satisfies any prior undef ref (e.g. _DYNAMIC from Scrt1.o,
+ * __dso_handle from libc_nonshared.a) and fans out across per-input
+ * duplicate name slots so emit_reloc_records sees the resolved
+ * vaddr. Implemented in link_layout.c as a forwarder to the file-local
+ * emit_boundary_sym; layout_dyn calls it to point _DYNAMIC and
+ * __dso_handle at dyn->dynamic_vaddr. */
+void link_define_boundary(struct Linker*, LinkImage*, const char* name,
+ u64 vaddr);
+
/* SegVec instances for image-owned tables. Pointers returned by *_at /
* *_push remain valid for the LinkImage's lifetime. */
SEGVEC_DEFINE(LinkSyms, LinkSymbol, 6); /* 64 entries per segment */
diff --git a/src/link/link_layout.c b/src/link/link_layout.c
@@ -1598,6 +1598,11 @@ static void link_symbols_to_sections(Linker* l, LinkImage* img) {
/* ---- pass 3b: linker-synthesized boundary symbols ---- */
+/* Externally visible wrapper (declared in link_internal.h) around the
+ * file-local emit_boundary_sym: defines `name` as a linker-synthesized
+ * symbol at `vaddr`. All arguments are forwarded unchanged. */
+void link_define_boundary(Linker* l, LinkImage* img, const char* name,
+ u64 vaddr) {
+ emit_boundary_sym(l, img, name, vaddr);
+}
+
static void emit_boundary_sym(Linker* l, LinkImage* img, const char* name,
u64 vaddr) {
Sym sym = boundary_name(l, name);
@@ -1640,6 +1645,11 @@ static void emit_boundary_sym(Linker* l, LinkImage* img, const char* name,
s->vaddr = vaddr;
s->kind = SK_OBJ;
s->defined = 1;
+ /* If resolve_undefs previously matched this name as an import
+ * from a DSO (e.g. libc.so exports _DYNAMIC for its own image),
+ * the linker-supplied definition wins — clear the import marker
+ * so apply_all_relocs treats it as a normal defined symbol. */
+ s->imported = 0;
}
}
@@ -1832,6 +1842,10 @@ static u8 reloc_width(RelocKind k) {
case R_X64_PLT32:
case R_X64_32S:
case R_X64_TPOFF32:
+ case R_X64_GOTPCREL:
+ case R_X64_GOTPCRELX:
+ case R_X64_REX_GOTPCRELX:
+ case R_X64_GOTPC32:
return 4;
case R_ABS64:
case R_REL64:
@@ -1883,13 +1897,43 @@ static u8 reloc_width(RelocKind k) {
case R_RV_RELAX:
case R_RV_TPREL_ADD:
return 4;
+ /* RISC-V ADD/SUB/SET fixup relocs — modify the section bytes in
+ * place. Width is the byte count touched. SUB6/SET6 modify one
+ * byte (the low 6 bits) like SET8/SUB8. */
+ case R_RV_ADD8:
+ case R_RV_SUB8:
+ case R_RV_SUB6:
+ case R_RV_SET6:
+ case R_RV_SET8:
+ return 1;
+ case R_RV_ADD16:
+ case R_RV_SUB16:
+ case R_RV_SET16:
+ return 2;
+ case R_RV_ADD32:
+ case R_RV_SUB32:
+ case R_RV_SET32:
+ return 4;
+ case R_RV_ADD64:
+ case R_RV_SUB64:
+ return 8;
default:
return 0;
}
}
+/* Nonzero when `kind` is one of the GOT-indirect reloc kinds: aarch64
+ * ADR_GOT_PAGE / LD64_GOT_LO12_NC, x86_64 GOTPCREL / GOTPCRELX /
+ * REX_GOTPCRELX, or RISC-V GOT_HI20; 0 for every other kind.
+ * NOTE(review): R_X64_GOTPC32 is given a width in reloc_width but is
+ * not listed here — presumably because it targets the GOT base rather
+ * than a per-symbol slot; confirm. */
static int reloc_uses_got(u16 kind) {
- return kind == R_AARCH64_ADR_GOT_PAGE || kind == R_AARCH64_LD64_GOT_LO12_NC;
+ switch (kind) {
+ case R_AARCH64_ADR_GOT_PAGE:
+ case R_AARCH64_LD64_GOT_LO12_NC:
+ case R_X64_GOTPCREL:
+ case R_X64_GOTPCRELX:
+ case R_X64_REX_GOTPCRELX:
+ case R_RV_GOT_HI20:
+ return 1;
+ default:
+ return 0;
+ }
}
static void emit_reloc_records(Linker* l, LinkImage* img,
@@ -1910,10 +1954,13 @@ static void emit_reloc_records(Linker* l, LinkImage* img,
if (!s || !section_kept(s)) continue;
/* Skip relocs whose containing section was GC'd. */
if (m->section[r->section_id] == LINK_SEC_NONE) continue;
- /* RISC-V marker relocs (RELAX, TPREL_ADD) reference no symbol —
- * they annotate the prior reloc for relaxation or TLS-add folding.
+ /* RISC-V marker relocs (RELAX, TPREL_ADD, ALIGN) reference no
+ * symbol — they annotate the prior reloc for relaxation, TLS
+ * thread-pointer ADD folding, or alignment-aware code shrinking.
* We don't relax, so drop them entirely. */
- if (r->kind == R_RV_RELAX || r->kind == R_RV_TPREL_ADD) continue;
+ if (r->kind == R_RV_RELAX || r->kind == R_RV_TPREL_ADD ||
+ r->kind == R_RV_ALIGN)
+ continue;
if (r->sym == OBJ_SYM_NONE || r->sym >= m->nsym)
compiler_panic(l->c, no_loc(), "link: reloc references unknown symbol");
target = m->sym[r->sym];
@@ -2916,6 +2963,41 @@ LinkImage* link_resolve(Linker* l) {
emit_array_boundaries(l, img);
emit_tls_boundaries(l, img);
emit_encoding_section_boundaries(l, img);
+ /* Linker-defined synthetic symbols that may be referenced as
+ * undefs (often hidden) by sysroot startfiles / nonshared archives.
+ * Pre-defining them here satisfies resolve_undefs' undef sweep so
+ * it doesn't panic on hidden-undef references that no object or
+ * DSO supplies. vaddr=0 is a placeholder; layout_dyn may refine
+ * _DYNAMIC to the actual .dynamic vaddr later. */
+ emit_boundary_sym(l, img, "__dso_handle", 0);
+ emit_boundary_sym(l, img, "_DYNAMIC", 0);
+ /* _GLOBAL_OFFSET_TABLE_ is referenced as a SHN_UNDEF marker by
+ * any x86_64 input that uses the GOT (musl/glibc libc.a routinely
+ * do). GNU ld auto-defines it at the .got base; cfree-ld doesn't
+ * use the symbol for any actual reloc, so a placeholder vaddr=0
+ * keeps the undef sweep happy without affecting code that
+ * computes GOT addresses through their own GOTPC32 relocs. */
+ emit_boundary_sym(l, img, "_GLOBAL_OFFSET_TABLE_", 0);
+ /* RISC-V startfiles use `__global_pointer$` to load gp in _start;
+ * the RISC-V psABI says it's defined as `.sdata + 0x800` so
+ * gp-relative addressing covers [.sdata - 2KiB, .sdata + 2KiB).
+ * We don't have .sdata as a distinct section, but any address in
+ * the writable data region is functionally adequate when the code
+ * doesn't actually use gp-relative addressing (cfree-cc doesn't
+ * emit `-mrelax`, and musl's static crt only loads gp without
+ * dereferencing through it). Pick the first RW segment base +
+ * 0x800. Only relevant for rv64; harmless on other arches. */
+ if (l->c->target.arch == CFREE_ARCH_RV64) {
+ u32 si;
+ u64 gp_vaddr = 0;
+ for (si = 0; si < img->nsegments; ++si) {
+ if (img->segments[si].flags & SF_WRITE) {
+ gp_vaddr = img->segments[si].vaddr + 0x800u;
+ break;
+ }
+ }
+ emit_boundary_sym(l, img, "__global_pointer$", gp_vaddr);
+ }
resolve_undefs(l, img);
gc_drop_dead_globals(l, img, &g);
/* layout_iplt runs last among the symbol-shaping passes: it
diff --git a/src/link/link_reloc.c b/src/link/link_reloc.c
@@ -46,7 +46,11 @@ void link_reloc_apply(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A,
}
case R_REL32:
case R_PC32:
- case R_X64_PLT32: {
+ case R_X64_PLT32:
+ case R_X64_GOTPCREL:
+ case R_X64_GOTPCRELX:
+ case R_X64_REX_GOTPCRELX:
+ case R_X64_GOTPC32: {
/* AArch64 ELF: PREL32 maps to either of these; both encode a
* 32-bit signed PC-relative displacement. The cfree-canonical
* distinction (section-relative vs PC-relative) collapses on
@@ -411,6 +415,69 @@ void link_reloc_apply(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A,
* the linker may fold during relaxation. We don't relax, so
* both are no-ops. */
return;
+ case R_RV_ADD8: {
+ /* word8 += S + A. Used (paired with a SUB8 against another sym
+ * at the same site) to encode symbol differences. */
+ u8 cur = P_bytes[0];
+ P_bytes[0] = (u8)(cur + (u8)((S + (u64)A) & 0xffu));
+ return;
+ }
+ case R_RV_SUB8: {
+ u8 cur = P_bytes[0];
+ P_bytes[0] = (u8)(cur - (u8)((S + (u64)A) & 0xffu));
+ return;
+ }
+ case R_RV_ADD16: {
+ u16 cur = rd_u16_le(P_bytes);
+ wr_u16_le(P_bytes, (u16)(cur + (u16)((S + (u64)A) & 0xffffu)));
+ return;
+ }
+ case R_RV_SUB16: {
+ u16 cur = rd_u16_le(P_bytes);
+ wr_u16_le(P_bytes, (u16)(cur - (u16)((S + (u64)A) & 0xffffu)));
+ return;
+ }
+ case R_RV_ADD32: {
+ u32 cur = rd_u32_le(P_bytes);
+ wr_u32_le(P_bytes, (u32)(cur + (u32)((S + (u64)A) & 0xffffffffu)));
+ return;
+ }
+ case R_RV_SUB32: {
+ u32 cur = rd_u32_le(P_bytes);
+ wr_u32_le(P_bytes, (u32)(cur - (u32)((S + (u64)A) & 0xffffffffu)));
+ return;
+ }
+ case R_RV_ADD64: {
+ u64 cur = rd_u64_le(P_bytes);
+ wr_u64_le(P_bytes, cur + S + (u64)A);
+ return;
+ }
+ case R_RV_SUB64: {
+ u64 cur = rd_u64_le(P_bytes);
+ wr_u64_le(P_bytes, cur - S - (u64)A);
+ return;
+ }
+ case R_RV_SUB6: {
+ /* Bottom 6 bits of byte = (byte - (S + A)) & 0x3f. */
+ u8 cur = P_bytes[0];
+ u8 v = (u8)((cur & 0x3fu) - (u8)((S + (u64)A) & 0x3fu));
+ P_bytes[0] = (u8)((cur & 0xc0u) | (v & 0x3fu));
+ return;
+ }
+ case R_RV_SET6: {
+ u8 cur = P_bytes[0];
+ P_bytes[0] = (u8)((cur & 0xc0u) | (u8)((S + (u64)A) & 0x3fu));
+ return;
+ }
+ case R_RV_SET8:
+ P_bytes[0] = (u8)((S + (u64)A) & 0xffu);
+ return;
+ case R_RV_SET16:
+ wr_u16_le(P_bytes, (u16)((S + (u64)A) & 0xffffu));
+ return;
+ case R_RV_SET32:
+ wr_u32_le(P_bytes, (u32)((S + (u64)A) & 0xffffffffu));
+ return;
default:
compiler_panic(c, no_loc(),
"link: unsupported reloc kind %u",
diff --git a/src/obj/elf.h b/src/obj/elf.h
@@ -331,6 +331,8 @@ u32 elf_x86_64_reloc_from(u32 elf_type);
#define ELF_R_RISCV_SET16 55
#define ELF_R_RISCV_SET32 56
#define ELF_R_RISCV_32_PCREL 57
+#define ELF_R_RISCV_SET_ULEB128 60
+#define ELF_R_RISCV_SUB_ULEB128 61
u32 elf_riscv64_reloc_to(u32 kind /* RelocKind */);
u32 elf_riscv64_reloc_from(u32 elf_type);
diff --git a/src/obj/elf_reloc_riscv64.c b/src/obj/elf_reloc_riscv64.c
@@ -85,6 +85,10 @@ u32 elf_riscv64_reloc_to(u32 kind /* RelocKind */) {
return ELF_R_RISCV_SET16;
case R_RV_SET32:
return ELF_R_RISCV_SET32;
+ case R_RV_SET_ULEB128:
+ return ELF_R_RISCV_SET_ULEB128;
+ case R_RV_SUB_ULEB128:
+ return ELF_R_RISCV_SUB_ULEB128;
default:
return ELF_R_RISCV_NONE;
}
@@ -164,6 +168,10 @@ u32 elf_riscv64_reloc_from(u32 elf_type) {
return R_RV_SET16;
case ELF_R_RISCV_SET32:
return R_RV_SET32;
+ case ELF_R_RISCV_SET_ULEB128:
+ return R_RV_SET_ULEB128;
+ case ELF_R_RISCV_SUB_ULEB128:
+ return R_RV_SUB_ULEB128;
default:
return (u32)-1; /* sentinel */
}
diff --git a/src/obj/obj.h b/src/obj/obj.h
@@ -201,6 +201,8 @@ typedef enum RelocKind {
R_RV_SET8,
R_RV_SET16,
R_RV_SET32,
+ R_RV_SET_ULEB128,
+ R_RV_SUB_ULEB128,
R_WASM_FUNCIDX,
R_WASM_TABLEIDX,
R_WASM_MEMOFS,
diff --git a/src/obj/obj_secnames.c b/src/obj/obj_secnames.c
@@ -91,5 +91,19 @@ Sym obj_secname_tbss(Compiler* c) {
}
int obj_format_extern_via_got(const Compiler* c) {
- return c->target.obj == CFREE_OBJ_MACHO;
+  /* Answers "must extern data be reached through the GOT?" per object
+   * format:
+   *
+   * Mach-O: always. Extern data binds through __got / non-lazy
+   * pointers; a direct ADRP+ADD to an imported symbol isn't
+   * representable in ld64's reloc set.
+   *
+   * ELF with -fPIC / -fPIE: yes. The symbol may end up defined in a DSO
+   * at runtime, so codegen routes through the GOT and the loader patches
+   * a single slot rather than touching .text.
+   *
+   * ELF static link (no PIC/PIE): no. Extern data is resolved at link
+   * time, so direct page-relative addressing works (the linker patches
+   * the ADRP+ADD).
+   *
+   * Any other format: no. */
+  switch (c->target.obj) {
+    case CFREE_OBJ_MACHO:
+      return 1;
+    case CFREE_OBJ_ELF:
+      return c->target.pic == CFREE_PIC_PIC || c->target.pic == CFREE_PIC_PIE;
+    default:
+      return 0;
+  }
}