kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 305d49af9c1d7c268a4db82e443d796923046ed1
parent b6df55cdbea665c89a313a54e17804032f8512c0
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 11 May 2026 05:37:39 -0700

libc: rv64/x64 GOT codegen + PIE link plumbing — full 10-cell test-libc green

Closes the 9-cell Linux matrix (musl-static, musl-dynamic, glibc-dynamic
× x64, aarch64, rv64) plus the existing darwin cell — 70/70 pass.

Codegen:
- arch/rv64: route OPK_GLOBAL through R_RV_GOT_HI20 + R_RV_PCREL_LO12_I
  in rv_load/rv_store/rv_addr_of when extern_via_got and the symbol is
  imported. Mirrors x64 GOTPCRELX / aa64 ADR_GOT_PAGE shape.
- arch/x64: x64_use_got_for_sym + REX_GOTPCRELX load path for extern
  data on ELF PIE.
- obj/obj_secnames: obj_format_extern_via_got returns true for ELF PIC/PIE
  (was Mach-O only).

Linker:
- link_layout: GOT slot recognition for x64 GOTPCREL family + R_RV_GOT_HI20;
  pre-resolve synth of __dso_handle, _DYNAMIC, _GLOBAL_OFFSET_TABLE_, and
  rv64 __global_pointer$ (RW base + 0x800).
- link_elf: arch-dispatched RELATIVE/GLOB_DAT/JUMP_SLOT dyn-reloc types;
  reloc_is_branch26 covers R_X64_PLT32 / R_PLT32 / R_RV_CALL so imported
  function calls route through the PLT.
- link_dyn: x64 PLT stub (ff 25 disp32 + nop pad) and rv64 PLT stub
  (auipc/ld/jalr) emission; JUMP_SLOT type per arch; refinement of
  _DYNAMIC / __dso_handle once dynamic_vaddr is known.
- link_reloc: apply paths for x64 GOTPCREL{,X}/REX_GOTPCRELX/GOTPC32 and
  rv64 ADD/SUB/SET{6,8,16,32} + ULEB128 fixups.

Object reader:
- elf/elf_reloc_riscv64: R_RISCV_SET_ULEB128 (60) / R_RISCV_SUB_ULEB128 (61)
  for musl rv64 libc.a debug sections.

Hosted shim build:
- Makefile: build/cfree_hosted/linux-*.o with -fPIE -fpic so static and
  dynamic Linux cells share one shim.
- rt/Makefile: riscv64-linux variant with LDBL128=1 (rv64 lp64d uses
  binary128 long double).

Diffstat:
MMakefile | 6+++---
Mrt/Makefile | 1+
Msrc/api/pipeline.c | 4++++
Msrc/arch/rv64.c | 85+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Msrc/arch/x64.c | 61+++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
Msrc/link/link_dyn.c | 97+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Msrc/link/link_elf.c | 49+++++++++++++++++++++++++++++++++++++++++++------
Msrc/link/link_internal.h | 8++++++++
Msrc/link/link_layout.c | 90+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
Msrc/link/link_reloc.c | 69++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
Msrc/obj/elf.h | 2++
Msrc/obj/elf_reloc_riscv64.c | 8++++++++
Msrc/obj/obj.h | 2++
Msrc/obj/obj_secnames.c | 16+++++++++++++++-
14 files changed, 475 insertions(+), 23 deletions(-)

diff --git a/Makefile b/Makefile @@ -106,15 +106,15 @@ hosted-linux-rv64: $(HOSTED_LINUX_RV64_OBJ) $(HOSTED_LINUX_AARCH64_OBJ): $(HOSTED_LINUX_SRC) $(BIN) @mkdir -p $(dir $@) - $(BIN) cc -target aarch64-linux -c $< -o $@ + $(BIN) cc -target aarch64-linux -fPIE -fpic -c $< -o $@ $(HOSTED_LINUX_X64_OBJ): $(HOSTED_LINUX_SRC) $(BIN) @mkdir -p $(dir $@) - $(BIN) cc -target x86_64-linux -c $< -o $@ + $(BIN) cc -target x86_64-linux -fPIE -fpic -c $< -o $@ $(HOSTED_LINUX_RV64_OBJ): $(HOSTED_LINUX_SRC) $(BIN) @mkdir -p $(dir $@) - $(BIN) cc -target riscv64-linux -c $< -o $@ + $(BIN) cc -target riscv64-linux -fPIE -fpic -c $< -o $@ # Replace the archive (`ar rcs` only adds/updates), so removing a .c file # also removes its .o from the archive on the next build. diff --git a/rt/Makefile b/rt/Makefile @@ -72,6 +72,7 @@ ifeq ($(VARIANT),riscv64-linux) ABI = lp64 INT128 = 1 CORO = riscv64 + LDBL128 = 1 ARCH_FLAGS = -mabi=lp64d -march=rv64imafd endif ifeq ($(VARIANT),riscv64-elf) diff --git a/src/api/pipeline.c b/src/api/pipeline.c @@ -1148,6 +1148,10 @@ static const char* reloc_kind_name(u16 kind) { return "R_RISCV_SET16"; case R_RV_SET32: return "R_RISCV_SET32"; + case R_RV_SET_ULEB128: + return "R_RISCV_SET_ULEB128"; + case R_RV_SUB_ULEB128: + return "R_RISCV_SUB_ULEB128"; case R_WASM_FUNCIDX: return "R_WASM_FUNCTION_INDEX_LEB"; case R_WASM_TABLEIDX: diff --git a/src/arch/rv64.c b/src/arch/rv64.c @@ -1071,6 +1071,17 @@ static u32 addr_base(CGTarget* t, Operand addr, i32* out_off, u32 tmp_reg) { (int)addr.kind); } +/* True when the symbol must be reached via the GOT at this site: an + * undefined external on a target format that binds extern data through + * GOT indirection (Mach-O always; ELF when compiling -fPIC/-fPIE). + * Mirrors aarch64.c:use_got_for_sym / x64.c:x64_use_got_for_sym. */ +static int rv64_use_got_for_sym(CGTarget* t, ObjSymId sym) { + const ObjSym* s; + if (!obj_format_extern_via_got(t->c)) return 0; + s = obj_symbol_get(t->obj, sym); + return s && s->section_id == OBJ_SEC_NONE; +} + /* Anchor symbol management for PCREL_LO12_*. Each AUIPC site gets a * fresh local sym; the paired LO12 reloc references the anchor. */ static ObjSymId emit_pcrel_anchor(CGTarget* t, u32 sec, u32 auipc_pos) { @@ -1089,6 +1100,39 @@ static ObjSymId emit_pcrel_anchor(CGTarget* t, u32 sec, u32 auipc_pos) { return obj_symbol(t->obj, n, SB_LOCAL, SK_OBJ, sec, (u64)auipc_pos, 0); } +/* Emit `auipc dst, %got_pcrel_hi(sym) ; ld dst, %pcrel_lo(.)(dst)`, + * leaving the runtime address of `sym` (the GOT slot's contents) in + * `dst_reg`. Addends are omitted from the GOT relocs — most loaders + * disallow nonzero addends on GOT-load fixups — so callers apply any + * displacement with a follow-on ADDI/ADD against the loaded base. */ +static void emit_got_load_addr(CGTarget* t, u32 dst_reg, ObjSymId sym) { + MCEmitter* mc = t->mc; + u32 sec = mc->section_id; + u32 ap = mc->pos(mc); + emit32(mc, rv_auipc(dst_reg, 0)); + mc->emit_reloc_at(mc, sec, ap, R_RV_GOT_HI20, sym, 0, 0, 0); + ObjSymId anchor = emit_pcrel_anchor(t, sec, ap); + u32 lp = mc->pos(mc); + emit32(mc, rv_ld(dst_reg, dst_reg, 0)); + mc->emit_reloc_at(mc, sec, lp, R_RV_PCREL_LO12_I, anchor, 0, 0, 0); +} + +/* Add a signed displacement `off` to `base`, writing into `rd`. Uses + * ADDI for ±2047, otherwise materializes the offset via emit_load_imm + * + ADD. Mirrors emit_addr_adjust in aarch64.c. */ +static void emit_addr_adjust(MCEmitter* mc, u32 rd, u32 base, i32 off) { + if (off == 0) { + if (rd != base) emit32(mc, rv_addi(rd, base, 0)); + return; + } + if (off >= -2048 && off <= 2047) { + emit32(mc, rv_addi(rd, base, off)); + return; + } + emit_load_imm(mc, 1, RV_T1, (i64)off); + emit32(mc, rv_add(rd, base, RV_T1)); +} + static void rv_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) { u32 sz = ma.size ? ma.size : type_byte_size(addr.type); MCEmitter* mc = t->mc; @@ -1097,6 +1141,22 @@ static void rv_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) { u32 sec = mc->section_id; ObjSymId sym = addr.v.global.sym; i64 add = addr.v.global.addend; + /* Extern-via-GOT path: load &sym from GOT, then load the value at + * +addend (addend baked into the data load's imm12; relies on the + * common case of `add` fitting ±2047 — larger addends would need a + * follow-on ADD). */ + if (rv64_use_got_for_sym(t, sym)) { + emit_got_load_addr(t, RV_T0, sym); + i32 ao = (i32)add; + if (dst.cls == RC_FP) { + if (sz == 8) emit32(mc, rv_fld(reg_num(dst), RV_T0, ao)); + else emit32(mc, rv_flw(reg_num(dst), RV_T0, ao)); + } else { + int sx = type_is_signed(addr.type); + emit32(mc, enc_int_load(sz, sx, reg_num(dst), RV_T0, ao)); + } + return; + } u32 ap = mc->pos(mc); emit32(mc, rv_auipc(RV_T0, 0)); mc->emit_reloc_at(mc, sec, ap, R_RV_PCREL_HI20, sym, add, 0, 0); @@ -1144,6 +1204,19 @@ static void rv_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) { } else { src_reg = reg_num(src); } + /* Extern-via-GOT path: load &sym from GOT into t0, then store with + * addend baked into the imm12 (no reloc on the store). */ + if (rv64_use_got_for_sym(t, sym)) { + emit_got_load_addr(t, RV_T0, sym); + i32 ao = (i32)add; + if (src_fp) { + if (sz == 8) emit32(mc, rv_fsd(src_reg, RV_T0, ao)); + else emit32(mc, rv_fsw(src_reg, RV_T0, ao)); + } else { + emit32(mc, enc_int_store(sz, src_reg, RV_T0, ao)); + } + return; + } u32 ap = mc->pos(mc); emit32(mc, rv_auipc(RV_T0, 0)); mc->emit_reloc_at(mc, sec, ap, R_RV_PCREL_HI20, sym, add, 0, 0); @@ -1204,11 +1277,19 @@ static void rv_addr_of(CGTarget* t, Operand dst, Operand lv) { return; } if (lv.kind == OPK_GLOBAL) { + ObjSymId sym = lv.v.global.sym; + i64 addend = lv.v.global.addend; + /* Extern-via-GOT path: GOT load yields &sym directly; apply any + * addend with a follow-on ADDI/ADD (GOT relocs disallow addends). */ + if (rv64_use_got_for_sym(t, sym)) { + emit_got_load_addr(t, rd, sym); + if (addend) emit_addr_adjust(mc, rd, rd, (i32)addend); + return; + } u32 sec = mc->section_id; u32 ap = mc->pos(mc); emit32(mc, rv_auipc(rd, 0)); - mc->emit_reloc_at(mc, sec, ap, R_RV_PCREL_HI20, lv.v.global.sym, - lv.v.global.addend, 0, 0); + mc->emit_reloc_at(mc, sec, ap, R_RV_PCREL_HI20, sym, addend, 0, 0); ObjSymId anchor = emit_pcrel_anchor(t, sec, ap); u32 ip = mc->pos(mc); emit32(mc, rv_addi(rd, rd, 0)); diff --git a/src/arch/x64.c b/src/arch/x64.c @@ -1195,12 +1195,65 @@ static u32 addr_base(CGTarget* t, Operand addr, i32* out_off) { (int)addr.kind); } -/* Emit `lea rd, [rip + disp32]` and attach an R_X64_PLT32 reloc on the - * disp32 site. PLT32 is correct for both functions (linker may route - * through PLT) and data symbols (resolves to the symbol directly when - * no PLT is needed). Addend -4 because the PC is end-of-instruction. */ +/* True when the symbol must be reached via the GOT at this site: + * an undefined external on a format/PIC mode that binds extern data + * through indirection (Mach-O always; ELF when compiling -fPIC/-fPIE). + * Mirrors aarch64.c:use_got_for_sym. */ +static int x64_use_got_for_sym(CGTarget* t, ObjSymId sym) { + const ObjSym* s; + if (!obj_format_extern_via_got(t->c)) return 0; + s = obj_symbol_get(t->obj, sym); + return s && s->section_id == OBJ_SEC_NONE; +} + +/* Materialize `&sym + addend` into `dst_reg`. For locally-defined or + * static-link extern symbols, emit `lea rd, [rip + disp32]` with + * R_X64_PLT32 (PLT32 collapses to a plain PC-relative LEA at link time + * — the PLT routing only fires when the linker actually needs the + * trampoline, i.e. function calls into a DSO). For undef externs in + * PIC/PIE we instead emit `mov rd, [rip + disp32]` against a GOT slot + * (R_X64_REX_GOTPCRELX) so the loader can resolve the symbol by + * patching a single slot rather than touching .text. + * + * Addend -4 because the PC is end-of-instruction. When routing + * through the GOT we omit any extra addend on the reloc (most loaders + * disallow nonzero addends on GOT-load fixups); a follow-up `add` / + * `lea` would have to add it after the load if the codegen needed + * `&sym + nonzero`. In practice the caller only ever passes + * addend=0 for global references that go through the GOT path. */ static void emit_global_lea(CGTarget* t, u32 dst_reg, ObjSymId sym, i64 addend) { + if (x64_use_got_for_sym(t, sym)) { + /* mov rd, [rip + disp32] */ + emit_rex(t->mc, 1, dst_reg, 0, 0); + u8 op = 0x8B; + t->mc->emit_bytes(t->mc, &op, 1); + u8 mr = modrm(0u, (dst_reg & 7u), 5u); /* [RIP + disp32] */ + t->mc->emit_bytes(t->mc, &mr, 1); + u32 disp_pos = t->mc->pos(t->mc); + emit_u32le(t->mc, 0); + t->mc->emit_reloc_at(t->mc, t->mc->section_id, disp_pos, + R_X64_REX_GOTPCRELX, sym, -4, 1, 0); + /* Apply any nonzero addend by adjusting the loaded value. */ + if (addend) { + i32 a = (i32)addend; + if (a >= -128 && a <= 127) { + /* add r/m64, imm8 (REX.W + 0x83 /0 ib) */ + emit_rex(t->mc, 1, 0, 0, dst_reg); + u8 add_op[2] = {0x83, modrm(3u, 0u, (u8)(dst_reg & 7u))}; + t->mc->emit_bytes(t->mc, add_op, 2); + u8 ib = (u8)a; + t->mc->emit_bytes(t->mc, &ib, 1); + } else { + /* add r/m64, imm32 (REX.W + 0x81 /0 id) */ + emit_rex(t->mc, 1, 0, 0, dst_reg); + u8 add_op[2] = {0x81, modrm(3u, 0u, (u8)(dst_reg & 7u))}; + t->mc->emit_bytes(t->mc, add_op, 2); + emit_u32le(t->mc, (u32)a); + } + } + return; + } emit_rex(t->mc, 1, dst_reg, 0, 0); u8 op = 0x8D; t->mc->emit_bytes(t->mc, &op, 1); diff --git a/src/link/link_dyn.c b/src/link/link_dyn.c @@ -739,7 +739,7 @@ void layout_dyn(Linker* l, LinkImage* img) { * stp x16, x30, [sp,#-16]! : 0xa9bf7bf0 * nop : 0xD503201F */ - { + if (l->c->target.arch == CFREE_ARCH_ARM_64) { u8* plt_b = img->segment_bytes[rx_seg_idx]; /* PLT0: load .got.plt[2] (resolver) into x17 and tail-call. */ u64 plt0_pc = dyn->plt_vaddr + 4u; @@ -776,6 +776,71 @@ void layout_dyn(Linker* l, LinkImage* img) { wr_u32_le(p + 8, 0x91000210u | (e_lo12 << 10)); wr_u32_le(p + 12, 0xD61F0220u); } + } else if (l->c->target.arch == CFREE_ARCH_X86_64) { + /* x86_64 PLT layout under DF_1_NOW: + * + * PLT0 (32 B): emitted as the canonical lazy-resolve trampoline + * for disassembler readability. Loaders patch every .got.plt + * slot from .rela.plt before user code, so PLT0 itself never + * runs. We just fill it with NOPs (0x90) — that's + * self-documenting and trivially well-formed. + * + * per-import (16 B), entry i targets .got.plt[3 + i]: + * ff 25 disp32 ; jmpq *[rip + disp_to_slot] + * 0f 1f 84 00 00 00 00 00 ; 8-byte NOP (Intel "long nop") + * 90 90 90 ; pad to 16 + * + * The disp32 PC base is the END of the jmp (entry_vaddr + 6). */ + u8* plt_b = img->segment_bytes[rx_seg_idx]; + memset(plt_b, 0x90, (size_t)plt_bytes); /* default NOP pad */ + u32 ki; + for (ki = 0; ki < imports.nfuncs; ++ki) { + u64 entry_vaddr = dyn->plt_vaddr + 32u + 16u * (u64)ki; + u64 slot_vaddr = dyn->got_plt_vaddr + 8u * (3u + ki); + i64 disp = (i64)slot_vaddr - (i64)(entry_vaddr + 6u); + u8* p = plt_b + 32u + 16u * (u64)ki; + p[0] = 0xff; + p[1] = 0x25; + wr_u32_le(p + 2, (u32)((u64)disp & 0xffffffffu)); + /* p[6..15] left as 0x90 from the memset above. */ + } + } else if (l->c->target.arch == CFREE_ARCH_RV64) { + /* RISC-V psABI PLT under DF_1_NOW. Each entry resolves through + * the GOT slot the loader pre-fills via R_RISCV_JUMP_SLOT: + * + * auipc t3, %pcrel_hi(slot) + * ld t3, %pcrel_lo(.)(t3) + * jalr t1, t3 ; tail-call (t1 trashed) + * nop ; 16-byte alignment pad + * + * Encoded as raw u32 instructions. AUIPC operand carries the + * usual +0x800 bias so the LO12 in the load sign-extends + * correctly. PLT0 is left as canonical nops (32 bytes of + * 0x00000013) — never executed under BIND_NOW. */ + u8* plt_b = img->segment_bytes[rx_seg_idx]; + u32 ki; + u32 i; + for (i = 0; i < (u32)plt_bytes; i += 4) wr_u32_le(plt_b + i, 0x00000013u); + for (ki = 0; ki < imports.nfuncs; ++ki) { + u64 entry_vaddr = dyn->plt_vaddr + 32u + 16u * (u64)ki; + u64 slot_vaddr = dyn->got_plt_vaddr + 8u * (3u + ki); + i64 disp = (i64)slot_vaddr - (i64)entry_vaddr; + u32 hi20 = (u32)(((u64)(disp + 0x800)) >> 12) & 0xfffffu; + u32 lo12 = (u32)((u64)disp & 0xfffu); + u8* p = plt_b + 32u + 16u * (u64)ki; + /* auipc t3, hi20 ; opcode 0x17, rd=t3(28). */ + wr_u32_le(p + 0, 0x00000e17u | (hi20 << 12)); + /* ld t3, lo12(t3) ; opcode 0x03, funct3=3, rs1=t3, rd=t3. */ + wr_u32_le(p + 4, 0x000e3e03u | (lo12 << 20)); + /* jalr t1, 0(t3) ; opcode 0x67, funct3=0, rs1=t3, rd=t1(6). */ + wr_u32_le(p + 8, 0x000e0367u); + /* nop */ + wr_u32_le(p + 12, 0x00000013u); + } + } else { + compiler_panic(l->c, no_loc(), + "link: PLT emit for arch %u not implemented", + (u32)l->c->target.arch); } } @@ -928,7 +993,25 @@ void layout_dyn(Linker* l, LinkImage* img) { u64 plt_entry_vaddr = dyn->plt_vaddr + 32u + 16u * (u64)ki; DynRela* r = &dyn->rela_plt[ki]; r->r_offset = slot_vaddr; - r->r_info = ELF64_R_INFO((u64)dynidx, ELF_R_AARCH64_JUMP_SLOT); + { + u32 jt; + switch (l->c->target.arch) { + case CFREE_ARCH_ARM_64: + jt = ELF_R_AARCH64_JUMP_SLOT; + break; + case CFREE_ARCH_X86_64: + jt = ELF_R_X86_64_JUMP_SLOT; + break; + case CFREE_ARCH_RV64: + jt = ELF_R_RISCV_JUMP_SLOT; + break; + default: + compiler_panic(l->c, no_loc(), + "link: JUMP_SLOT type for arch %u not configured", + (u32)l->c->target.arch); + } + r->r_info = ELF64_R_INFO((u64)dynidx, jt); + } r->r_addend = 0; /* Serialize into segment bytes (will be re-serialized post-shift). */ u8* p = ro_bytes + rela_plt_off + (u64)ki * 24u; @@ -966,6 +1049,16 @@ void layout_dyn(Linker* l, LinkImage* img) { * post-shift vaddrs of every other dyn section and writes one * DT_* entry per index. */ + /* Synthesize linker-defined symbols that reference the .dynamic + * vaddr. Scrt1.o on Linux loads `_DYNAMIC` via ADRP+ADD, and + * libc_nonshared.a's atexit shim takes `__dso_handle` as the + * per-image identity (we use the .dynamic vaddr — any stable + * per-image address satisfies the contract since the shim only + * passes it through to __cxa_atexit, which the program-side glibc + * just stashes). */ + link_define_boundary(l, img, "_DYNAMIC", dyn->dynamic_vaddr); + link_define_boundary(l, img, "__dso_handle", dyn->dynamic_vaddr); + free_imports(h, &imports); } diff --git a/src/link/link_elf.c b/src/link/link_elf.c @@ -229,8 +229,14 @@ static int reloc_is_x64_tlsle(RelocKind k) { static int reloc_is_abs(RelocKind k) { return k == R_ABS32 || k == R_ABS64; } +/* Function-call relocs that may route through the PLT when the target + * is imported. aarch64 CALL26/JUMP26, x86_64 PLT32, and risc-v CALL_PLT + * (which cfree maps to R_PLT32) all carry the "call this address; if + * it's not resolvable here use the PLT trampoline" contract; the apply + * pass overwrites S with the PLT entry vaddr in that case. */ static int reloc_is_branch26(RelocKind k) { - return k == R_AARCH64_CALL26 || k == R_AARCH64_JUMP26; + return k == R_AARCH64_CALL26 || k == R_AARCH64_JUMP26 || + k == R_X64_PLT32 || k == R_PLT32 || k == R_RV_CALL; } static void emit_dyn_record(LinkImage* img, u64 site_vaddr, u32 reloc_type, @@ -249,13 +255,38 @@ static void emit_dyn_record(LinkImage* img, u64 site_vaddr, u32 reloc_type, r->r_addend = addend; } +/* Dynamic-reloc type numbers are arch-specific (aarch64 starts at 1024; + * x86_64 lives in the low single-digit range; risc-v has its own + * encoding). Pick the right constant for the active target.arch so the + * loader recognizes our .rela.dyn entries. */ +static u32 dyn_reloc_type(LinkImage* img, u32 aarch64, u32 x86_64, u32 rv64) { + switch (img->c->target.arch) { + case CFREE_ARCH_ARM_64: + return aarch64; + case CFREE_ARCH_X86_64: + return x86_64; + case CFREE_ARCH_RV64: + return rv64; + default: + compiler_panic(img->c, no_loc(), + "link: dyn reloc type for arch %u not configured", + (u32)img->c->target.arch); + } +} + static void emit_relative_record(LinkImage* img, u64 site_vaddr, u64 addend) { - emit_dyn_record(img, site_vaddr, ELF_R_AARCH64_RELATIVE, 0, (i64)addend); + u32 t = dyn_reloc_type(img, ELF_R_AARCH64_RELATIVE, ELF_R_X86_64_RELATIVE, + ELF_R_RISCV_RELATIVE); + emit_dyn_record(img, site_vaddr, t, 0, (i64)addend); } static void emit_globdat_record(LinkImage* img, u64 site_vaddr, u32 dynidx, i64 addend) { - emit_dyn_record(img, site_vaddr, ELF_R_AARCH64_GLOB_DAT, dynidx, addend); + u32 t = dyn_reloc_type(img, ELF_R_AARCH64_GLOB_DAT, ELF_R_X86_64_GLOB_DAT, + /* RISC-V dyn loader uses R_RISCV_64 for GOT-slot + * data imports — no dedicated GLOB_DAT type. */ + ELF_R_RISCV_64); + emit_dyn_record(img, site_vaddr, t, dynidx, addend); } /* RISC-V PCREL_LO12_* references the address of an AUIPC carrying the @@ -392,9 +423,15 @@ static void apply_all_relocs(LinkImage* img, u64 img_base) { * fill saves a write. */ continue; } - compiler_panic(img->c, no_loc(), - "link: unhandled reloc kind %u against imported symbol", - (unsigned)r->kind); + { + size_t nl = 0; + const char* nm = + tgt->name ? pool_str(img->c->global, tgt->name, &nl) : ""; + compiler_panic( + img->c, no_loc(), + "link: unhandled reloc kind %u against imported symbol '%.*s'", + (unsigned)r->kind, (int)nl, nm); + } } /* PIE: an absolute reloc against a defined non-imported symbol diff --git a/src/link/link_internal.h b/src/link/link_internal.h @@ -120,6 +120,14 @@ void link_ingest_archives(struct Linker*); void layout_dyn(struct Linker*, LinkImage*); void link_dyn_state_free(LinkImage*); +/* Define / upsert a synthetic global symbol resolved to `vaddr`. + * Satisfies any prior undef ref (e.g. _DYNAMIC from Scrt1.o, + * __dso_handle from libc_nonshared.a) and fans out across per-input + * duplicate name slots so emit_reloc_records sees the resolved + * vaddr. Implemented in link_layout.c. */ +void link_define_boundary(struct Linker*, LinkImage*, const char* name, + u64 vaddr); + /* SegVec instances for image-owned tables. Pointers returned by *_at / * *_push remain valid for the LinkImage's lifetime. */ SEGVEC_DEFINE(LinkSyms, LinkSymbol, 6); /* 64 entries per segment */ diff --git a/src/link/link_layout.c b/src/link/link_layout.c @@ -1598,6 +1598,11 @@ static void link_symbols_to_sections(Linker* l, LinkImage* img) { /* ---- pass 3b: linker-synthesized boundary symbols ---- */ +void link_define_boundary(Linker* l, LinkImage* img, const char* name, + u64 vaddr) { + emit_boundary_sym(l, img, name, vaddr); +} + static void emit_boundary_sym(Linker* l, LinkImage* img, const char* name, u64 vaddr) { Sym sym = boundary_name(l, name); @@ -1640,6 +1645,11 @@ static void emit_boundary_sym(Linker* l, LinkImage* img, const char* name, s->vaddr = vaddr; s->kind = SK_OBJ; s->defined = 1; + /* If resolve_undefs previously matched this name as an import + * from a DSO (e.g. libc.so exports _DYNAMIC for its own image), + * the linker-supplied definition wins — clear the import marker + * so apply_all_relocs treats it as a normal defined symbol. */ + s->imported = 0; } } @@ -1832,6 +1842,10 @@ static u8 reloc_width(RelocKind k) { case R_X64_PLT32: case R_X64_32S: case R_X64_TPOFF32: + case R_X64_GOTPCREL: + case R_X64_GOTPCRELX: + case R_X64_REX_GOTPCRELX: + case R_X64_GOTPC32: return 4; case R_ABS64: case R_REL64: @@ -1883,13 +1897,43 @@ static u8 reloc_width(RelocKind k) { case R_RV_RELAX: case R_RV_TPREL_ADD: return 4; + /* RISC-V ADD/SUB/SET fixup relocs — modify the section bytes in + * place. Width is the byte count touched. SUB6/SET6 modify one + * byte (the low 6 bits) like SET8/SUB8. */ + case R_RV_ADD8: + case R_RV_SUB8: + case R_RV_SUB6: + case R_RV_SET6: + case R_RV_SET8: + return 1; + case R_RV_ADD16: + case R_RV_SUB16: + case R_RV_SET16: + return 2; + case R_RV_ADD32: + case R_RV_SUB32: + case R_RV_SET32: + return 4; + case R_RV_ADD64: + case R_RV_SUB64: + return 8; default: return 0; } } static int reloc_uses_got(u16 kind) { - return kind == R_AARCH64_ADR_GOT_PAGE || kind == R_AARCH64_LD64_GOT_LO12_NC; + switch (kind) { + case R_AARCH64_ADR_GOT_PAGE: + case R_AARCH64_LD64_GOT_LO12_NC: + case R_X64_GOTPCREL: + case R_X64_GOTPCRELX: + case R_X64_REX_GOTPCRELX: + case R_RV_GOT_HI20: + return 1; + default: + return 0; + } } static void emit_reloc_records(Linker* l, LinkImage* img, @@ -1910,10 +1954,13 @@ static void emit_reloc_records(Linker* l, LinkImage* img, if (!s || !section_kept(s)) continue; /* Skip relocs whose containing section was GC'd. */ if (m->section[r->section_id] == LINK_SEC_NONE) continue; - /* RISC-V marker relocs (RELAX, TPREL_ADD) reference no symbol — - * they annotate the prior reloc for relaxation or TLS-add folding. + /* RISC-V marker relocs (RELAX, TPREL_ADD, ALIGN) reference no + * symbol — they annotate the prior reloc for relaxation, TLS + * thread-pointer ADD folding, or alignment-aware code shrinking. * We don't relax, so drop them entirely. */ - if (r->kind == R_RV_RELAX || r->kind == R_RV_TPREL_ADD) continue; + if (r->kind == R_RV_RELAX || r->kind == R_RV_TPREL_ADD || + r->kind == R_RV_ALIGN) + continue; if (r->sym == OBJ_SYM_NONE || r->sym >= m->nsym) compiler_panic(l->c, no_loc(), "link: reloc references unknown symbol"); target = m->sym[r->sym]; @@ -2916,6 +2963,41 @@ LinkImage* link_resolve(Linker* l) { emit_array_boundaries(l, img); emit_tls_boundaries(l, img); emit_encoding_section_boundaries(l, img); + /* Linker-defined synthetic symbols that may be referenced as + * undefs (often hidden) by sysroot startfiles / nonshared archives. + * Pre-defining them here satisfies resolve_undefs' undef sweep so + * it doesn't panic on hidden-undef references that no object or + * DSO supplies. vaddr=0 is a placeholder; layout_dyn may refine + * _DYNAMIC to the actual .dynamic vaddr later. */ + emit_boundary_sym(l, img, "__dso_handle", 0); + emit_boundary_sym(l, img, "_DYNAMIC", 0); + /* _GLOBAL_OFFSET_TABLE_ is referenced as a SHN_UNDEF marker by + * any x86_64 input that uses the GOT (musl/glibc libc.a routinely + * do). GNU ld auto-defines it at the .got base; cfree-ld doesn't + * use the symbol for any actual reloc, so a placeholder vaddr=0 + * keeps the undef sweep happy without affecting code that + * computes GOT addresses through their own GOTPC32 relocs. */ + emit_boundary_sym(l, img, "_GLOBAL_OFFSET_TABLE_", 0); + /* RISC-V startfiles use `__global_pointer$` to load gp in _start; + * the RISC-V psABI says it's defined as `.sdata + 0x800` so + * gp-relative addressing covers [.sdata - 2KiB, .sdata + 2KiB). + * We don't have .sdata as a distinct section, but any address in + * the writable data region is functionally adequate when the code + * doesn't actually use gp-relative addressing (cfree-cc doesn't + * emit `-mrelax`, and musl's static crt only loads gp without + * dereferencing through it). Pick the first RW segment base + + * 0x800. Only relevant for rv64; harmless on other arches. */ + if (l->c->target.arch == CFREE_ARCH_RV64) { + u32 si; + u64 gp_vaddr = 0; + for (si = 0; si < img->nsegments; ++si) { + if (img->segments[si].flags & SF_WRITE) { + gp_vaddr = img->segments[si].vaddr + 0x800u; + break; + } + } + emit_boundary_sym(l, img, "__global_pointer$", gp_vaddr); + } resolve_undefs(l, img); gc_drop_dead_globals(l, img, &g); /* layout_iplt runs last among the symbol-shaping passes: it diff --git a/src/link/link_reloc.c b/src/link/link_reloc.c @@ -46,7 +46,11 @@ void link_reloc_apply(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A, } case R_REL32: case R_PC32: - case R_X64_PLT32: { + case R_X64_PLT32: + case R_X64_GOTPCREL: + case R_X64_GOTPCRELX: + case R_X64_REX_GOTPCRELX: + case R_X64_GOTPC32: { /* AArch64 ELF: PREL32 maps to either of these; both encode a * 32-bit signed PC-relative displacement. The cfree-canonical * distinction (section-relative vs PC-relative) collapses on @@ -411,6 +415,69 @@ void link_reloc_apply(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A, * the linker may fold during relaxation. We don't relax, so * both are no-ops. */ return; + case R_RV_ADD8: { + /* word8 += S + A. Used (paired with a SUB8 against another sym + * at the same site) to encode symbol differences. */ + u8 cur = P_bytes[0]; + P_bytes[0] = (u8)(cur + (u8)((S + (u64)A) & 0xffu)); + return; + } + case R_RV_SUB8: { + u8 cur = P_bytes[0]; + P_bytes[0] = (u8)(cur - (u8)((S + (u64)A) & 0xffu)); + return; + } + case R_RV_ADD16: { + u16 cur = rd_u16_le(P_bytes); + wr_u16_le(P_bytes, (u16)(cur + (u16)((S + (u64)A) & 0xffffu))); + return; + } + case R_RV_SUB16: { + u16 cur = rd_u16_le(P_bytes); + wr_u16_le(P_bytes, (u16)(cur - (u16)((S + (u64)A) & 0xffffu))); + return; + } + case R_RV_ADD32: { + u32 cur = rd_u32_le(P_bytes); + wr_u32_le(P_bytes, (u32)(cur + (u32)((S + (u64)A) & 0xffffffffu))); + return; + } + case R_RV_SUB32: { + u32 cur = rd_u32_le(P_bytes); + wr_u32_le(P_bytes, (u32)(cur - (u32)((S + (u64)A) & 0xffffffffu))); + return; + } + case R_RV_ADD64: { + u64 cur = rd_u64_le(P_bytes); + wr_u64_le(P_bytes, cur + S + (u64)A); + return; + } + case R_RV_SUB64: { + u64 cur = rd_u64_le(P_bytes); + wr_u64_le(P_bytes, cur - S - (u64)A); + return; + } + case R_RV_SUB6: { + /* Bottom 6 bits of byte = (byte - (S + A)) & 0x3f. */ + u8 cur = P_bytes[0]; + u8 v = (u8)((cur & 0x3fu) - (u8)((S + (u64)A) & 0x3fu)); + P_bytes[0] = (u8)((cur & 0xc0u) | (v & 0x3fu)); + return; + } + case R_RV_SET6: { + u8 cur = P_bytes[0]; + P_bytes[0] = (u8)((cur & 0xc0u) | (u8)((S + (u64)A) & 0x3fu)); + return; + } + case R_RV_SET8: + P_bytes[0] = (u8)((S + (u64)A) & 0xffu); + return; + case R_RV_SET16: + wr_u16_le(P_bytes, (u16)((S + (u64)A) & 0xffffu)); + return; + case R_RV_SET32: + wr_u32_le(P_bytes, (u32)((S + (u64)A) & 0xffffffffu)); + return; default: compiler_panic(c, no_loc(), "link: unsupported reloc kind %u", diff --git a/src/obj/elf.h b/src/obj/elf.h @@ -331,6 +331,8 @@ u32 elf_x86_64_reloc_from(u32 elf_type); #define ELF_R_RISCV_SET16 55 #define ELF_R_RISCV_SET32 56 #define ELF_R_RISCV_32_PCREL 57 +#define ELF_R_RISCV_SET_ULEB128 60 +#define ELF_R_RISCV_SUB_ULEB128 61 u32 elf_riscv64_reloc_to(u32 kind /* RelocKind */); u32 elf_riscv64_reloc_from(u32 elf_type); diff --git a/src/obj/elf_reloc_riscv64.c b/src/obj/elf_reloc_riscv64.c @@ -85,6 +85,10 @@ u32 elf_riscv64_reloc_to(u32 kind /* RelocKind */) { return ELF_R_RISCV_SET16; case R_RV_SET32: return ELF_R_RISCV_SET32; + case R_RV_SET_ULEB128: + return ELF_R_RISCV_SET_ULEB128; + case R_RV_SUB_ULEB128: + return ELF_R_RISCV_SUB_ULEB128; default: return ELF_R_RISCV_NONE; } @@ -164,6 +168,10 @@ u32 elf_riscv64_reloc_from(u32 elf_type) { return R_RV_SET16; case ELF_R_RISCV_SET32: return R_RV_SET32; + case ELF_R_RISCV_SET_ULEB128: + return R_RV_SET_ULEB128; + case ELF_R_RISCV_SUB_ULEB128: + return R_RV_SUB_ULEB128; default: return (u32)-1; /* sentinel */ } diff --git a/src/obj/obj.h b/src/obj/obj.h @@ -201,6 +201,8 @@ typedef enum RelocKind { R_RV_SET8, R_RV_SET16, R_RV_SET32, + R_RV_SET_ULEB128, + R_RV_SUB_ULEB128, R_WASM_FUNCIDX, R_WASM_TABLEIDX, R_WASM_MEMOFS, diff --git a/src/obj/obj_secnames.c b/src/obj/obj_secnames.c @@ -91,5 +91,19 @@ Sym obj_secname_tbss(Compiler* c) { } int obj_format_extern_via_got(const Compiler* c) { - return c->target.obj == CFREE_OBJ_MACHO; + /* Mach-O always binds extern data through __got / non-lazy pointers + * — direct ADRP+ADD to an imported symbol isn't representable in + * ld64's reloc set. + * + * ELF static link: extern data is resolved at link time, so direct + * page-relative addressing works (linker patches the ADRP+ADD). + * + * ELF -fPIC / -fPIE: extern data may resolve to a symbol defined + * in a DSO at runtime; the codegen must route through the GOT so + * the loader can patch a single slot rather than touching .text. */ + if (c->target.obj == CFREE_OBJ_MACHO) return 1; + if (c->target.obj == CFREE_OBJ_ELF && + (c->target.pic == CFREE_PIC_PIC || c->target.pic == CFREE_PIC_PIE)) + return 1; + return 0; }