link.c (4445B)
1 /* RV64 link-time arch descriptor. See link_arch.h for the contract. 2 * 3 * The PLT0/PLT-entry/IPLT-stub byte layouts here mirror what used to 4 * live inline in link_dyn.c (PLT) and link_layout.c (IPLT) before the 5 * vtable refactor; comments preserve the WHY (notably the +0x800 bias 6 * on AUIPC immediates). */ 7 8 #include "arch/rv64/isa.h" 9 #include "core/bytes.h" 10 #include "core/core.h" 11 #include "link/link_arch.h" 12 13 /* PLT0 is 8 canonical NOPs (32 bytes); each PLT entry and IPLT stub is 14 * 4 instructions (16 bytes) / 3 instructions (12 bytes) respectively. 15 * Encoded once here so the descriptor and emitters stay in sync. */ 16 #define RV64_PLT0_SIZE 32u 17 #define RV64_PLT_ENTRY_SIZE 16u 18 #define RV64_IPLT_STUB_SIZE 12u 19 20 /* Split a PC-relative displacement into the (hi20, lo12) pair consumed 21 * by the AUIPC + I-type sequence. The +0x800 bias is the standard 22 * RISC-V two-instruction PCREL trick: AUIPC adds an upper-20 immediate 23 * shifted left 12, then the second instruction adds a sign-extended 24 * 12-bit lo12. If we naively split disp into (disp>>12, disp&0xfff) 25 * the lo12 sign-extends as a *negative* number whenever bit 11 is set, 26 * which underflows the AUIPC result by 0x1000. Adding 0x800 before 27 * the shift rounds the high half up in exactly the cases that need it 28 * so AUIPC + sign-extended-lo12 reconstructs disp correctly. */ 29 static inline void rv64_split_pcrel(i64 disp, u32* hi20_out, u32* lo12_out) { 30 *hi20_out = (u32)(((u64)(disp + 0x800)) >> 12) & 0xfffffu; 31 *lo12_out = (u32)((u64)disp & 0xfffu); 32 } 33 34 /* PLT0 under DF_1_NOW is never executed — the loader resolves every 35 * JUMP_SLOT before transferring control — but we still emit it in 36 * canonical form (8 NOPs) so disassemblers and unwinders see a well- 37 * formed prologue at the top of .plt. */ 38 static void rv64_emit_plt0(u8* dst, u64 plt0_vaddr, u64 gotplt_vaddr) { 39 u32 i; 40 (void)plt0_vaddr; 41 (void)gotplt_vaddr; 42 for (i = 0; i < RV64_PLT0_SIZE; i += 4u) wr_u32_le(dst + i, rv_nop()); 43 } 44 45 /* Per-import PLT entry: load the GOT slot pre-filled by the loader 46 * (R_RISCV_JUMP_SLOT) and tail-call through it. t1 is the standard 47 * psABI scratch for the trampoline return-address (clobbered by the 48 * lazy resolver in the non-BIND_NOW path); t3 holds the slot pointer. */ 49 static void rv64_emit_plt_entry(u8* dst, u64 entry_vaddr, u64 slot_vaddr) { 50 i64 disp = (i64)slot_vaddr - (i64)entry_vaddr; 51 u32 hi20; 52 u32 lo12; 53 rv64_split_pcrel(disp, &hi20, &lo12); 54 wr_u32_le(dst + 0, rv_auipc(RV_T3, hi20)); 55 wr_u32_le(dst + 4, rv_ld(RV_T3, RV_T3, (i32)lo12)); 56 wr_u32_le(dst + 8, rv_jalr(RV_T1, RV_T3, 0)); 57 wr_u32_le(dst + 12, rv_nop()); 58 } 59 60 /* IPLT stub: load .igot.plt[i] (filled at startup by the resolver) and 61 * tail-call to it. The stub->slot displacement is invariant under the 62 * segment-base shift (both addresses live in the same image), so we 63 * bake it directly into the instructions and report zero apply-time 64 * relocs — unlike aarch64, which cannot encode a 32-bit pcrel inline. */ 65 static u32 rv64_emit_iplt_stub(u8* dst, u64 stub_vaddr, u64 slot_vaddr, 66 LinkArchIPltReloc out[2]) { 67 i64 disp = (i64)slot_vaddr - (i64)stub_vaddr; 68 u32 hi20; 69 u32 lo12; 70 (void)out; 71 rv64_split_pcrel(disp, &hi20, &lo12); 72 wr_u32_le(dst + 0, rv_auipc(RV_T1, hi20)); 73 wr_u32_le(dst + 4, rv_ld(RV_T1, RV_T1, (i32)lo12)); 74 wr_u32_le(dst + 8, rv_jr(RV_T1)); 75 return 0u; 76 } 77 78 /* A direct rv64 call (R_RV_CALL = AUIPC+JALR) reaches only ±2GiB. In the JIT, 79 * an external SK_ABS target (a host libc symbol resolved to an arbitrary 80 * address) can lie farther than that from the JIT-allocated code region, where 81 * link_reloc_apply would panic "RV CALL out of range". Reporting these as 82 * branch relocs routes them through the JIT call-stub pass, which reuses 83 * emit_iplt_stub (AUIPC+LD+JR) to reach an arbitrary address held in an 84 * in-image slot — the same safety net aa64 and x64 already wire. */ 85 static int rv64_is_branch_reloc(RelocKind kind) { 86 return kind == R_RV_CALL || kind == R_PLT32; 87 } 88 89 const LinkArchDesc link_arch_rv64 = { 90 .plt0_size = RV64_PLT0_SIZE, 91 .plt_entry_size = RV64_PLT_ENTRY_SIZE, 92 .iplt_stub_size = RV64_IPLT_STUB_SIZE, 93 .global_pointer_symbol = "__global_pointer$", 94 .global_pointer_rw_offset = 0x800u, 95 .emit_plt0 = rv64_emit_plt0, 96 .emit_plt_entry = rv64_emit_plt_entry, 97 .emit_iplt_stub = rv64_emit_iplt_stub, 98 .needs_jit_call_stub = rv64_is_branch_reloc, 99 };