kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

link.c (4445B)


      1 /* RV64 link-time arch descriptor.  See link_arch.h for the contract.
      2  *
      3  * The PLT0/PLT-entry/IPLT-stub byte layouts here mirror what used to
      4  * live inline in link_dyn.c (PLT) and link_layout.c (IPLT) before the
      5  * vtable refactor; comments preserve the WHY (notably the +0x800 bias
      6  * on AUIPC immediates). */
      7 
      8 #include "arch/rv64/isa.h"
      9 #include "core/bytes.h"
     10 #include "core/core.h"
     11 #include "link/link_arch.h"
     12 
     13 /* PLT0 is 8 canonical NOPs (32 bytes); each PLT entry and IPLT stub is
     14  * 4 instructions (16 bytes) / 3 instructions (12 bytes) respectively.
     15  * Encoded once here so the descriptor and emitters stay in sync. */
     16 #define RV64_PLT0_SIZE 32u
     17 #define RV64_PLT_ENTRY_SIZE 16u
     18 #define RV64_IPLT_STUB_SIZE 12u
     19 
     20 /* Split a PC-relative displacement into the (hi20, lo12) pair consumed
     21  * by the AUIPC + I-type sequence.  The +0x800 bias is the standard
     22  * RISC-V two-instruction PCREL trick: AUIPC adds an upper-20 immediate
     23  * shifted left 12, then the second instruction adds a sign-extended
     24  * 12-bit lo12.  If we naively split disp into (disp>>12, disp&0xfff)
     25  * the lo12 sign-extends as a *negative* number whenever bit 11 is set,
     26  * which underflows the AUIPC result by 0x1000.  Adding 0x800 before
     27  * the shift rounds the high half up in exactly the cases that need it
     28  * so AUIPC + sign-extended-lo12 reconstructs disp correctly. */
     29 static inline void rv64_split_pcrel(i64 disp, u32* hi20_out, u32* lo12_out) {
     30   *hi20_out = (u32)(((u64)(disp + 0x800)) >> 12) & 0xfffffu;
     31   *lo12_out = (u32)((u64)disp & 0xfffu);
     32 }
     33 
     34 /* PLT0 under DF_1_NOW is never executed — the loader resolves every
     35  * JUMP_SLOT before transferring control — but we still emit it in
     36  * canonical form (8 NOPs) so disassemblers and unwinders see a well-
     37  * formed prologue at the top of .plt. */
     38 static void rv64_emit_plt0(u8* dst, u64 plt0_vaddr, u64 gotplt_vaddr) {
     39   u32 i;
     40   (void)plt0_vaddr;
     41   (void)gotplt_vaddr;
     42   for (i = 0; i < RV64_PLT0_SIZE; i += 4u) wr_u32_le(dst + i, rv_nop());
     43 }
     44 
     45 /* Per-import PLT entry: load the GOT slot pre-filled by the loader
     46  * (R_RISCV_JUMP_SLOT) and tail-call through it.  t1 is the standard
     47  * psABI scratch for the trampoline return-address (clobbered by the
     48  * lazy resolver in the non-BIND_NOW path); t3 holds the slot pointer. */
     49 static void rv64_emit_plt_entry(u8* dst, u64 entry_vaddr, u64 slot_vaddr) {
     50   i64 disp = (i64)slot_vaddr - (i64)entry_vaddr;
     51   u32 hi20;
     52   u32 lo12;
     53   rv64_split_pcrel(disp, &hi20, &lo12);
     54   wr_u32_le(dst + 0, rv_auipc(RV_T3, hi20));
     55   wr_u32_le(dst + 4, rv_ld(RV_T3, RV_T3, (i32)lo12));
     56   wr_u32_le(dst + 8, rv_jalr(RV_T1, RV_T3, 0));
     57   wr_u32_le(dst + 12, rv_nop());
     58 }
     59 
     60 /* IPLT stub: load .igot.plt[i] (filled at startup by the resolver) and
     61  * tail-call to it.  The stub->slot displacement is invariant under the
     62  * segment-base shift (both addresses live in the same image), so we
     63  * bake it directly into the instructions and report zero apply-time
     64  * relocs — unlike aarch64, which cannot encode a 32-bit pcrel inline. */
     65 static u32 rv64_emit_iplt_stub(u8* dst, u64 stub_vaddr, u64 slot_vaddr,
     66                                LinkArchIPltReloc out[2]) {
     67   i64 disp = (i64)slot_vaddr - (i64)stub_vaddr;
     68   u32 hi20;
     69   u32 lo12;
     70   (void)out;
     71   rv64_split_pcrel(disp, &hi20, &lo12);
     72   wr_u32_le(dst + 0, rv_auipc(RV_T1, hi20));
     73   wr_u32_le(dst + 4, rv_ld(RV_T1, RV_T1, (i32)lo12));
     74   wr_u32_le(dst + 8, rv_jr(RV_T1));
     75   return 0u;
     76 }
     77 
     78 /* A direct rv64 call (R_RV_CALL = AUIPC+JALR) reaches only ±2GiB. In the JIT,
     79  * an external SK_ABS target (a host libc symbol resolved to an arbitrary
     80  * address) can lie farther than that from the JIT-allocated code region, where
     81  * link_reloc_apply would panic "RV CALL out of range". Reporting these as
     82  * branch relocs routes them through the JIT call-stub pass, which reuses
     83  * emit_iplt_stub (AUIPC+LD+JR) to reach an arbitrary address held in an
     84  * in-image slot — the same safety net aa64 and x64 already wire. */
     85 static int rv64_is_branch_reloc(RelocKind kind) {
     86   return kind == R_RV_CALL || kind == R_PLT32;
     87 }
     88 
     89 const LinkArchDesc link_arch_rv64 = {
     90     .plt0_size = RV64_PLT0_SIZE,
     91     .plt_entry_size = RV64_PLT_ENTRY_SIZE,
     92     .iplt_stub_size = RV64_IPLT_STUB_SIZE,
     93     .global_pointer_symbol = "__global_pointer$",
     94     .global_pointer_rw_offset = 0x800u,
     95     .emit_plt0 = rv64_emit_plt0,
     96     .emit_plt_entry = rv64_emit_plt_entry,
     97     .emit_iplt_stub = rv64_emit_iplt_stub,
     98     .needs_jit_call_stub = rv64_is_branch_reloc,
     99 };