kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

link.c (6621B)


      1 /* RV64 link-time arch descriptor.  See link_arch.h for the contract.
      2  *
      3  * The PLT0/PLT-entry/IPLT-stub byte layouts here mirror what used to
      4  * live inline in link_dyn.c (PLT) and link_layout.c (IPLT) before the
      5  * vtable refactor; comments preserve the WHY (notably the +0x800 bias
      6  * on AUIPC immediates). */
      7 
      8 #include "arch/riscv/isa.h"
      9 #include "core/bytes.h"
     10 #include "core/core.h"
     11 #include "link/link_arch.h"
     12 
     13 /* PLT0 is 8 canonical NOPs (32 bytes); each PLT entry and IPLT stub is
     14  * 4 instructions (16 bytes) / 3 instructions (12 bytes) respectively.
     15  * Encoded once here so the descriptor and emitters stay in sync. */
     16 #define RV64_PLT0_SIZE 32u
     17 #define RV64_PLT_ENTRY_SIZE 16u
     18 #define RV64_IPLT_STUB_SIZE 12u
     19 
     20 /* Split a PC-relative displacement into the (hi20, lo12) pair consumed
     21  * by the AUIPC + I-type sequence.  The +0x800 bias is the standard
     22  * RISC-V two-instruction PCREL trick: AUIPC adds an upper-20 immediate
     23  * shifted left 12, then the second instruction adds a sign-extended
     24  * 12-bit lo12.  If we naively split disp into (disp>>12, disp&0xfff)
     25  * the lo12 sign-extends as a *negative* number whenever bit 11 is set,
     26  * which underflows the AUIPC result by 0x1000.  Adding 0x800 before
     27  * the shift rounds the high half up in exactly the cases that need it
     28  * so AUIPC + sign-extended-lo12 reconstructs disp correctly. */
     29 static inline void rv64_split_pcrel(i64 disp, u32* hi20_out, u32* lo12_out) {
     30   *hi20_out = (u32)(((u64)(disp + 0x800)) >> 12) & 0xfffffu;
     31   *lo12_out = (u32)((u64)disp & 0xfffu);
     32 }
     33 
     34 /* PLT0 under DF_1_NOW is never executed — the loader resolves every
     35  * JUMP_SLOT before transferring control — but we still emit it in
     36  * canonical form (8 NOPs) so disassemblers and unwinders see a well-
     37  * formed prologue at the top of .plt. */
     38 static void rv64_emit_plt0(u8* dst, u64 plt0_vaddr, u64 gotplt_vaddr) {
     39   u32 i;
     40   (void)plt0_vaddr;
     41   (void)gotplt_vaddr;
     42   for (i = 0; i < RV64_PLT0_SIZE; i += 4u) wr_u32_le(dst + i, rv_nop());
     43 }
     44 
     45 /* Per-import PLT entry: load the GOT slot pre-filled by the loader
     46  * (R_RISCV_JUMP_SLOT) and tail-call through it.  t1 is the standard
     47  * psABI scratch for the trampoline return-address (clobbered by the
     48  * lazy resolver in the non-BIND_NOW path); t3 holds the slot pointer. */
     49 static void rv64_emit_plt_entry(u8* dst, u64 entry_vaddr, u64 slot_vaddr) {
     50   i64 disp = (i64)slot_vaddr - (i64)entry_vaddr;
     51   u32 hi20;
     52   u32 lo12;
     53   rv64_split_pcrel(disp, &hi20, &lo12);
     54   wr_u32_le(dst + 0, rv_auipc(RV_T3, hi20));
     55   wr_u32_le(dst + 4, rv_ld(RV_T3, RV_T3, (i32)lo12));
     56   wr_u32_le(dst + 8, rv_jalr(RV_T1, RV_T3, 0));
     57   wr_u32_le(dst + 12, rv_nop());
     58 }
     59 
     60 /* IPLT stub: load .igot.plt[i] (filled at startup by the resolver) and
     61  * tail-call to it.  The stub->slot displacement is invariant under the
     62  * segment-base shift (both addresses live in the same image), so we
     63  * bake it directly into the instructions and report zero apply-time
     64  * relocs — unlike aarch64, which cannot encode a 32-bit pcrel inline. */
     65 static u32 rv64_emit_iplt_stub(u8* dst, u64 stub_vaddr, u64 slot_vaddr,
     66                                LinkArchIPltReloc out[2]) {
     67   i64 disp = (i64)slot_vaddr - (i64)stub_vaddr;
     68   u32 hi20;
     69   u32 lo12;
     70   (void)out;
     71   rv64_split_pcrel(disp, &hi20, &lo12);
     72   wr_u32_le(dst + 0, rv_auipc(RV_T1, hi20));
     73   wr_u32_le(dst + 4, rv_ld(RV_T1, RV_T1, (i32)lo12));
     74   wr_u32_le(dst + 8, rv_jr(RV_T1));
     75   return 0u;
     76 }
     77 
     78 /* RV32 PLT entry: identical to rv64_emit_plt_entry except the GOT slot
     79  * is 4 bytes (one XLEN word), so the load is LW not LD.  Entry stays
     80  * 16 bytes / 4 insns; the AUIPC + (hi20,lo12) split is XLEN-neutral. */
     81 static void rv32_emit_plt_entry(u8* dst, u64 entry_vaddr, u64 slot_vaddr) {
     82   i64 disp = (i64)slot_vaddr - (i64)entry_vaddr;
     83   u32 hi20;
     84   u32 lo12;
     85   rv64_split_pcrel(disp, &hi20, &lo12);
     86   wr_u32_le(dst + 0, rv_auipc(RV_T3, hi20));
     87   wr_u32_le(dst + 4, rv_lw(RV_T3, RV_T3, (i32)lo12));
     88   wr_u32_le(dst + 8, rv_jalr(RV_T1, RV_T3, 0));
     89   wr_u32_le(dst + 12, rv_nop());
     90 }
     91 
     92 /* RV32 IPLT stub: identical to rv64_emit_iplt_stub except the
     93  * .igot.plt slot is 4 bytes, so LW not LD.  Stub stays 12 bytes /
     94  * 3 insns; displacement is baked inline, so zero apply-time relocs. */
     95 static u32 rv32_emit_iplt_stub(u8* dst, u64 stub_vaddr, u64 slot_vaddr,
     96                                LinkArchIPltReloc out[2]) {
     97   i64 disp = (i64)slot_vaddr - (i64)stub_vaddr;
     98   u32 hi20;
     99   u32 lo12;
    100   (void)out;
    101   rv64_split_pcrel(disp, &hi20, &lo12);
    102   wr_u32_le(dst + 0, rv_auipc(RV_T1, hi20));
    103   wr_u32_le(dst + 4, rv_lw(RV_T1, RV_T1, (i32)lo12));
    104   wr_u32_le(dst + 8, rv_jr(RV_T1));
    105   return 0u;
    106 }
    107 
    108 /* Width + classification rows for RISC-V's relocation kinds (shared by rv64
    109  * and rv32); defined in src/arch/riscv/reloc.c and consulted through the
    110  * .reloc_desc hook.  R_RV_CALL / R_PLT32 carry RELOC_IS_BRANCH: a direct
    111  * AUIPC+JALR reaches only ±2GiB, so a too-far target (e.g. a JIT-resolved
    112  * host libc symbol) routes through the call-stub pass, the same safety net
    113  * aa64 and x64 wire.  rv_reloc_apply_insn (same file) holds the matching
    114  * U/I/S/B/J + RVC instruction-immediate byte encoders. */
    115 const RelocDesc* rv_reloc_desc(RelocKind);
    116 int rv_reloc_apply_insn(Compiler*, RelocKind, u8*, u64, i64, u64);
    117 void rv_jit_tls_le_relax(Compiler*, RelocKind, u8*, u64, u64);
    118 int rv_jit_reloc_relax(Compiler*, RelocKind, const JitRelaxCtx*);
    119 
    120 const LinkArchDesc link_arch_rv64 = {
    121     .plt0_size = RV64_PLT0_SIZE,
    122     .plt_entry_size = RV64_PLT_ENTRY_SIZE,
    123     .iplt_stub_size = RV64_IPLT_STUB_SIZE,
    124     .global_pointer_symbol = "__global_pointer$",
    125     .global_pointer_rw_offset = 0x800u,
    126     .emit_plt0 = rv64_emit_plt0,
    127     .emit_plt_entry = rv64_emit_plt_entry,
    128     .emit_iplt_stub = rv64_emit_iplt_stub,
    129     .reloc_desc = rv_reloc_desc,
    130     .reloc_apply_insn = rv_reloc_apply_insn,
    131     .jit_tls_le_relax = rv_jit_tls_le_relax,
    132     .jit_reloc_relax = rv_jit_reloc_relax,
    133 };
    134 
    135 /* RV32 link descriptor: identical to rv64 (PLT0/entry/stub byte sizes,
    136  * __global_pointer$ + 0x800 RW bias, canonical 8-NOP PLT0, and the JIT
    137  * call-stub predicate) EXCEPT the PLT/IPLT emitters load 4-byte GOT
    138  * slots with LW instead of LD. */
    139 const LinkArchDesc link_arch_rv32 = {
    140     .plt0_size = RV64_PLT0_SIZE,
    141     .plt_entry_size = RV64_PLT_ENTRY_SIZE,
    142     .iplt_stub_size = RV64_IPLT_STUB_SIZE,
    143     .global_pointer_symbol = "__global_pointer$",
    144     .global_pointer_rw_offset = 0x800u,
    145     .emit_plt0 = rv64_emit_plt0,
    146     .emit_plt_entry = rv32_emit_plt_entry,
    147     .emit_iplt_stub = rv32_emit_iplt_stub,
    148     .reloc_desc = rv_reloc_desc,
    149     .reloc_apply_insn = rv_reloc_apply_insn,
    150     .jit_reloc_relax = rv_jit_reloc_relax,
    151 };