kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

reloc.c (13719B)


      1 /* RISC-V relocation descriptors (width + classification), shared by the
      2  * rv64 and rv32 backends (their reloc kinds are identical).
      3  *
      4  * One row per relocation kind this backend applies.  Reached through
      5  * LinkArchDesc.reloc_desc (wired in link.c for both rv64 and rv32) and the
      6  * arch-aware reloc_desc() dispatcher.  Wire encoding + name live in
      7  * src/obj/<fmt>/reloc_riscv{32,64}.c.
      8  *
      9  * R_RV_CALL and the arch-neutral R_PLT32 (the kit-canonical kind
     10  * R_RISCV_CALL_PLT maps onto) are both branches: too-far targets route
     11  * through the JIT/range call-stub pass.  R_RV_CALL patches an 8-byte
     12  * AUIPC+JALR pair; R_PLT32 keeps its neutral 4-byte width (a gate value —
     13  * the apply path re-derives the real span from the kind).
     14  *
     15  * RELAX / TPREL_ADD are relaxation markers (no bytes); SET/SUB_ULEB128 are
     16  * variable-width (the apply path reads the true field length from the bytes
     17  * — the width here is the nominal gate value).  R_RV_ALIGN is skipped
     18  * before the reloc record is built and carries no descriptor. */
     19 
     20 #include "obj/reloc.h"
     21 
     22 #include "core/bytes.h"
     23 #include "link/link_arch.h"
     24 
     25 static const RelocDescRow rv_rows[] = {
     26     {R_RV_HI20, {4, 0}},
     27     {R_RV_LO12_I, {4, 0}},
     28     {R_RV_LO12_S, {4, 0}},
     29     {R_RV_BRANCH, {4, 0}},
     30     {R_RV_JAL, {4, 0}},
     31     {R_RV_PCREL_HI20, {4, RELOC_IS_PCREL_ANCHOR}},
     32     {R_RV_PCREL_LO12_I, {4, 0}},
     33     {R_RV_PCREL_LO12_S, {4, 0}},
     34     {R_RV_GOT_HI20, {4, RELOC_USES_GOT | RELOC_IS_PCREL_ANCHOR}},
     35     {R_RV_TLS_GOT_HI20, {4, RELOC_IS_TLS_GOT}},
     36     {R_RV_TPREL_HI20, {4, RELOC_IS_TLS_LE}},
     37     {R_RV_TPREL_LO12_I, {4, RELOC_IS_TLS_LE}},
     38     {R_RV_TPREL_LO12_S, {4, RELOC_IS_TLS_LE}},
     39     {R_RV_CALL, {8, RELOC_IS_BRANCH}},
     40     {R_PLT32, {4, RELOC_IS_BRANCH}},
     41     {R_RV_RVC_BRANCH, {2, 0}},
     42     {R_RV_RVC_JUMP, {2, 0}},
     43     {R_RV_RELAX, {4, RELOC_MARKER}},
     44     {R_RV_TPREL_ADD, {4, RELOC_MARKER}},
     45     {R_ADD8, {1, 0}},
     46     {R_SUB8, {1, 0}},
     47     {R_SUB6, {1, 0}},
     48     {R_SET6, {1, 0}},
     49     {R_ADD16, {2, 0}},
     50     {R_SUB16, {2, 0}},
     51     {R_ADD32, {4, 0}},
     52     {R_SUB32, {4, 0}},
     53     {R_ADD64, {8, 0}},
     54     {R_SUB64, {8, 0}},
     55     {R_SET_ULEB128, {1, RELOC_WIDTH_DYN}},
     56     {R_SUB_ULEB128, {1, RELOC_WIDTH_DYN}},
     57 };
     58 
     59 const RelocDesc* rv_reloc_desc(RelocKind k) {
     60   return reloc_desc_row_find(rv_rows, (u32)(sizeof rv_rows / sizeof rv_rows[0]),
     61                              k);
     62 }
     63 
     64 /* RISC-V instruction-immediate byte encoders (WS-C), shared by rv64 and rv32.
     65  * Moved verbatim from the format-neutral byte-patcher; reached via
     66  * LinkArchDesc.reloc_apply_insn.  Encoding references: "RISC-V ELF psABI" §3
     67  * and "The RISC-V Instruction Set Manual, Volume I" Ch.19.  The +0x800 bias on
     68  * HI20 / CALL compensates the sign-extension of the paired 12-bit immediate.
     69  * The data-word ADD/SUB/SET arms and the ULEB128 codec are arch-neutral byte
     70  * writes and stay in the obj-core neutral path.  Returns 1 if it owns `k`. */
     71 int rv_reloc_apply_insn(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A,
     72                         u64 P) {
     73   switch (k) {
     74     case R_RV_HI20:
     75     case R_RV_TPREL_HI20: {
     76       /* U-type (LUI/AUIPC) imm[31:12] = high 20 bits of (S + A + 0x800).
     77        * The 0x800 bias compensates the sign-extension of the paired
     78        * 12-bit ADDI/load/store immediate, so HI20 + signext12(LO12)
     79        * reconstructs the full value. */
     80       i64 v = (i64)S + A;
     81       u32 hi20 = (u32)(((u64)(v + 0x800)) >> 12) & 0xfffffu;
     82       u32 instr = rd_u32_le(P_bytes);
     83       instr = (instr & 0x00000fffu) | (hi20 << 12);
     84       wr_u32_le(P_bytes, instr);
     85       return 1;
     86     }
     87     case R_RV_PCREL_HI20:
     88     case R_RV_GOT_HI20:
     89     case R_RV_TLS_GOT_HI20: {
     90       /* AUIPC pc-relative HI20: same encoding as HI20 but the
     91        * displacement is (S + A) - P. The paired PCREL_LO12 reloc at
     92        * the ADDI/load below recovers the low 12 bits of the same
     93        * displacement via a lookup keyed on this AUIPC's site vaddr.
     94        * GOT_HI20 collapses to PCREL_HI20 in static-link with no
     95        * indirection: the symbol resolves to its own address. */
     96       i64 disp = (i64)S + A - (i64)P;
     97       u32 hi20 = (u32)(((u64)(disp + 0x800)) >> 12) & 0xfffffu;
     98       u32 instr = rd_u32_le(P_bytes);
     99       instr = (instr & 0x00000fffu) | (hi20 << 12);
    100       wr_u32_le(P_bytes, instr);
    101       return 1;
    102     }
    103     case R_RV_LO12_I:
    104     case R_RV_TPREL_LO12_I: {
    105       /* I-type imm[11:0] in instruction bits [31:20]. Low 12 bits of
    106        * (S + A); the sign-extension at execute time pairs with HI20's
    107        * 0x800 bias to reconstruct the full address. */
    108       u64 v = (u64)((i64)S + A);
    109       u32 lo12 = (u32)(v & 0xfffu);
    110       u32 instr = rd_u32_le(P_bytes);
    111       instr = (instr & 0x000fffffu) | (lo12 << 20);
    112       wr_u32_le(P_bytes, instr);
    113       return 1;
    114     }
    115     case R_RV_LO12_S:
    116     case R_RV_TPREL_LO12_S: {
    117       /* S-type imm[11:5] in bits [31:25], imm[4:0] in bits [11:7]. */
    118       u64 v = (u64)((i64)S + A);
    119       u32 lo12 = (u32)(v & 0xfffu);
    120       u32 instr = rd_u32_le(P_bytes);
    121       instr = (instr & 0x01fff07fu) | ((lo12 & 0xfe0u) << 20) |
    122               ((lo12 & 0x1fu) << 7);
    123       wr_u32_le(P_bytes, instr);
    124       return 1;
    125     }
    126     case R_RV_BRANCH: {
    127       /* B-type 12-bit signed displacement in 2-byte units (13-bit
    128        * range). imm[12] in bit 31, imm[10:5] in 30:25, imm[4:1] in
    129        * 11:8, imm[11] in bit 7. */
    130       i64 disp = (i64)S + A - (i64)P;
    131       u32 instr;
    132       u32 b;
    133       if (disp & 1)
    134         compiler_panic(c, SRCLOC_NONE,
    135                        "link: RV BRANCH misaligned displacement");
    136       if (disp < -(i64)(1 << 12) || disp >= (i64)(1 << 12))
    137         compiler_panic(c, SRCLOC_NONE,
    138                        "link: RV BRANCH out of range (need ±4KiB)");
    139       b = (u32)((u64)disp & 0x1ffeu) | ((u32)(((u64)disp >> 11) & 1u) << 11) |
    140           ((u32)(((u64)disp >> 12) & 1u) << 12);
    141       instr = rd_u32_le(P_bytes);
    142       instr &= 0x01fff07fu;
    143       instr |= ((b >> 12) & 1u) << 31;
    144       instr |= ((b >> 5) & 0x3fu) << 25;
    145       instr |= ((b >> 1) & 0xfu) << 8;
    146       instr |= ((b >> 11) & 1u) << 7;
    147       wr_u32_le(P_bytes, instr);
    148       return 1;
    149     }
    150     case R_RV_JAL: {
    151       /* J-type 20-bit signed displacement in 2-byte units (21-bit
    152        * range). imm[20] in bit 31, imm[10:1] in 30:21, imm[11] in bit
    153        * 20, imm[19:12] in bits 19:12. */
    154       i64 disp = (i64)S + A - (i64)P;
    155       u32 instr;
    156       u32 b;
    157       if (disp & 1)
    158         compiler_panic(c, SRCLOC_NONE, "link: RV JAL misaligned displacement");
    159       if (disp < -(i64)(1 << 20) || disp >= (i64)(1 << 20))
    160         compiler_panic(c, SRCLOC_NONE,
    161                        "link: RV JAL out of range (need ±1MiB)");
    162       b = (u32)((u64)disp & 0x1ffffeu) | ((u32)(((u64)disp >> 11) & 1u) << 11) |
    163           ((u32)(((u64)disp >> 20) & 1u) << 20);
    164       instr = rd_u32_le(P_bytes);
    165       instr &= 0x00000fffu;
    166       instr |= ((b >> 20) & 1u) << 31;
    167       instr |= ((b >> 1) & 0x3ffu) << 21;
    168       instr |= ((b >> 11) & 1u) << 20;
    169       instr |= ((b >> 12) & 0xffu) << 12;
    170       wr_u32_le(P_bytes, instr);
    171       return 1;
    172     }
    173     case R_RV_CALL:
    174     case R_PLT32: {
    175       /* AUIPC + JALR pair encoding the same 32-bit signed PC-relative
    176        * displacement. AUIPC at P, JALR at P+4. The 0x800 bias on the
    177        * AUIPC immediate compensates JALR's signed 12-bit imm so that
    178        * (auipc_imm << 12) + signext12(jalr_imm) == disp.
    179        *
    180        * R_PLT32 is the kit-canonical RelocKind that
    181        * elf_riscv64_reloc_from(R_RISCV_CALL_PLT) maps to; static-link
    182        * with no PLT collapses CALL_PLT to a direct CALL (no
    183        * indirection). */
    184       i64 disp = (i64)S + A - (i64)P;
    185       u32 hi20 = (u32)(((u64)(disp + 0x800)) >> 12) & 0xfffffu;
    186       u32 lo12 = (u32)((u64)disp & 0xfffu);
    187       u32 auipc = rd_u32_le(P_bytes);
    188       u32 jalr = rd_u32_le(P_bytes + 4);
    189       if (disp < -(i64)(1ll << 31) || disp >= (i64)(1ll << 31))
    190         compiler_panic(c, SRCLOC_NONE,
    191                        "link: RV CALL out of range (need ±2GiB)");
    192       auipc = (auipc & 0x00000fffu) | (hi20 << 12);
    193       jalr = (jalr & 0x000fffffu) | (lo12 << 20);
    194       wr_u32_le(P_bytes, auipc);
    195       wr_u32_le(P_bytes + 4, jalr);
    196       return 1;
    197     }
    198     case R_RV_RVC_BRANCH: {
    199       /* CB-type 8-bit signed displacement in 2-byte units (9-bit
    200        * range). c.beqz / c.bnez. Encoding (16-bit instruction):
    201        *   bit 12   = imm[8]
    202        *   bits 11:10 = imm[4:3]
    203        *   bits 9:7  = rs1' (untouched)
    204        *   bits 6:5  = imm[7:6]
    205        *   bits 4:3  = imm[2:1]
    206        *   bit 2    = imm[5] */
    207       i64 disp = (i64)S + A - (i64)P;
    208       u16 instr = (u16)(P_bytes[0] | ((u16)P_bytes[1] << 8));
    209       u32 b;
    210       if (disp & 1)
    211         compiler_panic(c, SRCLOC_NONE,
    212                        "link: RV RVC_BRANCH misaligned displacement");
    213       if (disp < -(i64)(1 << 8) || disp >= (i64)(1 << 8))
    214         compiler_panic(c, SRCLOC_NONE,
    215                        "link: RV RVC_BRANCH out of range (need ±256B)");
    216       b = (u32)((u64)disp & 0x1feu);
    217       instr = (u16)(instr & 0xe383u);
    218       instr = (u16)(instr | (((b >> 8) & 1u) << 12));
    219       instr = (u16)(instr | (((b >> 3) & 3u) << 10));
    220       instr = (u16)(instr | (((b >> 6) & 3u) << 5));
    221       instr = (u16)(instr | (((b >> 1) & 3u) << 3));
    222       instr = (u16)(instr | (((b >> 5) & 1u) << 2));
    223       P_bytes[0] = (u8)(instr & 0xffu);
    224       P_bytes[1] = (u8)((instr >> 8) & 0xffu);
    225       return 1;
    226     }
    227     case R_RV_RVC_JUMP: {
    228       /* CJ-type 11-bit signed displacement in 2-byte units (12-bit
    229        * range). c.j / c.jal. Encoding bits in the 16-bit instruction:
    230        *   12=imm[11], 11=imm[4], 10:9=imm[9:8], 8=imm[10],
    231        *   7=imm[6], 6=imm[7], 5:3=imm[3:1], 2=imm[5]. */
    232       i64 disp = (i64)S + A - (i64)P;
    233       u16 instr = (u16)(P_bytes[0] | ((u16)P_bytes[1] << 8));
    234       u32 b;
    235       if (disp & 1)
    236         compiler_panic(c, SRCLOC_NONE,
    237                        "link: RV RVC_JUMP misaligned displacement");
    238       if (disp < -(i64)(1 << 11) || disp >= (i64)(1 << 11))
    239         compiler_panic(c, SRCLOC_NONE,
    240                        "link: RV RVC_JUMP out of range (need ±2KiB)");
    241       b = (u32)((u64)disp & 0xffeu);
    242       instr = (u16)(instr & 0xe003u);
    243       instr = (u16)(instr | (((b >> 11) & 1u) << 12));
    244       instr = (u16)(instr | (((b >> 4) & 1u) << 11));
    245       instr = (u16)(instr | (((b >> 8) & 3u) << 9));
    246       instr = (u16)(instr | (((b >> 10) & 1u) << 8));
    247       instr = (u16)(instr | (((b >> 6) & 1u) << 7));
    248       instr = (u16)(instr | (((b >> 7) & 1u) << 6));
    249       instr = (u16)(instr | (((b >> 1) & 7u) << 3));
    250       instr = (u16)(instr | (((b >> 5) & 1u) << 2));
    251       P_bytes[0] = (u8)(instr & 0xffu);
    252       P_bytes[1] = (u8)((instr >> 8) & 0xffu);
    253       return 1;
    254     }
    255     case R_RV_RELAX:
    256     case R_RV_TPREL_ADD:
    257       /* Marker relocs only — RELAX permits the prior reloc to be
    258        * compressed, TPREL_ADD annotates a TLS thread-pointer ADD that
    259        * the linker may fold during relaxation. We don't relax, so
    260        * both are no-ops. */
    261       return 1;
    262     default:
    263       return 0;
    264   }
    265 }
    266 
    267 /* In-process JIT TLS Local-Exec relaxation (LinkArchDesc.jit_tls_le_relax).
    268  * Codegen emits, per access:
    269  *   lui  t, %tprel_hi(var)      R_RV_TPREL_HI20   <- `site`
    270  *   add  t, tp, t              (no reloc)
    271  *   addi rd, t, %tprel_lo(var)  R_RV_TPREL_LO12_I
    272  * Single-threaded JIT: address the in-image storage PC-relative, dropping the
    273  * tp add:
    274  *   auipc t, %pcrel_hi(&var) ; nop ; addi rd, t, %pcrel_lo(&var)
    275  * The HI20 reloc drives the whole rewrite; the LO12 half is then a no-op. The
    276  * riscv `tp` ABI register is x4. */
    277 void rv_jit_tls_le_relax(Compiler* c, RelocKind k, u8* site, u64 storage,
    278                          u64 site_pc) {
    279   u8* add;
    280   u8* addi;
    281   u32 rd_tmp, add_w, addi_w, hi20;
    282   i64 disp;
    283   i32 lo12;
    284   if (k == R_RV_TPREL_LO12_I || k == R_RV_TPREL_LO12_S)
    285     return; /* handled with HI20 */
    286   if (k != R_RV_TPREL_HI20)
    287     compiler_panic(c, SRCLOC_NONE, "riscv jit tls: unexpected reloc kind %u",
    288                    (unsigned)k);
    289   add = site + 4;  /* add t, tp, t -> nop */
    290   addi = site + 8; /* addi rd, t, %lo -> addi rd, t, %pcrel_lo */
    291   if ((rd_u32_le(site) & 0x7fu) != 0x37u) /* lui */
    292     compiler_panic(c, SRCLOC_NONE, "riscv jit tls: unexpected access sequence");
    293   add_w = rd_u32_le(add);
    294   if ((add_w & 0x7fu) != 0x33u || ((add_w >> 15) & 0x1fu) != 4u) /* add ?,tp,? */
    295     compiler_panic(c, SRCLOC_NONE, "riscv jit tls: unexpected access sequence");
    296   rd_tmp = (rd_u32_le(site) >> 7) & 0x1fu;
    297   disp = (i64)storage - (i64)site_pc;
    298   lo12 = (i32)((u32)disp & 0xfffu);
    299   if (lo12 & 0x800) lo12 -= 0x1000; /* sign-extend 12-bit */
    300   hi20 = (u32)(((disp - (i64)lo12) >> 12) & 0xfffffu);
    301   wr_u32_le(site, 0x00000017u | (rd_tmp << 7) | (hi20 << 12)); /* auipc */
    302   wr_u32_le(add, 0x00000013u);                                /* nop */
    303   addi_w = rd_u32_le(addi);
    304   wr_u32_le(addi, (addi_w & 0x000fffffu) | (((u32)lo12 & 0xfffu) << 20));
    305 }
    306 
    307 /* In-process JIT relaxation of RISC-V indirection idioms (LinkArchDesc
    308  * .jit_reloc_relax).  Today only PCREL_LO12: it has no S of its own — its
    309  * low-12 bits must match the paired AUIPC's PC-relative displacement, which
    310  * the JIT recomputes against its layout via ctx->resolve_pair.  Feed that
    311  * displacement to the LO12_I/S encoder (same instruction encoding; the addend
    312  * is unused per the psABI).  Returns 1 if it owned `k`, 0 otherwise. */
    313 int rv_jit_reloc_relax(Compiler* c, RelocKind k, const JitRelaxCtx* ctx) {
    314   if (k == R_RV_PCREL_LO12_I || k == R_RV_PCREL_LO12_S) {
    315     i64 disp = ctx->resolve_pair(ctx->resolve_user, ctx->anchor_vaddr);
    316     RelocKind alias = (k == R_RV_PCREL_LO12_I) ? R_RV_LO12_I : R_RV_LO12_S;
    317     rv_reloc_apply_insn(c, alias, ctx->site, (u64)disp, 0, ctx->site_pc);
    318     return 1;
    319   }
    320   return 0;
    321 }