kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

link_arch.h (8940B)


      1 #ifndef KIT_LINK_ARCH_H
      2 #define KIT_LINK_ARCH_H
      3 
      4 /* Per-architecture link-time descriptor.
      5  *
      6  * Pure-data + a small set of stub-emit function pointers, indexed by
      7  * Compiler.target.arch.  Lets link_dyn.c / link_layout.c / link_elf.c
      8  * stay arch-agnostic instead of branching on target.arch and hand-
      9  * encoding instruction bytes inline.  Each backend's descriptor lives
     10  * under src/arch/<arch>/ and leans on that arch's ISA encoders for
     11  * everything but small format-specific constants.
     12  *
     13  * The struct intentionally collects only fields the LINKER needs.
     14  * Code-generation arch dispatch belongs in CGTarget (arch/arch.h);
     15  * reloc-apply dispatch is keyed on RelocKind in link_reloc.c. */
     16 
     17 #include "core/core.h"
     18 #include "obj/obj.h"
     19 #include "obj/reloc.h"
     20 
     21 /* IPLT relocation slot reported by emit_iplt_stub.  Some arches
     22  * (aarch64) cannot encode the stub->slot displacement inline and need
     23  * the linker to generate apply-time fixups; others (x64, rv64) bake
     24  * the displacement directly into the stub bytes and report zero relocs.
     25  *
     26  * Fields are everything the emitter knows; the caller fills in
     27  * link_section_id, write_vaddr / write_file_offset, target sym, addend
     28  * from its own context. */
     29 typedef struct LinkArchIPltReloc { /* one fixup record filled in by emit_iplt_stub */
     30   u32 offset_in_stub; /* byte offset within the 12-byte stub */
     31   u32 width;          /* size of the patched field -- presumably in bytes; TODO confirm */
     32   RelocKind kind;     /* relocation kind the linker applies at offset_in_stub */
     33 } LinkArchIPltReloc;
     34 
     35 /* Context for LinkArchDesc.jit_reloc_relax — the in-process JIT's per-reloc
     36  * "collapse runtime indirection to direct in-image addressing" pass.  The
     37  * single-threaded JIT has no dynamic loader, GOT, or TLV resolver, so the
     38  * arch rewrites the access idiom in place.  The hook reads only the fields
     39  * relevant to the kinds it owns; the rest cost nothing. */
     40 typedef struct JitRelaxCtx { /* read-only input to LinkArchDesc.jit_reloc_relax (passed as const*) */
     41   u8* site;     /* write-alias bytes at the reloc site */
     42   u64 S;        /* resolved runtime symbol address (0 when weak_undef_zero) */
     43   i64 addend;   /* the reloc's addend */
     44   u64 site_pc;  /* runtime address of the reloc site */
     45 
     46   /* Address-of a weak *undefined* symbol must read as 0.  Set when the target
     47    * is weak + SK_ABS + vaddr==0; the arch zeroes the materialized register
     48    * rather than forming a (huge, out-of-range) PC-relative address. */
     49   int weak_undef_zero;
     50 
     51   /* The image has no real GOT — the incremental-append path does not re-run
     52    * GOT layout, so GOT-load relocs must be relaxed to direct in-image
     53    * addressing.  0 for the full from-image link, where .got slots exist and
     54    * GOT loads apply normally (S already points at the slot). */
     55   int got_relaxed;
     56 
     57   /* RISC-V PCREL_LO12 pairs with an AUIPC anchor whose runtime PC-relative
     58    * displacement must be recomputed against the JIT layout.  resolve_pair
     59    * returns that displacement for the anchor at image vaddr `anchor_vaddr`;
     60    * the arch calls it opaquely (the implementation, in link_jit.c, needs
     61    * LinkImage internals the arch must not reach). */
     62   i64 (*resolve_pair)(void* user, u64 anchor_vaddr);
     63   void* resolve_user; /* opaque cookie, presumably forwarded as resolve_pair's `user` -- confirm in link_jit.c */
     64   u64 anchor_vaddr;   /* image vaddr of the AUIPC anchor to hand to resolve_pair */
     65 } JitRelaxCtx;
     66 
     67 typedef struct LinkArchDesc { /* per-arch linker descriptor; obtained via link_arch_desc_for() */
     68   /* ---- PLT geometry ----
     69    * All three arches today use a 32-byte PLT0 + 16-byte per-import
     70    * entry, but exposing the sizes keeps the linker free of magic
     71    * numbers and lets a future port pick its own layout. */
     72   u32 plt0_size;      /* number of bytes emit_plt0 writes */
     73   u32 plt_entry_size; /* number of bytes emit_plt_entry writes per imported function */
     74 
     75   /* ---- IPLT geometry (ifunc trampolines, layout_iplt) ---- */
     76   u32 iplt_stub_size; /* number of bytes emit_iplt_stub writes per ifunc trampoline */
     77 
     78   /* ---- Optional arch-defined linker symbols ----
     79    * Some ABIs reserve a global-pointer anchor.  When non-NULL, the
     80    * top-level layout pass defines the named symbol at the first
     81    * writable segment plus global_pointer_rw_offset. */
     82   const char* global_pointer_symbol; /* NULL = layout defines no such symbol */
     83   u64 global_pointer_rw_offset;      /* added to the first writable segment; only meaningful when the symbol is non-NULL */
     84 
     85   /* ---- Stub emitters ----
     86    * Each writes its full byte range; callers do not need to pre-fill
     87    * the buffer.  All vaddrs are post-shift (final image addresses).
     88    *
     89    * emit_plt0:        writes plt0_size bytes at dst.  PLT0 is the
     90    *                   lazy-resolve trampoline; under DF_1_NOW it's
     91    *                   never executed but is emitted in canonical
     92    *                   form for disassembler / unwinder consumption.
     93    *                   gotplt_vaddr is the base of .got.plt.
     94    *
     95    * emit_plt_entry:   writes plt_entry_size bytes at dst for one
     96    *                   imported function.  entry_vaddr is the
     97    *                   absolute address of this entry; slot_vaddr is
     98    *                   its corresponding .got.plt slot.
     99    *
    100    * emit_iplt_stub:   writes iplt_stub_size bytes for one ifunc
    101    *                   trampoline that loads .igot.plt[i] and tail-
    102    *                   calls.  Returns the number of LinkRelocApply
    103    *                   records the caller must enqueue (0 or 2);
    104    *                   the records' offset_in_stub / width / kind are
    105    *                   populated, the caller fills the rest. */
    106   void (*emit_plt0)(u8* dst, u64 plt0_vaddr, u64 gotplt_vaddr);
    107   void (*emit_plt_entry)(u8* dst, u64 entry_vaddr, u64 slot_vaddr);
    108   u32 (*emit_iplt_stub)(u8* dst, u64 stub_vaddr, u64 slot_vaddr,
    109                         LinkArchIPltReloc out[2]); /* out[] holds the 0 or 2 reported records */
    110 
    111   /* This arch's relocation-descriptor slice: width + classification flags
    112    * for each kind it applies, or NULL for kinds it does not own.  The
    113    * arch-aware reloc_desc() dispatcher (link_reloc_desc.c) consults this
    114    * before the neutral table, and the reloc_kind_* predicates read the
    115    * flags — replacing the former is_branch / is_got_load / is_tlvp /
    116    * is_direct_page / needs_jit_call_stub hooks and the generic
    117    * reloc_width / reloc_uses_got / reloc_is_tls_got switches. */
    118   const RelocDesc* (*reloc_desc)(RelocKind);
    119 
    120   /* This arch's instruction-immediate relocation byte encoders: patch the
    121    * `width` bytes at P_bytes for an instruction-embedded kind (imm19/imm26/
    122    * ADRP-page on AArch64; U/I/S/B/J + RVC immediate scatter on RISC-V; the
    123    * pc-relative rel8 on x86-64).  Returns 1 if it owns and applied `k`, 0 if
    124    * `k` is not one of this arch's instruction kinds (the dispatcher then
    125    * panics "unsupported reloc kind").  The arch-neutral data-word kinds
    126    * (the R_ABS / R_REL / R_PC / R_TPOFF writes, the RISC-V data ADD/SUB/SET
    127    * arithmetic, and the ULEB128 codec) are handled by reloc_apply_neutral() in
    128    * the obj core before this hook is consulted — see link_reloc_apply
    129    * (src/link/link_reloc_apply.c). */
    130   int (*reloc_apply_insn)(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A,
    131                           u64 P); /* NOTE(review): S/A/P look like the usual symbol/addend/place triple -- confirm */
    132 
    133   /* In-process JIT only: relax an ELF Local-Exec TLS access (a reloc whose
    134    * RelocDesc carries RELOC_IS_TLS_LE) to ordinary in-image addressing.  The
    135    * JIT is single-threaded, so the image's in-image .tdata/.tbss is the
    136    * variable's one instance; the per-arch tp-based idiom (mrs tpidr_el0 / add
    137    * tp / mov fs:0) is rewritten in place to address that storage directly,
    138    * dropping the thread-pointer read (which would alias into the host's TLS).
    139    * `site` is the write-alias bytes at the reloc; `storage` the runtime
    140    * address of the variable's in-image storage; `site_pc` the runtime address
    141    * of the reloc site.  Codegen emits the idiom per-access and contiguous, so
    142    * the primary (HI/offset) reloc rewrites the whole idiom and the LO12 half is
    143    * a no-op.  NULL on arches with no ELF Local-Exec TLS. */
    144   void (*jit_tls_le_relax)(Compiler* c, RelocKind k, u8* site, u64 storage,
    145                            u64 site_pc);
    146 
    147   /* In-process JIT only: collapse one relocation's runtime indirection to
    148    * direct in-image addressing — the Mach-O TLV descriptor call, a GOT load
    149    * (append path), the weak-undef-zero materialization, and the RISC-V
    150    * PCREL_LO12 anchor recompute.  The arch owns the instruction byte rewrites
    151    * for exactly the kinds it recognizes (consulting JitRelaxCtx for the
    152    * weak/GOT/pair context).  Returns 1 if it handled `k`, 0 to fall through to
    153    * link_reloc_apply.  NULL on arches with no JIT relaxation beyond TLS LE
    154    * (x86-64). */
    155   int (*jit_reloc_relax)(Compiler* c, RelocKind k, const JitRelaxCtx* ctx);
    156 
    157   /* TLS variant: 1 = variant II (x86-64, tpoff = X - tls_memsz_rounded);
    158    * 0 = variant I (AArch64/RISC-V, tpoff = (X - tls_vaddr) + tcb_bias).
    159    * Consulted by the ELF linker when applying R_TPOFF64. */
    160   u8 tls_variant_ii; /* used as a boolean: 1 or 0 as described above */
    161 
    162   /* ---- Optional COFF __chkstk stub ----
    163    * Arches that cannot emit inline stack probes (aarch64) carry the bytes of a
    164    * __chkstk function that link_synth_coff_ctor_dtor_list emits into a retained
    165    * .text$chkstk section for PE/COFF targets.  NULL/0 = none: x64 emits inline
    166    * probes, and the RISC-V and wasm arches are not COFF targets. */
    167   const u8* coff_chkstk_bytes; /* NULL = this arch ships no __chkstk stub */
    168   u32 coff_chkstk_len;         /* length of coff_chkstk_bytes; 0 when there is no stub */
    169 } LinkArchDesc;
    170 
    171 /* Looks up the LinkArchDesc for the compiler's target arch.  Returns NULL for an unsupported arch.
    172  * Callers panic with their own context-rich message rather than this helper picking one. */
    173 const LinkArchDesc* link_arch_desc_for(const Compiler*);
    174 
    175 #endif