kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

link.h (12711B)


      1 #ifndef KIT_INTERNAL_LINK_H
      2 #define KIT_INTERNAL_LINK_H
      3 
      4 #include <kit/core.h>
      5 #include <kit/jit.h>
      6 #include <kit/link.h>
      7 
      8 #include "obj/obj.h"
      9 
     10 typedef struct Linker Linker; /* opaque in this header: forward-declared only */
     11 typedef struct LinkImage LinkImage; /* opaque in this header: forward-declared only */
     12 
     13 struct KitLinkSession {
          /* State behind a KitLinkSession handle.
           * NOTE(review): the per-field notes below are read off names and types
           * only — confirm against the session implementation. */
     14   Compiler* c;
     15   Linker* linker;
     16   KitLinkSessionOptions opts;
     17   LinkImage* image; /* presumably unset until the session has resolved — confirm */
     18   KitObjBuilder** publish_objs; /* pointer array; sized by the two counters below */
     19   u32 npublish_objs;    /* presumably entries in use — TODO confirm */
     20   u32 publish_objs_cap; /* presumably allocated capacity — TODO confirm */
     21   u32 non_obj_inputs;
     22   u8 resolved;           /* presumably a boolean flag — confirm */
     23   u8 linker_transferred; /* presumably a boolean flag: `linker` handed off elsewhere — confirm */
     24   u8 pad[2]; /* explicit padding after the two u8 fields above */
     25 };
     26 
     27 typedef enum LinkInputKind {
          /* Kind tag for a LinkInput; stored narrowed to u8 in LinkInput.kind. */
     28   LINK_INPUT_OBJ, /* ObjBuilder input: LinkInput.obj is set up front */
     29   LINK_INPUT_OBJ_BYTES, /* presumably from link_add_obj_bytes; LinkInput.obj is NULL until read */
     30   LINK_INPUT_ARCHIVE_BYTES, /* presumably from link_add_archive_bytes — confirm */
     31   /* Shared-object input (ET_DYN). Parsed via read_elf_dso into an
     32    * ObjBuilder containing only the DSO's exported (dynsym) symbols.
     33    * Contributes nothing to layout — its symbols are searched by
     34    * resolve_undefs to satisfy imported references. */
     35   LINK_INPUT_DSO_BYTES,
     36 } LinkInputKind;
     37 
     38 typedef u32 LinkInputId; /* handle for a LinkInput */
     39 #define LINK_INPUT_NONE 0u /* sentinel: no input */
     40 
     41 typedef u32 LinkSymId; /* handle for a LinkSymbol */
     42 #define LINK_SYM_NONE 0u /* sentinel: no symbol */
     43 
     44 typedef u32 LinkSegmentId; /* handle for a LinkSegment */
     45 #define LINK_SEG_NONE 0u /* sentinel: no segment (used by file-only LinkSections) */
     46 
     47 typedef u32 LinkSectionId; /* handle for a LinkSection */
     48 #define LINK_SEC_NONE 0u /* sentinel: no section */
     49 
     50 typedef struct LinkInput {
          /* One input registered with the Linker; `kind` says which of the
           * LinkInputKind variants this record describes. */
     51   LinkInputId id; /* presumably the handle returned by the link_add_* call — confirm */
     52   u8 kind; /* LinkInputKind */
     53   u8 pad[3];
     54   u32 order; /* presumably the input's position in add order — TODO confirm */
     55   ObjBuilder* obj; /* for LINK_INPUT_OBJ, otherwise NULL until read */
     56   Sym name;        /* diagnostic name for bytes inputs */
     57   /* DSO-only: SONAME extracted from PT_DYNAMIC.DT_SONAME. 0 if absent.
     58    * Used as the DT_NEEDED entry for the consuming exe / shared lib —
     59    * the runtime loader looks up the dependency by SONAME, not by the
     60    * filesystem path passed at link time. */
     61   Sym soname;
     62   /* COFF short-import only: the name the loader must resolve in the DLL when
     63    * it differs from the symbol's link name (Microsoft short-import NameType
     64    * NOPREFIX/UNDECORATE/EXPORTAS — e.g. local __msvcrt_assert -> export
     65    * _assert). 0 when the import name equals the symbol name. Consumed by the
     66    * COFF import-table synthesis for the PE hint/name-table entry. */
     67   Sym coff_import_name;
     68 } LinkInput;
     69 
     70 typedef struct LinkSymbol {
          /* One symbol of a resolved LinkImage; obtained read-only through
           * link_symbol() / link_symbol_lookup(). */
     71   LinkSymId id;
     72   Sym name; /* interned name; the key type taken by link_symbol_lookup */
     73   LinkInputId input_id; /* presumably the input that supplied this symbol — confirm */
     74   ObjSymId obj_sym;     /* presumably the symbol's id inside that input's object — confirm */
     75   ObjSecId section_id;
     76   ObjAtomId atom_id;
     77   u64 value; /* NOTE(review): pre-layout value vs. `vaddr` below — exact meaning not stated here */
     78   u64 vaddr; /* final linked address, 0 for unresolved undef */
     79   u64 size;
     80   u32 common_align; /* alignment for SK_COMMON symbols */
     81   u8 bind;          /* SymBind */
     82   u8 kind;          /* SymKind */
     83   u8 defined;       /* presumably boolean: nonzero once a definition is bound — confirm */
     84   /* Dynamic-link bookkeeping. `imported` is set when an undef was
     85    * matched against a DSO input's exports — the symbol stays
     86    * structurally undefined (the static linker has no value for it)
     87    * but resolve_undefs no longer panics on it. `dso_input_id` is the
     88    * id of the providing DSO LinkInput; the DSO's SONAME ends up in
     89    * the produced image's DT_NEEDED list. The needs_* flags are set
     90    * during reloc-rewrite (Phase 5) — declared here so the model is
     91    * stable across the dyn-link work. */
     92   u8 imported;
     93   LinkInputId dso_input_id;
     94   u8 needs_plt;
     95   u8 needs_got;
     96   u8 needs_copy;
     97   u8 pad[5]; /* explicit tail padding after the three needs_* flags */
     98 } LinkSymbol;
     99 
    100 typedef struct LinkSegment {
          /* One output segment of the linked image, as returned by
           * link_segment_get(). */
    101   LinkSegmentId id;
    102   u32 flags; /* SecFlag-like permissions after layout */
    103   u64 file_offset;
    104   u64 vaddr;
    105   u64 mem_size;
    106   u64 file_size; /* NOTE(review): relation to mem_size (e.g. a zero-filled tail) is not stated — confirm */
    107   u32 align;
    108   u32 nsections; /* presumably the number of LinkSections placed in this segment — confirm */
    109 } LinkSegment;
    110 
    111 typedef struct LinkSection {
          /* One placed section of the linked image, as returned by
           * link_section_get(). */
    112   LinkSectionId id;
    113   LinkInputId input_id;    /* presumably the contributing input — confirm */
    114   ObjSecId obj_section_id; /* presumably the source section within that input's object — confirm */
    115   ObjAtomId obj_atom_id;
    116   LinkSegmentId segment_id; /* LINK_SEG_NONE when file_only is set (see below) */
          /* NOTE(review): three distinct offsets follow; how obj_offset and
           * input_offset differ is not documented in this header — confirm
           * before relying on either. */
    117   u64 obj_offset;
    118   u64 input_offset;
    119   u64 file_offset;
    120   u64 vaddr;
    121   u64 size;
    122   u32 flags;
    123   u32 align;
    124   Sym name; /* section name (interned); 0 if anon */
    125   u16 sem;  /* SecSem of the source obj section */
    126   /* Non-segment, file-resident section (a .debug_* contribution). It
    127    * lives in img->sections so its SK_SECTION symbol resolves and the
    128    * reloc engine applies, but it has segment_id == LINK_SEG_NONE and
    129    * carries its bytes in the LinkImage debug registry, not a segment
    130    * buffer. See link_layout_debug / link_fileonly_bytes. */
    131   u8 file_only;
    132   u8 pad;
    133 } LinkSection;
    134 
    135 typedef struct LinkRelocApply {
          /* One relocation record of the image (see link_reloc_apply_get). Per the
           * incremental-linking note on link_resolve, these records are kept as
           * data rather than only being burned into segment bytes. */
    136   LinkInputId input_id;
    137   ObjSecId section_id;
    138   LinkSectionId link_section_id;
    139   u32 offset; /* presumably the patch site's byte offset within the section — confirm */
    140   u32 width;  /* NOTE(review): unit (bytes vs. bits) is not stated here — confirm */
    141   u64 write_vaddr;
    142   u64 write_file_offset;
    143   RelocKind kind;
    144   LinkSymId target; /* the symbol this relocation refers to */
    145   i64 addend;
    146 } LinkRelocApply;
    147 
    148 /* Internal resolver type matches the public KitExternResolver: name is a
    149  * C string at the linker boundary. The Linker interns it on entry. */
        /* Installed with link_set_extern_resolver, together with an opaque
         * `user` pointer. */
    150 typedef KitExternResolver LinkExternResolver;
    151 
    152 Linker* link_new(Compiler*); /* creates a Linker for the given Compiler */
    153 void link_free(Linker*); /* presumably releases a Linker obtained from link_new — confirm */
    154 
    155 /* Inputs are byte-buffer-shaped. Path-based adapters live in the driver
    156  * (see driver/driver.h) and use Compiler.env->file_io to read bytes before
    157  * calling these. All bytes inputs must remain alive until link_resolve
    158  * returns; ObjBuilder inputs must remain alive until link_image_free.
    159  *
    160  * `name` is an unowned diagnostic string; the linker interns it on entry
    161  * (callers do not need to pre-intern). */
        /* Each link_add_* call returns the LinkInputId of the new input.
         * NOTE(review): whether LINK_INPUT_NONE can come back on failure (as
         * opposed to a Compiler.panic) is not stated here — confirm. */
    162 LinkInputId link_add_obj(Linker*, ObjBuilder*);
    163 LinkInputId link_add_obj_bytes(Linker*, const char* name, const u8* data,
    164                                size_t len);
    165 /* Shared-object input. The bytes are parsed as ET_DYN ELF; only the
    166  * DSO's dynsym (exported symbols) is materialized. The DSO contributes
    167  * no sections to the output image — its presence influences resolution
    168  * (an undef matched by name against this DSO's exports becomes an
    169  * imported symbol) and DT_NEEDED bookkeeping (the DSO's SONAME, or its
    170  * filename if no SONAME, is recorded as a runtime dependency). */
    171 LinkInputId link_add_dso_bytes(Linker*, const char* name, const u8* data,
    172                                size_t len);
    173 /* `whole_archive` (nonzero == --whole-archive) and `link_mode`
    174  * (KitLinkMode: -Bstatic / -Bdynamic / --as-needed positional state) are
    175  * orthogonal per-archive flags. `group_id == 0` means linear single-pass;
    176  * archives sharing a nonzero `group_id` are scanned cyclically (equivalent
    177  * to GNU ld --start-group ... --end-group). */
        /* `link_mode` and `group_id` are both passed as u8, so KitLinkMode
         * values and group numbers must fit in that width. */
    178 LinkInputId link_add_archive_bytes(Linker*, const char* name, const u8* data,
    179                                    size_t len, u8 whole_archive, u8 link_mode,
    180                                    u8 group_id);
    181 
    182 void link_set_entry(Linker*, KitSlice name);
        /* link_set_entry names the entry symbol (one of the --gc-sections
         * roots listed at link_set_gc_sections). link_clear_entry presumably
         * reverts to having no entry symbol — confirm. */
    183 void link_clear_entry(Linker*);
    184 /* Borrowed reference; the script and every sub-object must outlive
    185  * link_resolve. The linker accepts only the structured form — there is no
    186  * text-shaped setter. Hosts that have GNU-ld text use
    187  * kit_link_script_parse first. */
    188 void link_set_script(Linker*, const KitLinkScript*);
        /* Registers the extern resolver callback. `user` is an opaque pointer,
         * presumably handed back to the callback on each call — confirm. */
    189 void link_set_extern_resolver(Linker*, LinkExternResolver, void* user);
    190 /* Enable --gc-sections on this link. Roots are: entry symbol, exported
    191  * symbols (shared link), and any section flagged KEEP by the linker
    192  * script. Unreferenced sections are dropped from the output. */
    193 void link_set_gc_sections(Linker*, int enable);
        /* NOTE(review): undocumented; presumably omits .debug_* contributions
         * from the output when enabled — confirm. */
    194 void link_set_strip_debug(Linker*, int enable);
    195 
    196 /* Mark this link as targeting a static ET_EXEC ELF binary (vs. the
    197  * in-process JIT).  Setter is called by kit_link_exe; the JIT path
    198  * leaves it disabled.  Currently controls the IFUNC startup-init
    199  * synthesis in layout_iplt: with this flag set, layout appends a
    200  * .init_array entry that calls __kit_ifunc_init at exe startup so
    201  * .igot.plt slots get filled before user code runs.  The JIT pre-
    202  * resolves slots in-process and doesn't need the ctor. */
    203 void link_set_emit_static_exe(Linker*, int enable);
    204 /* Mark this link as the in-process JIT lane (set by kit_link_jit).
    205  * Lets link_resolve tolerate platform undefs the JIT image patches
    206  * post-link (currently: Mach-O `__tlv_bootstrap`).  Leaves AOT lanes
    207  * untouched. */
        /* NOTE(review): the two comments above describe the static-exe and JIT
         * lanes as alternatives (kit_link_exe sets one, kit_link_jit the other);
         * nothing in this header says what happens if both are enabled —
         * confirm. */
    208 void link_set_jit_mode(Linker*, int enable);
    209 
    210 /* Mark this link as producing a position-independent ET_DYN exe (-pie).
    211  * Triggers Phase 4 layout_dyn pass (synthetic .interp/.dynsym/.dynstr/
    212  * .gnu.hash/.plt/.got.plt/.rela.dyn/.rela.plt/.dynamic) and Phase 6 ELF
    213  * emit (e_type=ET_DYN, IMAGE_BASE=0, PT_PHDR/PT_INTERP/PT_DYNAMIC,
    214  * R_AARCH64_RELATIVE on internal absolute relocs). Orthogonal to
    215  * emit_static_exe; both may be set in the same link (the IFUNC ctor
    216  * still wants to run on the exe path regardless of PIE). */
    217 void link_set_pie(Linker*, int enable);
    218 /* Override the static ET_EXEC image (text) base, from `kit ld -Ttext ADDR`. No
    219  * effect on PIE/shared (base 0) or scripted layout (script pins vaddrs). */
    220 void link_set_text_base(Linker*, u64 base);
        /* NOTE(review): undocumented; presumably the PE optional-header
         * Subsystem value, relevant only to PE/COFF output — confirm. */
    221 void link_set_pe_subsystem(Linker*, u16 subsystem);
    222 
    223 /* Runtime loader path written into PT_INTERP / .interp. NULL leaves the
    224  * default ("/lib/ld-musl-aarch64.so.1" for aarch64-linux). Only
    225  * consulted when -pie is enabled (or any DSO input is present). */
        /* NOTE(review): `path` is a KitSlice passed by value, so "NULL" above
         * presumably means a slice with no data — confirm the exact encoding. */
    226 void link_set_interp_path(Linker*, KitSlice path);
    227 
    228 /* Borrowed JIT host. The layout passes read execmem->page_size; the JIT
    229  * mapper reads the full host (execmem reserve/protect, tls). NULL on
    230  * AOT exe/shared lanes. The host and its sub-tables must outlive the
    231  * link / the produced KitJit. */
    232 void link_set_jit_host(Linker*, const KitJitHost*);
    233 
    234 /* Symbol resolution and layout are explicit so file linking and JIT share the
    235  * same resolved image. Fatal diagnostics use Compiler.panic.
    236  *
    237  * link_resolve registers the returned LinkImage with compiler_defer so a
    238  * panic between resolve and consumer (emit_writer / jit_from_image) reaps
    239  * it. Successful consumers either call link_image_free (which undefers and
    240  * frees) or transfer ownership via kit_jit_from_image (which undefers and
    241  * keeps the image alive for the JIT's lifetime).
    242  *
    243  * ---- Incremental-linking invariant (forward compat) ----
    244  * The single-shot link_resolve implementation must not destroy or consume
    245  * input-side state that a future incremental re-resolve would need.
    246  * Specifically:
    247  *   - LinkRelocApply records stay as data: do not burn them into segment
    248  *     bytes destructively without preserving the originals.
    249  *   - LinkInputId -> ObjBuilder* mappings stay stable for the lifetime of
    250  *     the Linker — adding an input never invalidates an existing handle.
    251  *   - Resolution is structured as a function from inputs to a fresh
    252  *     LinkImage, not as in-place mutation of the Linker.
    253  * Incremental linking is the single most likely future addition; this
    254  * comment locks in the implementation discipline that keeps the existing
    255  * surface amenable, with no speculative API. */
        /* NOTE(review): link_resolve_at / link_resolve_extend are declared
         * directly after this function, so the "future addition" and "no
         * speculative API" wording above may be stale — confirm and refresh. */
    256 LinkImage* link_resolve(Linker*);
    257 
    258 /* Incremental resolution (per doc/EMU.md §6). link_resolve_at reserves
    259  * the image's layout starting at the caller-specified base VA — used
    260  * by the emu so the JIT image's host addresses are stable for the
    261  * session (chaining patches live host code with section addresses).
    262  * link_resolve_extend appends new inputs to an existing image: places
    263  * new sections at the next free offset within the reserved region,
    264  * resolves new symbols against the existing image's globals plus the
    265  * registered LinkExternResolver, and applies new relocations. It
    266  * MUST NOT change host addresses of previously placed sections —
    267  * chaining and the code cache depend on it. The image must have been
    268  * produced by a prior link_resolve_at call on the same Linker. */
        /* NOTE(review): unlike link_resolve, nothing here states whether the
         * image returned by link_resolve_at is registered with compiler_defer,
         * nor how link_resolve_extend reports running out of reserved space —
         * confirm both. */
    269 LinkImage* link_resolve_at(Linker*, uintptr_t base_va);
    270 void link_resolve_extend(Linker*, LinkImage*);
    271 
    272 void link_image_free(LinkImage*); /* undefers and frees the image (see link_resolve) */
        /* Read-only accessors over a resolved image. The *_count functions
         * return u32 totals; the getters return const pointers into the image.
         * NOTE(review): behaviour for an unknown id (NULL vs. panic) and whether
         * link_symbol_lookup yields LINK_SYM_NONE on a miss are not stated —
         * confirm. */
    273 const LinkSymbol* link_symbol(LinkImage*, LinkSymId);
    274 LinkSymId link_symbol_lookup(LinkImage*, Sym name);
    275 u32 link_segment_count(LinkImage*);
        /* NOTE(review): link_segment_get and link_reloc_apply_get take a plain
         * `u32 id` while their siblings take typed ids; whether that value is a
         * 0-based index or an id with 0 reserved as *_NONE is not visible here. */
    276 const LinkSegment* link_segment_get(LinkImage*, u32 id);
    277 const u8* link_segment_bytes(LinkImage*, LinkSegmentId, size_t* size_out);
    278 u32 link_section_count(LinkImage*);
    279 const LinkSection* link_section_get(LinkImage*, LinkSectionId id);
    280 u32 link_reloc_apply_count(LinkImage*);
    281 const LinkRelocApply* link_reloc_apply_get(LinkImage*, u32 id);
    282 
    283 /* Writes an executable in the format implied by Compiler.target into the
    284  * caller-provided Writer. Path-based emit lives in the driver. */
        /* Per the link_resolve comment, a successful consumer of the image
         * still calls link_image_free afterwards; this function is not
         * described as taking ownership. */
    285 void link_emit_image_writer(LinkImage*, Writer*);
    286 
    287 /* Writes an ET_REL / MH_OBJECT relocatable partial-link output. This consumes
    288  * the Linker's object/archive inputs and emits a fresh ObjBuilder through the
    289  * active object-format writer; it does not perform executable layout, section
    290  * GC, entry resolution, GOT/IPLT synthesis, or DSO binding. */
        /* Takes the Linker itself rather than a LinkImage, i.e. no prior
         * link_resolve result is passed in. */
    291 void link_emit_relocatable_writer(Linker*, Writer*);
    292 
    293 /* JIT: maps the image into executable memory and returns an owning handle.
    294  * The returned KitJit takes ownership of the LinkImage (undefers it from
    295  * the cleanup stack registered by link_resolve); on kit_jit_free both the
    296  * JIT mapping and the LinkImage are released. Lookup is by name; the public
    297  * `kit_jit_lookup` and `kit_jit_free` declarations live in
    298  * <kit/jit.h>. */
        /* NOTE(review): failure behaviour (NULL return vs. Compiler.panic) is
         * not stated here — confirm. */
    299 KitJit* kit_jit_from_image(LinkImage*);
    300 
    301 #endif