/* link.h — internal linker interface. */
1 #ifndef KIT_INTERNAL_LINK_H 2 #define KIT_INTERNAL_LINK_H 3 4 #include <kit/core.h> 5 #include <kit/jit.h> 6 #include <kit/link.h> 7 8 #include "obj/obj.h" 9 10 typedef struct Linker Linker; 11 typedef struct LinkImage LinkImage; 12 13 struct KitLinkSession { 14 Compiler* c; 15 Linker* linker; 16 KitLinkSessionOptions opts; 17 LinkImage* image; 18 KitObjBuilder** publish_objs; 19 u32 npublish_objs; 20 u32 publish_objs_cap; 21 u32 non_obj_inputs; 22 u8 resolved; 23 u8 linker_transferred; 24 u8 pad[2]; 25 }; 26 27 typedef enum LinkInputKind { 28 LINK_INPUT_OBJ, 29 LINK_INPUT_OBJ_BYTES, 30 LINK_INPUT_ARCHIVE_BYTES, 31 /* Shared-object input (ET_DYN). Parsed via read_elf_dso into an 32 * ObjBuilder containing only the DSO's exported (dynsym) symbols. 33 * Contributes nothing to layout — its symbols are searched by 34 * resolve_undefs to satisfy imported references. */ 35 LINK_INPUT_DSO_BYTES, 36 } LinkInputKind; 37 38 typedef u32 LinkInputId; 39 #define LINK_INPUT_NONE 0u 40 41 typedef u32 LinkSymId; 42 #define LINK_SYM_NONE 0u 43 44 typedef u32 LinkSegmentId; 45 #define LINK_SEG_NONE 0u 46 47 typedef u32 LinkSectionId; 48 #define LINK_SEC_NONE 0u 49 50 typedef struct LinkInput { 51 LinkInputId id; 52 u8 kind; /* LinkInputKind */ 53 u8 pad[3]; 54 u32 order; 55 ObjBuilder* obj; /* for LINK_INPUT_OBJ, otherwise NULL until read */ 56 Sym name; /* diagnostic name for bytes inputs */ 57 /* DSO-only: SONAME extracted from PT_DYNAMIC.DT_SONAME. 0 if absent. 58 * Used as the DT_NEEDED entry for the consuming exe / shared lib — 59 * the runtime loader looks up the dependency by SONAME, not by the 60 * filesystem path passed at link time. */ 61 Sym soname; 62 /* COFF short-import only: the name the loader must resolve in the DLL when 63 * it differs from the symbol's link name (Microsoft short-import NameType 64 * NOPREFIX/UNDECORATE/EXPORTAS — e.g. local __msvcrt_assert -> export 65 * _assert). 0 when the import name equals the symbol name. 
Consumed by the 66 * COFF import-table synthesis for the PE hint/name-table entry. */ 67 Sym coff_import_name; 68 } LinkInput; 69 70 typedef struct LinkSymbol { 71 LinkSymId id; 72 Sym name; 73 LinkInputId input_id; 74 ObjSymId obj_sym; 75 ObjSecId section_id; 76 ObjAtomId atom_id; 77 u64 value; 78 u64 vaddr; /* final linked address, 0 for unresolved undef */ 79 u64 size; 80 u32 common_align; /* alignment for SK_COMMON symbols */ 81 u8 bind; /* SymBind */ 82 u8 kind; /* SymKind */ 83 u8 defined; 84 /* Dynamic-link bookkeeping. `imported` is set when an undef was 85 * matched against a DSO input's exports — the symbol stays 86 * structurally undefined (the static linker has no value for it) 87 * but resolve_undefs no longer panics on it. `dso_input_id` is the 88 * id of the providing DSO LinkInput; the DSO's SONAME ends up in 89 * the produced image's DT_NEEDED list. The needs_* flags are set 90 * during reloc-rewrite (Phase 5) — declared here so the model is 91 * stable across the dyn-link work. */ 92 u8 imported; 93 LinkInputId dso_input_id; 94 u8 needs_plt; 95 u8 needs_got; 96 u8 needs_copy; 97 u8 pad[5]; 98 } LinkSymbol; 99 100 typedef struct LinkSegment { 101 LinkSegmentId id; 102 u32 flags; /* SecFlag-like permissions after layout */ 103 u64 file_offset; 104 u64 vaddr; 105 u64 mem_size; 106 u64 file_size; 107 u32 align; 108 u32 nsections; 109 } LinkSegment; 110 111 typedef struct LinkSection { 112 LinkSectionId id; 113 LinkInputId input_id; 114 ObjSecId obj_section_id; 115 ObjAtomId obj_atom_id; 116 LinkSegmentId segment_id; 117 u64 obj_offset; 118 u64 input_offset; 119 u64 file_offset; 120 u64 vaddr; 121 u64 size; 122 u32 flags; 123 u32 align; 124 Sym name; /* section name (interned); 0 if anon */ 125 u16 sem; /* SecSem of the source obj section */ 126 /* Non-segment, file-resident section (a .debug_* contribution). 
It 127 * lives in img->sections so its SK_SECTION symbol resolves and the 128 * reloc engine applies, but it has segment_id == LINK_SEG_NONE and 129 * carries its bytes in the LinkImage debug registry, not a segment 130 * buffer. See link_layout_debug / link_fileonly_bytes. */ 131 u8 file_only; 132 u8 pad; 133 } LinkSection; 134 135 typedef struct LinkRelocApply { 136 LinkInputId input_id; 137 ObjSecId section_id; 138 LinkSectionId link_section_id; 139 u32 offset; 140 u32 width; 141 u64 write_vaddr; 142 u64 write_file_offset; 143 RelocKind kind; 144 LinkSymId target; 145 i64 addend; 146 } LinkRelocApply; 147 148 /* Internal resolver type matches the public KitExternResolver: name is a 149 * C string at the linker boundary. The Linker interns it on entry. */ 150 typedef KitExternResolver LinkExternResolver; 151 152 Linker* link_new(Compiler*); 153 void link_free(Linker*); 154 155 /* Inputs are byte-buffer-shaped. Path-based adapters live in the driver 156 * (see driver/driver.h) and use Compiler.env->file_io to read bytes before 157 * calling these. All bytes inputs must remain alive until link_resolve 158 * returns; ObjBuilder inputs must remain alive until link_image_free. 159 * 160 * `name` is an unowned diagnostic string; the linker interns it on entry 161 * (callers do not need to pre-intern). */ 162 LinkInputId link_add_obj(Linker*, ObjBuilder*); 163 LinkInputId link_add_obj_bytes(Linker*, const char* name, const u8* data, 164 size_t len); 165 /* Shared-object input. The bytes are parsed as ET_DYN ELF; only the 166 * DSO's dynsym (exported symbols) is materialized. The DSO contributes 167 * no sections to the output image — its presence influences resolution 168 * (an undef matched by name against this DSO's exports becomes an 169 * imported symbol) and DT_NEEDED bookkeeping (the DSO's SONAME, or its 170 * filename if no SONAME, is recorded as a runtime dependency). 
*/ 171 LinkInputId link_add_dso_bytes(Linker*, const char* name, const u8* data, 172 size_t len); 173 /* `whole_archive` (nonzero == --whole-archive) and `link_mode` 174 * (KitLinkMode: -Bstatic / -Bdynamic / --as-needed positional state) are 175 * orthogonal per-archive flags. `group_id == 0` means linear single-pass; 176 * archives sharing a nonzero `group_id` are scanned cyclically (equivalent 177 * to GNU ld --start-group ... --end-group). */ 178 LinkInputId link_add_archive_bytes(Linker*, const char* name, const u8* data, 179 size_t len, u8 whole_archive, u8 link_mode, 180 u8 group_id); 181 182 void link_set_entry(Linker*, KitSlice name); 183 void link_clear_entry(Linker*); 184 /* Borrowed reference; the script and every sub-object must outlive 185 * link_resolve. The linker accepts only the structured form — there is no 186 * text-shaped setter. Hosts that have GNU-ld text use 187 * kit_link_script_parse first. */ 188 void link_set_script(Linker*, const KitLinkScript*); 189 void link_set_extern_resolver(Linker*, LinkExternResolver, void* user); 190 /* Enable --gc-sections on this link. Roots are: entry symbol, exported 191 * symbols (shared link), and any section flagged KEEP by the linker 192 * script. Unreferenced sections are dropped from the output. */ 193 void link_set_gc_sections(Linker*, int enable); 194 void link_set_strip_debug(Linker*, int enable); 195 196 /* Mark this link as targeting a static ET_EXEC ELF binary (vs. the 197 * in-process JIT). Setter is called by kit_link_exe; the JIT path 198 * leaves it disabled. Currently controls the IFUNC startup-init 199 * synthesis in layout_iplt: with this flag set, layout appends a 200 * .init_array entry that calls __kit_ifunc_init at exe startup so 201 * .igot.plt slots get filled before user code runs. The JIT pre- 202 * resolves slots in-process and doesn't need the ctor. */ 203 void link_set_emit_static_exe(Linker*, int enable); 204 /* Mark this link as the in-process JIT lane (set by kit_link_jit). 
205 * Lets link_resolve tolerate platform undefs the JIT image patches 206 * post-link (currently: Mach-O `__tlv_bootstrap`). Leaves AOT lanes 207 * untouched. */ 208 void link_set_jit_mode(Linker*, int enable); 209 210 /* Mark this link as producing a position-independent ET_DYN exe (-pie). 211 * Triggers Phase 4 layout_dyn pass (synthetic .interp/.dynsym/.dynstr/ 212 * .gnu.hash/.plt/.got.plt/.rela.dyn/.rela.plt/.dynamic) and Phase 6 ELF 213 * emit (e_type=ET_DYN, IMAGE_BASE=0, PT_PHDR/PT_INTERP/PT_DYNAMIC, 214 * R_AARCH64_RELATIVE on internal absolute relocs). Orthogonal to 215 * emit_static_exe; both may be set in the same link (the IFUNC ctor 216 * still wants to run on the exe path regardless of PIE). */ 217 void link_set_pie(Linker*, int enable); 218 /* Override the static ET_EXEC image (text) base, from `kit ld -Ttext ADDR`. No 219 * effect on PIE/shared (base 0) or scripted layout (script pins vaddrs). */ 220 void link_set_text_base(Linker*, u64 base); 221 void link_set_pe_subsystem(Linker*, u16 subsystem); 222 223 /* Runtime loader path written into PT_INTERP / .interp. NULL leaves the 224 * default ("/lib/ld-musl-aarch64.so.1" for aarch64-linux). Only 225 * consulted when -pie is enabled (or any DSO input is present). */ 226 void link_set_interp_path(Linker*, KitSlice path); 227 228 /* Borrowed JIT host. The layout passes read execmem->page_size; the JIT 229 * mapper reads the full host (execmem reserve/protect, tls). NULL on 230 * AOT exe/shared lanes. The host and its sub-tables must outlive the 231 * link / the produced KitJit. */ 232 void link_set_jit_host(Linker*, const KitJitHost*); 233 234 /* Symbol resolution and layout are explicit so file linking and JIT share the 235 * same resolved image. Fatal diagnostics use Compiler.panic. 236 * 237 * link_resolve registers the returned LinkImage with compiler_defer so a 238 * panic between resolve and consumer (emit_writer / jit_from_image) reaps 239 * it. 
Successful consumers either call link_image_free (which undefers and 240 * frees) or transfer ownership via kit_jit_from_image (which undefers and 241 * keeps the image alive for the JIT's lifetime). 242 * 243 * ---- Incremental-linking invariant (forward compat) ---- 244 * The single-shot link_resolve implementation must not destroy or consume 245 * input-side state that a future incremental re-resolve would need. 246 * Specifically: 247 * - LinkRelocApply records stay as data: do not burn them into segment 248 * bytes destructively without preserving the originals. 249 * - LinkInputId -> ObjBuilder* mappings stay stable for the lifetime of 250 * the Linker — adding an input never invalidates an existing handle. 251 * - Resolution is structured as a function from inputs to a fresh 252 * LinkImage, not as in-place mutation of the Linker. 253 * Incremental linking is the single most likely future addition; this 254 * comment locks in the implementation discipline that keeps the existing 255 * surface amenable, with no speculative API. */ 256 LinkImage* link_resolve(Linker*); 257 258 /* Incremental resolution (per doc/EMU.md §6). link_resolve_at reserves 259 * the image's layout starting at the caller-specified base VA — used 260 * by the emu so the JIT image's host addresses are stable for the 261 * session (chaining patches live host code with section addresses). 262 * link_resolve_extend appends new inputs to an existing image: places 263 * new sections at the next free offset within the reserved region, 264 * resolves new symbols against the existing image's globals plus the 265 * registered LinkExternResolver, and applies new relocations. It 266 * MUST NOT change host addresses of previously placed sections — 267 * chaining and the code cache depend on it. The image must have been 268 * produced by a prior link_resolve_at call on the same Linker. 
*/ 269 LinkImage* link_resolve_at(Linker*, uintptr_t base_va); 270 void link_resolve_extend(Linker*, LinkImage*); 271 272 void link_image_free(LinkImage*); 273 const LinkSymbol* link_symbol(LinkImage*, LinkSymId); 274 LinkSymId link_symbol_lookup(LinkImage*, Sym name); 275 u32 link_segment_count(LinkImage*); 276 const LinkSegment* link_segment_get(LinkImage*, u32 id); 277 const u8* link_segment_bytes(LinkImage*, LinkSegmentId, size_t* size_out); 278 u32 link_section_count(LinkImage*); 279 const LinkSection* link_section_get(LinkImage*, LinkSectionId id); 280 u32 link_reloc_apply_count(LinkImage*); 281 const LinkRelocApply* link_reloc_apply_get(LinkImage*, u32 id); 282 283 /* Writes an executable in the format implied by Compiler.target into the 284 * caller-provided Writer. Path-based emit lives in the driver. */ 285 void link_emit_image_writer(LinkImage*, Writer*); 286 287 /* Writes an ET_REL / MH_OBJECT relocatable partial-link output. This consumes 288 * the Linker's object/archive inputs and emits a fresh ObjBuilder through the 289 * active object-format writer; it does not perform executable layout, section 290 * GC, entry resolution, GOT/IPLT synthesis, or DSO binding. */ 291 void link_emit_relocatable_writer(Linker*, Writer*); 292 293 /* JIT: maps the image into executable memory and returns an owning handle. 294 * The returned KitJit takes ownership of the LinkImage (undefers it from 295 * the cleanup stack registered by link_resolve); on kit_jit_free both the 296 * JIT mapping and the LinkImage are released. Lookup is by name; the public 297 * `kit_jit_lookup` and `kit_jit_free` declarations live in 298 * <kit/jit.h>. */ 299 KitJit* kit_jit_from_image(LinkImage*); 300 301 #endif