link_resolve.c (44398B)
1 /* link_resolve.c — archive ingest, symbol resolution, --gc-sections liveness. 2 * 3 * Phase 1 of the link pipeline: 4 * link_ingest_archives — pull archive members into l->inputs 5 * link_resolve_symbols — register every ObjSym, build img->globals 6 * link_resolve_undefs — satisfy remaining undefs (globals/DSOs/resolver) 7 * link_gc_compute — mark live sections (or mark all live if disabled) 8 * link_gc_drop_dead_globals — clear `defined` on syms in dropped sections 9 */ 10 11 #include <kit/core.h> 12 #include <stdlib.h> 13 #include <string.h> 14 15 #include "core/buf.h" 16 #include "core/bytes.h" 17 #include "core/heap.h" 18 #include "core/pool.h" 19 #include "core/slice.h" 20 #include "core/util.h" 21 #include "core/vec.h" 22 #include "link/link.h" 23 #include "link/link_arch.h" 24 #include "link/link_internal.h" 25 26 /* ---- per-input symbol/section maps ---- */ 27 28 typedef struct AtomSortRec { 29 ObjAtomId id; 30 ObjSecId section_id; 31 u32 offset; 32 } AtomSortRec; 33 34 static int atom_sort_rec_cmp(const void* av, const void* bv) { 35 const AtomSortRec* a = (const AtomSortRec*)av; 36 const AtomSortRec* b = (const AtomSortRec*)bv; 37 if (a->section_id < b->section_id) return -1; 38 if (a->section_id > b->section_id) return 1; 39 if (a->offset < b->offset) return -1; 40 if (a->offset > b->offset) return 1; 41 if (a->id < b->id) return -1; 42 if (a->id > b->id) return 1; 43 return 0; 44 } 45 46 static ObjAtomId input_map_find_atom(const InputMap* m, ObjBuilder* ob, 47 ObjSecId sid, u32 offset) { 48 u32 first, count, i; 49 if (!link_input_section_has_atoms(m, sid)) return OBJ_ATOM_NONE; 50 link_input_section_atoms(m, sid, &first, &count); 51 for (i = 0; i < count; ++i) { 52 ObjAtomId aid = m->section_atom_ids[first + i]; 53 const ObjAtom* a = obj_atom_get(ob, aid); 54 u64 begin, end; 55 if (!a || a->removed) continue; 56 begin = a->offset; 57 end = begin + a->size; 58 if (a->size != 0 && (u64)offset >= begin && (u64)offset < end) return aid; 59 } 60 for (i 
= 0; i < count; ++i) { 61 ObjAtomId aid = m->section_atom_ids[first + i]; 62 const ObjAtom* a = obj_atom_get(ob, aid); 63 if (!a || a->removed) continue; 64 if (a->size == 0 && offset == a->offset) return aid; 65 } 66 return OBJ_ATOM_NONE; 67 } 68 69 static ObjAtomId input_map_find_symbol_atom(const InputMap* m, ObjBuilder* ob, 70 ObjSymId sym) { 71 const ObjSym* s; 72 ObjAtomId aid; 73 u32 i; 74 if (!ob || sym == OBJ_SYM_NONE) return OBJ_ATOM_NONE; 75 s = obj_symbol_get(ob, sym); 76 if (!s || s->section_id == OBJ_SEC_NONE) return OBJ_ATOM_NONE; 77 aid = input_map_find_atom(m, ob, s->section_id, (u32)s->value); 78 if (aid != OBJ_ATOM_NONE) return aid; 79 for (i = 0; i < m->nsection_atom_ids; ++i) { 80 const ObjAtom* a = obj_atom_get(ob, m->section_atom_ids[i]); 81 if (a && !a->removed && a->signature == sym) return m->section_atom_ids[i]; 82 } 83 return OBJ_ATOM_NONE; 84 } 85 86 void link_input_map_alloc(LinkImage* img, InputMap* m, ObjBuilder* ob, 87 u32 nsym) { 88 Heap* h = img->heap; 89 u32 nsection = obj_section_count(ob); 90 u32 natom = obj_atom_count(ob); 91 u32 nreloc = obj_reloc_total(ob); 92 AtomSortRec* atoms = NULL; 93 u32 nactive = 0; 94 u32 i; 95 96 memset(m, 0, sizeof(*m)); 97 m->nsym = nsym; 98 m->sym = (LinkSymId*)h->alloc(h, sizeof(*m->sym) * nsym, _Alignof(LinkSymId)); 99 if (!m->sym) 100 compiler_panic(img->c, SRCLOC_NONE, "link: oom on input symbol map"); 101 memset(m->sym, 0, sizeof(*m->sym) * nsym); 102 m->nsection = nsection; 103 m->section = (LinkSectionId*)h->alloc(h, sizeof(*m->section) * nsection, 104 _Alignof(LinkSectionId)); 105 if (!m->section) 106 compiler_panic(img->c, SRCLOC_NONE, "link: oom on input section map"); 107 memset(m->section, 0, sizeof(*m->section) * nsection); 108 m->natom = natom; 109 m->atom = (LinkSectionId*)h->alloc(h, sizeof(*m->atom) * (natom ? 
natom : 1u), 110 _Alignof(LinkSectionId)); 111 if (!m->atom) 112 compiler_panic(img->c, SRCLOC_NONE, "link: oom on input atom map"); 113 memset(m->atom, 0, sizeof(*m->atom) * (natom ? natom : 1u)); 114 m->sym_atom = (ObjAtomId*)h->alloc( 115 h, sizeof(*m->sym_atom) * (nsym ? nsym : 1u), _Alignof(ObjAtomId)); 116 if (!m->sym_atom) 117 compiler_panic(img->c, SRCLOC_NONE, "link: oom on input symbol atom map"); 118 memset(m->sym_atom, 0, sizeof(*m->sym_atom) * (nsym ? nsym : 1u)); 119 m->nreloc = nreloc; 120 m->reloc_atom = (ObjAtomId*)h->alloc( 121 h, sizeof(*m->reloc_atom) * (nreloc ? nreloc : 1u), _Alignof(ObjAtomId)); 122 if (!m->reloc_atom) 123 compiler_panic(img->c, SRCLOC_NONE, "link: oom on input reloc atom map"); 124 memset(m->reloc_atom, 0, sizeof(*m->reloc_atom) * (nreloc ? nreloc : 1u)); 125 m->section_has_atoms = (u8*)h->alloc(h, nsection ? nsection : 1u, 1); 126 if (!m->section_has_atoms) 127 compiler_panic(img->c, SRCLOC_NONE, "link: oom on input section atom map"); 128 memset(m->section_has_atoms, 0, nsection ? nsection : 1u); 129 m->section_atom_first = (u32*)h->alloc( 130 h, sizeof(*m->section_atom_first) * (nsection ? nsection : 1u), 131 _Alignof(u32)); 132 m->section_atom_count = (u32*)h->alloc( 133 h, sizeof(*m->section_atom_count) * (nsection ? nsection : 1u), 134 _Alignof(u32)); 135 if (!m->section_atom_first || !m->section_atom_count) 136 compiler_panic(img->c, SRCLOC_NONE, 137 "link: oom on input section atom ranges"); 138 memset(m->section_atom_first, 0, 139 sizeof(*m->section_atom_first) * (nsection ? nsection : 1u)); 140 memset(m->section_atom_count, 0, 141 sizeof(*m->section_atom_count) * (nsection ? nsection : 1u)); 142 m->comdat_discarded = (u8*)h->alloc(h, nsection ? nsection : 1u, 1); 143 if (!m->comdat_discarded) 144 compiler_panic(img->c, SRCLOC_NONE, "link: oom on input comdat map"); 145 memset(m->comdat_discarded, 0, nsection ? 
nsection : 1u); 146 147 if (natom > 1u) { 148 atoms = (AtomSortRec*)h->alloc(h, sizeof(*atoms) * natom, 149 _Alignof(AtomSortRec)); 150 if (!atoms) 151 compiler_panic(img->c, SRCLOC_NONE, "link: oom on atom sort map"); 152 for (i = 1; i < natom; ++i) { 153 const ObjAtom* a = obj_atom_get(ob, (ObjAtomId)i); 154 if (!a || a->removed || a->section_id == OBJ_SEC_NONE || 155 a->section_id >= nsection) 156 continue; 157 atoms[nactive].id = (ObjAtomId)i; 158 atoms[nactive].section_id = a->section_id; 159 atoms[nactive].offset = a->offset; 160 ++nactive; 161 } 162 if (nactive > 1u) qsort(atoms, nactive, sizeof(*atoms), atom_sort_rec_cmp); 163 } 164 165 m->nsection_atom_ids = nactive; 166 if (nactive) { 167 ObjSecId cur = OBJ_SEC_NONE; 168 m->section_atom_ids = (ObjAtomId*)h->alloc( 169 h, sizeof(*m->section_atom_ids) * nactive, _Alignof(ObjAtomId)); 170 if (!m->section_atom_ids) 171 compiler_panic(img->c, SRCLOC_NONE, "link: oom on section atom ids"); 172 for (i = 0; i < nactive; ++i) { 173 ObjSecId sid = atoms[i].section_id; 174 m->section_atom_ids[i] = atoms[i].id; 175 if (sid != cur) { 176 m->section_has_atoms[sid] = 1; 177 m->section_atom_first[sid] = i; 178 cur = sid; 179 } 180 m->section_atom_count[sid]++; 181 } 182 } 183 if (atoms) h->free(h, atoms, sizeof(*atoms) * natom); 184 185 for (i = 1; i < nsym; ++i) 186 m->sym_atom[i] = input_map_find_symbol_atom(m, ob, (ObjSymId)i); 187 for (i = 0; i < nreloc; ++i) { 188 const Reloc* r = obj_reloc_at(ob, i); 189 if (!r || r->section_id == OBJ_SEC_NONE) continue; 190 m->reloc_atom[i] = input_map_find_atom(m, ob, r->section_id, r->offset); 191 } 192 } 193 194 /* ---- pass 1: collect symbols ---- */ 195 196 /* A symbol with no home section and no absolute/common pseudo-value: the 197 * importer's view of an undefined reference (or a DSO export). Distinct 198 * from link_sym_is_def — SK_FILE symbols are logical undefs here but 199 * defs there. 
Used only by scan_presence_before to split logical undefs 200 * from logical defs; the spurious-undef prune routes through the shared 201 * link_sym_is_spurious_undef. */ 202 static int obj_sym_is_logical_undef(const ObjSym* s) { 203 return s && s->section_id == OBJ_SEC_NONE && s->kind != SK_ABS && 204 s->kind != SK_COMMON; 205 } 206 207 /* COFF/PE SELECTANY: a duplicate strong global is acceptable iff both 208 * definitions live in COMDAT (SF_GROUP-tagged) sections. When that 209 * holds, the earlier-processed definition wins and the new section is 210 * marked for discard so its bytes never reach layout. */ 211 static int obj_sym_defined_in_comdat(ObjBuilder* ob, const ObjSym* s) { 212 const Section* sec; 213 if (!s || s->section_id == OBJ_SEC_NONE) return 0; 214 sec = obj_section_get(ob, s->section_id); 215 return sec && (sec->flags & SF_GROUP); 216 } 217 218 void link_resolve_symbols(Linker* l, LinkImage* img) { 219 u32 ii; 220 for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { 221 LinkInput* in = LinkInputs_at(&l->inputs, ii); 222 ObjBuilder* ob = in->obj; 223 InputMap* m = &img->input_maps[ii]; 224 u32 nsym = obj_section_count(ob); 225 (void)nsym; 226 ObjSymIter* it; 227 ObjSymEntry e; 228 229 if (in->kind == LINK_INPUT_DSO_BYTES) continue; 230 231 u32 nsyms_in_input = 0; 232 it = obj_symiter_new(ob); 233 while (obj_symiter_next(it, &e)) ++nsyms_in_input; 234 obj_symiter_free(it); 235 236 link_input_map_alloc(img, m, ob, 237 nsyms_in_input + 1u /* +1 for id-0 slot */); 238 239 it = obj_symiter_new(ob); 240 while (obj_symiter_next(it, &e)) { 241 const ObjSym* s = e.sym; 242 LinkSymbol rec; 243 LinkSymId existing; 244 if (link_sym_is_spurious_undef(s)) continue; 245 int is_def = link_sym_is_def(s); 246 247 memset(&rec, 0, sizeof(rec)); 248 rec.name = s->name; 249 rec.input_id = in->id; 250 rec.obj_sym = e.id; 251 rec.section_id = LINK_SEC_NONE; 252 rec.atom_id = is_def ? 
link_input_sym_atom(m, e.id) : OBJ_ATOM_NONE; 253 rec.value = s->value; 254 rec.size = s->size; 255 rec.common_align = (s->kind == SK_COMMON) ? (u32)s->common_align : 0u; 256 rec.bind = (u8)s->bind; 257 rec.kind = (u8)s->kind; 258 rec.defined = (u8)is_def; 259 rec.vaddr = 0; 260 261 if (is_def && (s->bind == SB_GLOBAL || s->bind == SB_WEAK) && 262 s->name != 0) { 263 LinkSymId fresh = (LinkSymId)(LinkSyms_count(&img->syms) + 1u); 264 if (symhash_insert(&img->globals, s->name, fresh, &existing)) { 265 m->sym[e.id] = link_append_symbol(img, &rec); 266 } else { 267 /* A second definition of an existing global/weak name: hand the 268 * binding-precedence decision to the shared policy module. The 269 * COMDAT lookup (does prev's section carry SF_GROUP?) is the 270 * caller-side bookkeeping symresolve deliberately leaves out. */ 271 LinkSymbol* prev = LinkSyms_at(&img->syms, existing - 1); 272 ObjBuilder* prev_ob = 273 (prev->input_id != LINK_INPUT_NONE) 274 ? LinkInputs_at(&l->inputs, prev->input_id - 1)->obj 275 : NULL; 276 const ObjSym* prev_os = 277 prev_ob ? obj_symbol_get(prev_ob, prev->obj_sym) : NULL; 278 SymAttrs ex_a = {0}; 279 SymAttrs inc_a = {0}; 280 SymMergeResult mr; 281 ex_a.bind = prev->bind; 282 ex_a.kind = prev->kind; 283 ex_a.size = prev->size; 284 ex_a.common_align = prev->common_align; 285 ex_a.in_comdat = (prev_ob && prev_os) 286 ? 
(u8)obj_sym_defined_in_comdat(prev_ob, prev_os) 287 : 0u; 288 inc_a.bind = rec.bind; 289 inc_a.kind = rec.kind; 290 inc_a.size = rec.size; 291 inc_a.common_align = rec.common_align; 292 inc_a.in_comdat = (u8)obj_sym_defined_in_comdat(ob, s); 293 mr = symresolve_merge(ex_a, inc_a); 294 switch (mr.kind) { 295 case SYM_MERGE_REPLACE: 296 rec.id = existing; 297 *prev = rec; 298 m->sym[e.id] = existing; 299 break; 300 case SYM_MERGE_COMMON: 301 rec.id = existing; 302 rec.common_align = mr.merged_align; 303 *prev = rec; 304 m->sym[e.id] = existing; 305 break; 306 case SYM_MERGE_COMDAT_DISCARD: 307 m->sym[e.id] = existing; 308 if (s->section_id < m->nsection) 309 m->comdat_discarded[s->section_id] = 1; 310 break; 311 case SYM_MERGE_ODR_ERROR: { 312 Slice nm_s = pool_slice(l->c->global, s->name); 313 compiler_panic(l->c, SRCLOC_NONE, 314 "link: duplicate definition of " 315 "global symbol '%.*s'", 316 (int)nm_s.len, nm_s.s); 317 break; 318 } 319 case SYM_MERGE_KEEP_EXISTING: 320 default: 321 m->sym[e.id] = existing; 322 break; 323 } 324 } 325 } else { 326 m->sym[e.id] = link_append_symbol(img, &rec); 327 } 328 } 329 obj_symiter_free(it); 330 } 331 } 332 333 /* Search DSO inputs for an exported symbol matching `name`. 
*/ 334 static LinkInputId find_dso_export(Linker* l, Sym name) { 335 u32 ii; 336 ObjSymIter* it; 337 ObjSymEntry e; 338 if (name == 0) return LINK_INPUT_NONE; 339 for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { 340 LinkInput* in = LinkInputs_at(&l->inputs, ii); 341 if (in->kind != LINK_INPUT_DSO_BYTES) continue; 342 it = obj_symiter_new(in->obj); 343 while (obj_symiter_next(it, &e)) { 344 const ObjSym* s = e.sym; 345 if (s->name != name) continue; 346 if (s->kind == SK_UNDEF) continue; 347 if (s->bind == SB_LOCAL) continue; 348 obj_symiter_free(it); 349 return in->id; 350 } 351 obj_symiter_free(it); 352 } 353 return LINK_INPUT_NONE; 354 } 355 356 /* Resolve undefined symbol `s` to the symbol named `alias` (a defined image 357 * global or a DSO export), copying the target's binding into `s`. Returns 1 on 358 * success. Shared by the recorded-alias path and the underscore heuristic. */ 359 static int resolve_to_alias(Linker* l, LinkImage* img, LinkSymbol* s, 360 Sym alias) { 361 if (alias == 0) return 0; 362 LinkSymId hit = symhash_get(&img->globals, alias); 363 if (hit != LINK_SYM_NONE) { 364 LinkSymbol* def = LinkSyms_at(&img->syms, hit - 1); 365 if (def->defined || def->imported) { 366 s->name = def->name; 367 s->section_id = def->section_id; 368 s->value = def->value; 369 s->vaddr = def->vaddr; 370 s->kind = def->kind; 371 s->defined = def->defined; 372 s->imported = def->imported; 373 s->dso_input_id = def->dso_input_id; 374 if (!s->defined && !s->imported) { 375 s->kind = SK_ABS; 376 s->vaddr = 0; 377 s->defined = 1; 378 } 379 return 1; 380 } 381 } 382 LinkInputId dso = find_dso_export(l, alias); 383 if (dso != LINK_INPUT_NONE) { 384 s->name = alias; 385 s->imported = 1; 386 s->dso_input_id = dso; 387 return 1; 388 } 389 return 0; 390 } 391 392 void link_resolve_undefs(Linker* l, LinkImage* img) { 393 u32 i; 394 395 /* Cross-input COFF WEAK_EXTERNAL alias map: alias-declarator name -> target 396 * name (SymHash's value slot holds an interned Sym, never 
a real LinkSymId 397 * here; 0 = absent). Populated from every input's recorded aliases so a 398 * reference to the aliased name resolves to the target regardless of which 399 * input the reference vs. the declarator came from. Empty for non-COFF. */ 400 SymHash alias_map; 401 symhash_init(&alias_map, l->heap); 402 for (u32 ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { 403 LinkInput* in = LinkInputs_at(&l->inputs, ii); 404 if (!in->obj || in->kind == LINK_INPUT_DSO_BYTES) continue; 405 u32 na = obj_weak_alias_count(in->obj); 406 for (u32 ai = 0; ai < na; ++ai) { 407 ObjSymId asym = OBJ_SYM_NONE; 408 Sym target = 0; 409 if (!obj_weak_alias_at(in->obj, ai, &asym, &target) || target == 0) 410 continue; 411 const ObjSym* os = obj_symbol_get(in->obj, asym); 412 if (os && os->name != 0) symhash_set(&alias_map, os->name, target); 413 } 414 } 415 416 for (i = 0; i < LinkSyms_count(&img->syms); ++i) { 417 LinkSymbol* s = LinkSyms_at(&img->syms, i); 418 if (s->defined) continue; 419 if (s->name != 0) { 420 LinkSymId hit = symhash_get(&img->globals, s->name); 421 if (hit != LINK_SYM_NONE && hit != s->id) { 422 LinkSymbol* def = LinkSyms_at(&img->syms, hit - 1); 423 if (def->defined) { 424 s->section_id = def->section_id; 425 s->value = def->value; 426 s->vaddr = def->vaddr; 427 s->kind = def->kind; 428 s->bind = def->bind; 429 s->defined = 1; 430 continue; 431 } 432 } 433 } 434 if (s->name != 0) { 435 LinkInputId dso = find_dso_export(l, s->name); 436 if (dso != LINK_INPUT_NONE) { 437 s->imported = 1; 438 s->dso_input_id = dso; 439 continue; 440 } 441 } 442 if (l->resolver && s->name != 0) { 443 Slice nm_s = pool_slice(l->c->global, s->name); 444 void* p = l->resolver(l->resolver_user, nm_s); 445 if (p) { 446 s->kind = SK_ABS; 447 s->vaddr = (u64)(uintptr_t)p; 448 s->defined = 1; 449 continue; 450 } 451 } 452 /* COFF WEAK_EXTERNAL alias: resolve to the recorded fall-back symbol (the 453 * aux TagIndex target captured by read_coff, collected into `alias_map` 454 * keyed 
by the alias-declarator's name). This is the precise relationship — 455 * e.g. mingw x86_64's `_setjmp` aliasing `__intrinsic_setjmp`, which the 456 * single-underscore heuristic below cannot derive. It applies to ANY undef 457 * of the aliased name, not just the declarator symbol itself: the strong 458 * reference (sj.o's `_setjmp`) and the weak declarator are distinct undefs, 459 * and the reference is what needs redirecting. The declarator member is 460 * pulled by member_satisfies (weak undef under PE/COMDAT semantics), which 461 * brings in the target's own undef and pulls its short-import DSO, so the 462 * target is resolvable by now. Follow a short chain in case the target is 463 * itself an alias. */ 464 if (s->name != 0) { 465 int resolved = 0; 466 Sym cur = s->name; 467 for (u32 hop = 0; hop < 8u; ++hop) { 468 Sym target = symhash_get(&alias_map, cur); 469 if (target == 0) break; 470 if (resolve_to_alias(l, img, s, target)) { 471 resolved = 1; 472 break; 473 } 474 cur = target; 475 } 476 if (resolved) continue; 477 } 478 /* COFF WEAK_EXTERNAL alias fallback for references that carry no recorded 479 * aux TagIndex (GLOBAL undefs like crt2.o's call to `__set_app_type`, or 480 * inputs read before alias capture): recover the relationship via the mingw 481 * single-underscore naming convention. e.g. `__set_app_type` aliases to 482 * `_set_app_type`; `__imp___set_app_type` aliases to `__imp__set_app_type`. 483 * Try the de-underscored variant first, then the re-underscored one. 
*/ 484 if (obj_format_weak_extern_underscore_alias(l->c) && s->name != 0) { 485 Slice nm_s = pool_slice(l->c->global, s->name); 486 const char* nm = nm_s.s; 487 size_t nlen = nm_s.len; 488 Sym candidates[2] = {0, 0}; 489 u32 ncand = 0; 490 if (nm && nlen >= 2 && nm[0] == '_') { 491 candidates[ncand++] = pool_intern_slice( 492 l->c->global, (Slice){.s = nm + 1, .len = (u32)(nlen - 1u)}); 493 } 494 if (nm && nlen > 0) { 495 char* buf = (char*)arena_array(l->c->scratch, char, nlen + 1u); 496 buf[0] = '_'; 497 memcpy(buf + 1, nm, nlen); 498 candidates[ncand++] = pool_intern_slice( 499 l->c->global, (Slice){.s = buf, .len = (u32)(nlen + 1u)}); 500 } 501 int resolved = 0; 502 for (u32 ci = 0; !resolved && ci < ncand; ++ci) { 503 if (resolve_to_alias(l, img, s, candidates[ci])) resolved = 1; 504 } 505 if (resolved) continue; 506 } 507 if (s->bind == SB_WEAK) { 508 s->kind = SK_ABS; 509 s->vaddr = 0; 510 s->defined = 1; 511 continue; 512 } 513 /* JIT lane: Mach-O inputs (including clang-produced .o files) 514 * carry a non-weak undef `__tlv_bootstrap` on every TLV var. 515 * kit_jit_from_image rewrites every descriptor's slot[0] to our 516 * thunk, so the resolved value never gets read — but we still need 517 * resolve_undefs to not panic. Treat the symbol as weak-undef 518 * (vaddr = 0, SK_ABS) in JIT mode only; AOT lanes keep the strict 519 * "undefined external" semantics. */ 520 if (l->jit_mode && s->name != 0) { 521 Slice nm_s = pool_slice(l->c->global, s->name); 522 const char* nm = nm_s.s; 523 size_t nlen = nm_s.len; 524 if (nm && nlen == 15u && memcmp(nm, "__tlv_bootstrap", 15u) == 0) { 525 s->kind = SK_ABS; 526 s->vaddr = 0; 527 s->defined = 1; 528 continue; 529 } 530 /* Windows COFF Local-Exec TLS: the in-process JIT relaxes every TLS 531 * access to in-image addressing (see kit_jit_from_image), so the PE 532 * module-index symbol `_tls_index` — normally supplied by the OS loader 533 * via the TLS directory — is never read. 
Define it as 0 so resolve does 534 * not reject it; its idiom relocs are dropped in the JIT reloc pass. */ 535 if (nm && nlen == 10u && memcmp(nm, "_tls_index", 10u) == 0) { 536 s->kind = SK_ABS; 537 s->vaddr = 0; 538 s->defined = 1; 539 continue; 540 } 541 } 542 { 543 Slice nm_s = s->name ? pool_slice(l->c->global, s->name) : SLICE_NULL; 544 const char* nm = nm_s.s ? nm_s.s : ""; 545 size_t namelen = nm_s.len; 546 obj_format_demangle_c(l->c, &nm, &namelen); 547 compiler_panic(l->c, SRCLOC_NONE, "link: undefined reference to '%.*s'", 548 (int)namelen, nm); 549 } 550 } 551 symhash_fini(&alias_map); 552 } 553 554 /* ---- pass 1b: --gc-sections liveness ---- */ 555 556 #define GC_ATOM_BIT 0x80000000u 557 #define GC_PACK(ii, j, is_atom) \ 558 (((u64)(u32)(ii) << 32) | ((is_atom) ? GC_ATOM_BIT : 0u) | (u32)(j)) 559 #define GC_II(p) ((u32)((p) >> 32)) 560 #define GC_IS_ATOM(p) (((u32)(p) & GC_ATOM_BIT) != 0) 561 #define GC_J(p) ((u32)((p) & ~GC_ATOM_BIT)) 562 563 static void gc_queue_push(GcQueue* q, Heap* h, u32 ii, u32 j, int is_atom) { 564 if (VEC_GROW(h, q->items, q->cap, q->n + 1u)) return; 565 q->items[q->n++] = GC_PACK(ii, j, is_atom); 566 } 567 568 void link_gc_live_alloc(GcLive* g, Linker* l, Heap* h) { 569 u32 ii; 570 g->ninputs = LinkInputs_count(&l->inputs); 571 g->marks = 572 LinkInputs_count(&l->inputs) 573 ? (u8**)h->alloc(h, sizeof(*g->marks) * LinkInputs_count(&l->inputs), 574 _Alignof(u8*)) 575 : NULL; 576 g->atom_marks = 577 LinkInputs_count(&l->inputs) 578 ? (u8**)h->alloc( 579 h, sizeof(*g->atom_marks) * LinkInputs_count(&l->inputs), 580 _Alignof(u8*)) 581 : NULL; 582 g->nsec = 583 LinkInputs_count(&l->inputs) 584 ? (u32*)h->alloc(h, sizeof(*g->nsec) * LinkInputs_count(&l->inputs), 585 _Alignof(u32)) 586 : NULL; 587 g->natom = 588 LinkInputs_count(&l->inputs) 589 ? 
(u32*)h->alloc(h, sizeof(*g->natom) * LinkInputs_count(&l->inputs), 590 _Alignof(u32)) 591 : NULL; 592 if (LinkInputs_count(&l->inputs) && 593 (!g->marks || !g->atom_marks || !g->nsec || !g->natom)) 594 compiler_panic(l->c, SRCLOC_NONE, "link: oom on gc live map"); 595 for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { 596 u32 nsec = obj_section_count(LinkInputs_at(&l->inputs, ii)->obj); 597 u32 natom = obj_atom_count(LinkInputs_at(&l->inputs, ii)->obj); 598 g->nsec[ii] = nsec; 599 g->natom[ii] = natom; 600 g->marks[ii] = (u8*)h->alloc(h, nsec ? nsec : 1u, 1); 601 if (!g->marks[ii]) 602 compiler_panic(l->c, SRCLOC_NONE, "link: oom on gc marks"); 603 memset(g->marks[ii], 0, nsec); 604 g->atom_marks[ii] = (u8*)h->alloc(h, natom ? natom : 1u, 1); 605 if (!g->atom_marks[ii]) 606 compiler_panic(l->c, SRCLOC_NONE, "link: oom on gc atom marks"); 607 memset(g->atom_marks[ii], 0, natom); 608 } 609 } 610 611 void link_gc_live_free(GcLive* g, Heap* h) { 612 u32 ii; 613 if (g->marks) { 614 for (ii = 0; ii < g->ninputs; ++ii) 615 if (g->marks[ii]) 616 h->free(h, g->marks[ii], g->nsec[ii] ? g->nsec[ii] : 1u); 617 h->free(h, g->marks, sizeof(*g->marks) * g->ninputs); 618 } 619 if (g->atom_marks) { 620 for (ii = 0; ii < g->ninputs; ++ii) 621 if (g->atom_marks[ii]) 622 h->free(h, g->atom_marks[ii], g->natom[ii] ? 
g->natom[ii] : 1u); 623 h->free(h, g->atom_marks, sizeof(*g->atom_marks) * g->ninputs); 624 } 625 if (g->nsec) h->free(h, g->nsec, sizeof(*g->nsec) * g->ninputs); 626 if (g->natom) h->free(h, g->natom, sizeof(*g->natom) * g->ninputs); 627 } 628 629 int link_gc_live_get(const GcLive* g, u32 ii, ObjSecId j) { 630 if (ii >= g->ninputs || j == OBJ_SEC_NONE || j >= g->nsec[ii]) return 0; 631 return g->marks[ii][j]; 632 } 633 634 int link_gc_atom_live_get(const GcLive* g, u32 ii, ObjAtomId j) { 635 if (ii >= g->ninputs || j == OBJ_ATOM_NONE || j >= g->natom[ii]) return 0; 636 return g->atom_marks[ii][j]; 637 } 638 639 static void gc_mark(GcLive* g, GcQueue* q, Heap* h, u32 ii, ObjSecId j) { 640 if (ii >= g->ninputs || j == OBJ_SEC_NONE || j >= g->nsec[ii]) return; 641 if (g->marks[ii][j]) return; 642 g->marks[ii][j] = 1; 643 if (q) gc_queue_push(q, h, ii, j, 0); 644 } 645 646 static void gc_mark_atom(GcLive* g, GcQueue* q, Heap* h, u32 ii, ObjAtomId j) { 647 if (ii >= g->ninputs || j == OBJ_ATOM_NONE || j >= g->natom[ii]) return; 648 if (g->atom_marks[ii][j]) return; 649 g->atom_marks[ii][j] = 1; 650 if (q) gc_queue_push(q, h, ii, j, 1); 651 } 652 653 static void gc_mark_section_or_atoms(GcLive* g, GcQueue* q, Heap* h, 654 ObjBuilder* ob, const InputMap* m, u32 ii, 655 ObjSecId sid) { 656 u32 first, count, i; 657 int marked = 0; 658 if (!link_input_section_has_atoms(m, sid)) { 659 gc_mark(g, q, h, ii, sid); 660 return; 661 } 662 link_input_section_atoms(m, sid, &first, &count); 663 for (i = 0; i < count; ++i) { 664 ObjAtomId aid = m->section_atom_ids[first + i]; 665 const ObjAtom* a = obj_atom_get(ob, aid); 666 if (!a || a->removed) continue; 667 gc_mark_atom(g, q, h, ii, aid); 668 marked = 1; 669 } 670 if (!marked) gc_mark(g, q, h, ii, sid); 671 } 672 673 /* From a LinkSymId, find the (input_idx, obj_sec_id) of its defining section. 674 * Returns 1 on hit. 
*/
/* An undefined symbol is followed through img->globals by name until a
 * defined record is reached. The lookup fails (returns 0, outputs untouched)
 * for an invalid id, an unnamed or unresolvable undef, an absolute or common
 * symbol, a symbol with no owning input, or one whose object symbol has no
 * section. On success *out_ii is the zero-based input index, *out_sid the
 * object section, and *out_aid the symbol's atom in that input (which may be
 * OBJ_ATOM_NONE). */
static int gc_def_site(LinkImage* img, Linker* l, LinkSymId id, u32* out_ii,
                       ObjSecId* out_sid, ObjAtomId* out_aid) {
  const LinkSymbol* sym;
  const ObjSym* obj_sym;
  ObjBuilder* builder;
  u32 input_idx;

  /* Chase undefined records to the global that carries the definition. */
  for (;;) {
    LinkSymId next;
    if (id == LINK_SYM_NONE || id > LinkSyms_count(&img->syms)) return 0;
    sym = LinkSyms_at(&img->syms, id - 1);
    if (sym->defined) break;
    if (sym->name == 0) return 0;
    next = symhash_get(&img->globals, sym->name);
    /* Absent, or the table entry is this very record: nothing to follow. */
    if (next == LINK_SYM_NONE || next == sym->id) return 0;
    id = next;
  }

  /* Absolute and common symbols have no section to keep alive. */
  if (sym->kind == SK_ABS || sym->kind == SK_COMMON) return 0;
  if (sym->input_id == LINK_INPUT_NONE) return 0;

  builder = LinkInputs_at(&l->inputs, sym->input_id - 1)->obj;
  obj_sym = obj_symbol_get(builder, sym->obj_sym);
  if (!obj_sym || obj_sym->section_id == OBJ_SEC_NONE) return 0;

  input_idx = (u32)(sym->input_id - 1u);
  *out_ii = input_idx;
  *out_sid = obj_sym->section_id;
  *out_aid = link_input_sym_atom(&img->input_maps[input_idx], sym->obj_sym);
  return 1;
}

/* Detect __start_<X> / __stop_<X> with <X> a valid C identifier.
*/ 701 int link_gc_split_start_stop(const char* s, size_t n, size_t* out_off, 702 size_t* out_len, int* out_is_start) { 703 static const char START[] = "__start_"; 704 static const char STOP[] = "__stop_"; 705 size_t off, len, i; 706 int is_start; 707 if (n > sizeof(START) - 1u && memcmp(s, START, sizeof(START) - 1u) == 0) { 708 off = sizeof(START) - 1u; 709 is_start = 1; 710 } else if (n > sizeof(STOP) - 1u && memcmp(s, STOP, sizeof(STOP) - 1u) == 0) { 711 off = sizeof(STOP) - 1u; 712 is_start = 0; 713 } else { 714 return 0; 715 } 716 len = n - off; 717 if (len == 0) return 0; 718 { 719 char c = s[off]; 720 if (!(c == '_' || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) 721 return 0; 722 } 723 for (i = 1; i < len; ++i) { 724 char c = s[off + i]; 725 if (!(c == '_' || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || 726 (c >= '0' && c <= '9'))) 727 return 0; 728 } 729 *out_off = off; 730 *out_len = len; 731 if (out_is_start) *out_is_start = is_start; 732 return 1; 733 } 734 735 static void gc_promote_by_section_name(Linker* l, LinkImage* img, GcLive* g, 736 GcQueue* q, Heap* h, Sym section_name) { 737 u32 ii, j; 738 for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { 739 ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; 740 u32 nsec = obj_section_count(ob); 741 for (j = 1; j < nsec; ++j) { 742 const Section* s = obj_section_get(ob, j); 743 if (!s || !link_section_kept(s)) continue; 744 if (s->name != section_name) continue; 745 gc_mark_section_or_atoms(g, q, h, ob, &img->input_maps[ii], ii, j); 746 } 747 } 748 } 749 750 void link_gc_compute(Linker* l, LinkImage* img, GcLive* g) { 751 u32 ii, j, k; 752 GcQueue q; 753 Heap* h = img->heap; 754 755 if (!l->gc_sections) { 756 for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { 757 ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; 758 InputMap* m = &img->input_maps[ii]; 759 u32 nsec = obj_section_count(ob); 760 for (j = 1; j < nsec; ++j) { 761 const Section* s = obj_section_get(ob, j); 762 if (s && 
link_section_kept(s) && !m->comdat_discarded[j]) 763 gc_mark_section_or_atoms(g, NULL, h, ob, m, ii, j); 764 } 765 } 766 return; 767 } 768 769 memset(&q, 0, sizeof(q)); 770 771 for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { 772 ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; 773 InputMap* m = &img->input_maps[ii]; 774 u32 nsec = obj_section_count(ob); 775 for (j = 1; j < nsec; ++j) { 776 const Section* s = obj_section_get(ob, j); 777 int root; 778 if (!s || !link_section_kept(s)) continue; 779 if (m->comdat_discarded[j]) continue; 780 root = (s->flags & SF_RETAIN) || s->sem == SSEM_INIT_ARRAY || 781 s->sem == SSEM_FINI_ARRAY || s->sem == SSEM_PREINIT_ARRAY; 782 if (root) gc_mark_section_or_atoms(g, &q, h, ob, m, ii, j); 783 if (link_input_section_has_atoms(m, j)) { 784 u32 first, count, ai; 785 link_input_section_atoms(m, j, &first, &count); 786 for (ai = 0; ai < count; ++ai) { 787 ObjAtomId aid = m->section_atom_ids[first + ai]; 788 const ObjAtom* a = obj_atom_get(ob, aid); 789 if (!a || a->removed) continue; 790 if (a->flags & OBJ_ATOM_RETAIN) gc_mark_atom(g, &q, h, ii, aid); 791 } 792 } 793 } 794 } 795 796 if (l->entry_name != 0) { 797 LinkSymId id = symhash_get(&img->globals, l->entry_name); 798 u32 tii; 799 ObjSecId tsid; 800 ObjAtomId taid; 801 if (gc_def_site(img, l, id, &tii, &tsid, &taid)) { 802 if (taid != OBJ_ATOM_NONE) 803 gc_mark_atom(g, &q, h, tii, taid); 804 else 805 gc_mark(g, &q, h, tii, tsid); 806 } 807 } 808 809 /* Keep executable definitions that a linked shared library references but 810 * nothing in the executable does (e.g. FreeBSD libc.so.7's back-references 811 * to crt-defined `environ` / `__progname`). Without rooting these, GC drops 812 * the defining section and the resulting dynamic exe fails to load 813 * ("Undefined symbol"). read_elf_dso records each DSO's undef names. 
*/ 814 for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { 815 LinkInput* in = LinkInputs_at(&l->inputs, ii); 816 const ObjImage* dim; 817 u32 u, nu; 818 if (in->kind != LINK_INPUT_DSO_BYTES || !in->obj) continue; 819 dim = obj_image(in->obj); 820 nu = obj_image_nundefs(dim); 821 for (u = 0; u < nu; ++u) { 822 LinkSymId id = symhash_get(&img->globals, obj_image_undef(dim, u)); 823 u32 tii; 824 ObjSecId tsid; 825 ObjAtomId taid; 826 if (id == LINK_SYM_NONE) continue; 827 if (gc_def_site(img, l, id, &tii, &tsid, &taid)) { 828 if (taid != OBJ_ATOM_NONE) 829 gc_mark_atom(g, &q, h, tii, taid); 830 else 831 gc_mark(g, &q, h, tii, tsid); 832 } 833 } 834 } 835 836 while (q.n > 0) { 837 u64 v = q.items[--q.n]; 838 u32 cii = GC_II(v); 839 int c_is_atom = GC_IS_ATOM(v); 840 ObjSecId cj = (ObjSecId)GC_J(v); 841 ObjBuilder* ob = LinkInputs_at(&l->inputs, cii)->obj; 842 InputMap* m = &img->input_maps[cii]; 843 const ObjAtom* src_atom = 844 c_is_atom ? obj_atom_get(ob, (ObjAtomId)cj) : NULL; 845 ObjSecId src_sec = src_atom ? 
src_atom->section_id : cj; 846 u32 total = obj_reloc_total(ob); 847 (void)obj_section_count; 848 if (!total) continue; 849 for (k = 0; k < total; ++k) { 850 const Reloc* r = obj_reloc_at(ob, k); 851 LinkSymId target; 852 const LinkSymbol* tsym; 853 u32 tii; 854 ObjSecId tsid; 855 ObjAtomId taid; 856 if (r->section_id != src_sec) continue; 857 if (src_atom) { 858 u64 begin = src_atom->offset; 859 u64 end = begin + src_atom->size; 860 if ((u64)r->offset < begin || (u64)r->offset >= end) continue; 861 } 862 if (r->sym == OBJ_SYM_NONE || r->sym >= m->nsym) continue; 863 target = m->sym[r->sym]; 864 if (target == LINK_SYM_NONE) continue; 865 tsym = LinkSyms_at(&img->syms, target - 1); 866 867 if (tsym->name != 0) { 868 size_t off, ilen; 869 Slice nm_s = pool_slice(l->c->global, tsym->name); 870 const char* nm = nm_s.s; 871 size_t namelen = nm_s.len; 872 if (link_gc_split_start_stop(nm, namelen, &off, &ilen, NULL)) { 873 Sym secname = pool_intern_slice(l->c->global, 874 (Slice){.s = nm + off, .len = ilen}); 875 gc_promote_by_section_name(l, img, g, &q, h, secname); 876 } 877 } 878 879 if (gc_def_site(img, l, target, &tii, &tsid, &taid)) { 880 if (taid != OBJ_ATOM_NONE) 881 gc_mark_atom(g, &q, h, tii, taid); 882 else 883 gc_mark(g, &q, h, tii, tsid); 884 } 885 } 886 } 887 888 if (q.items) h->free(h, q.items, sizeof(*q.items) * q.cap); 889 } 890 891 void link_gc_drop_dead_globals(Linker* l, LinkImage* img, const GcLive* g) { 892 u32 i; 893 if (!l->gc_sections) return; 894 for (i = 0; i < LinkSyms_count(&img->syms); ++i) { 895 LinkSymbol* s = LinkSyms_at(&img->syms, i); 896 ObjBuilder* ob; 897 const ObjSym* osym; 898 ObjSecId osid; 899 ObjAtomId aid; 900 if (!s->defined) continue; 901 if (s->kind == SK_ABS || s->kind == SK_COMMON) continue; 902 if (s->input_id == LINK_INPUT_NONE) continue; 903 ob = LinkInputs_at(&l->inputs, s->input_id - 1)->obj; 904 osym = obj_symbol_get(ob, s->obj_sym); 905 if (!osym) continue; 906 osid = osym->section_id; 907 if (osid == OBJ_SEC_NONE) 
continue; 908 aid = link_input_sym_atom(&img->input_maps[s->input_id - 1u], s->obj_sym); 909 if (aid != OBJ_ATOM_NONE) { 910 if (link_gc_atom_live_get(g, (u32)(s->input_id - 1u), aid)) continue; 911 s->defined = 0; 912 s->vaddr = 0; 913 s->section_id = LINK_SEC_NONE; 914 continue; 915 } 916 if (link_gc_live_get(g, (u32)(s->input_id - 1u), osid)) continue; 917 s->defined = 0; 918 s->vaddr = 0; 919 s->section_id = LINK_SEC_NONE; 920 } 921 } 922 923 /* ---- archive ingestion ---- */ 924 925 static void include_archive_member(Linker* l, const LinkArchive* ar, 926 LinkArchiveMember* mem) { 927 LinkInput* in; 928 LinkInputId id; 929 u32 idx; 930 Sym coff_dll = 0; 931 if (mem->included) return; 932 in = LinkInputs_push(&l->inputs, &idx); 933 if (!in) 934 compiler_panic(l->c, SRCLOC_NONE, 935 "link: oom growing inputs (archive member)"); 936 id = (LinkInputId)(idx + 1u); 937 in->id = id; 938 /* PE/COFF short-import shim: read_coff_short_import stashes the 939 * providing DLL name on the ObjBuilder. Such members behave like 940 * DSO inputs — symbols are exports, not local definitions — so route 941 * through LINK_INPUT_DSO_BYTES with the DLL name as the soname. */ 942 if (mem->obj && obj_get_coff_import_dll(mem->obj, &coff_dll) && coff_dll) { 943 in->kind = LINK_INPUT_DSO_BYTES; 944 in->soname = coff_dll; 945 /* Short-import NameType may make the DLL export name differ from the 946 * local symbol name (EXPORTAS etc.); carry it for import-table synthesis. 
*/ 947 { 948 Sym coff_imp_name = 0; 949 if (obj_get_coff_import_name(mem->obj, &coff_imp_name)) 950 in->coff_import_name = coff_imp_name; 951 } 952 } else { 953 in->kind = LINK_INPUT_OBJ_BYTES; 954 } 955 in->order = ar->order; 956 in->obj = mem->obj; 957 in->name = mem->name; 958 mem->included = 1; 959 mem->obj = NULL; 960 } 961 962 static void scan_presence_before(Linker* l, u32 max_order, SymHash* defined, 963 SymHash* undefs) { 964 u32 ii; 965 ObjSymIter* it; 966 ObjSymEntry e; 967 for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { 968 LinkInput* in = LinkInputs_at(&l->inputs, ii); 969 ObjBuilder* ob = in->obj; 970 int is_dso = (in->kind == LINK_INPUT_DSO_BYTES); 971 if (!ob || in->order > max_order) continue; 972 it = obj_symiter_new(ob); 973 while (obj_symiter_next(it, &e)) { 974 const ObjSym* s = e.sym; 975 if (s->name == 0) continue; 976 if (s->bind == SB_LOCAL) continue; 977 if (is_dso) { 978 /* A DSO's exported symbols satisfy undefined references, so a later 979 * static-archive member must NOT be pulled to redefine them. kit 980 * records DSO exports as OBJ_SEC_NONE globals (the importer's view), 981 * which obj_sym_is_logical_undef would otherwise misclassify as 982 * undefined — leaving e.g. a real libc's atoi looking unsatisfied and 983 * letting the freestanding rt's atoi shadow it. The DSO's own undefs 984 * (SK_UNDEF) are not exports and stay out of `defined`. */ 985 if (s->kind != SK_UNDEF) symhash_set(defined, s->name, 1u); 986 continue; 987 } 988 /* An unreferenced global/weak extern declaration is a header 989 * artifact, not a real demand to pull from an archive. Without 990 * this prune the C frontend's per-extern undef synthesis (e.g. 991 * every prototype in <math.h>) drags in matching archive members 992 * even when the user's source never references them. Matches the 993 * spurious-UNDEF prune in link_resolve_symbols and obj_sweep_dead 994 * at .o emit (obj.c). 
*/ 995 if (link_sym_is_spurious_undef(s)) continue; 996 if (obj_sym_is_logical_undef(s)) 997 symhash_set(undefs, s->name, 1u); 998 else 999 symhash_set(defined, s->name, 1u); 1000 } 1001 obj_symiter_free(it); 1002 } 1003 } 1004 1005 static int inputs_have_defined_ifunc_before(Linker* l, u32 max_order) { 1006 u32 ii; 1007 ObjSymIter* it; 1008 ObjSymEntry e; 1009 for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { 1010 LinkInput* in = LinkInputs_at(&l->inputs, ii); 1011 ObjBuilder* ob = in->obj; 1012 if (!ob || in->order > max_order) continue; 1013 it = obj_symiter_new(ob); 1014 while (obj_symiter_next(it, &e)) { 1015 const ObjSym* s = e.sym; 1016 if (s->kind == SK_IFUNC) { 1017 obj_symiter_free(it); 1018 return 1; 1019 } 1020 } 1021 obj_symiter_free(it); 1022 } 1023 return 0; 1024 } 1025 1026 static int member_satisfies(LinkArchiveMember* mem, const SymHash* defined, 1027 const SymHash* wanted, int weak_undef_pulls) { 1028 ObjSymIter* it; 1029 ObjSymEntry e; 1030 int hit = 0; 1031 it = obj_symiter_new(mem->obj); 1032 while (obj_symiter_next(it, &e)) { 1033 const ObjSym* s = e.sym; 1034 if (s->name == 0) continue; 1035 /* In COFF archives, WEAK_EXTERNAL alias declarations are read as 1036 * SB_WEAK + SK_UNDEF (kit has no native alias model — see 1037 * coff_read.c step "WEAK_EXTERNAL primary"). The archive's symbol 1038 * map still lists the member as the canonical provider of that 1039 * name, so treat such weak undefs as defining for the archive-pull 1040 * decision (formats whose COMDAT semantics pull on a weak undef — 1041 * obj_format_weak_undef_pulls_archive_member). The actual 1042 * alias-to-target resolution happens later in link_resolve_undefs. 
*/ 1043 if (s->kind == SK_UNDEF) { 1044 if (!(weak_undef_pulls && s->bind == SB_WEAK)) continue; 1045 } 1046 if (s->bind != SB_GLOBAL && s->bind != SB_WEAK) continue; 1047 if (symhash_get(wanted, s->name) == LINK_SYM_NONE) continue; 1048 if (symhash_get(defined, s->name) != LINK_SYM_NONE) continue; 1049 hit = 1; 1050 break; 1051 } 1052 obj_symiter_free(it); 1053 return hit; 1054 } 1055 1056 /* Synthesize an ObjBuilder providing the mingw CRT ctor/dtor list 1057 * boundary symbols (`__CTOR_LIST__`, `__CTOR_END__`, `__DTOR_LIST__`, 1058 * `__DTOR_END__`) backed by a 16-byte zero blob. mingw's gccmain.o 1059 * references these and walks them at program startup; lld/binutils 1060 * generate them via the linker script's `.ctors` / `.dtors` rules. 1061 * kit has no script for PE, so we inject an equivalent here. 1062 * 1063 * Zero contents are intentional for the empty-list case: 1064 * - __do_global_ctors loads `*(u32*)__CTOR_LIST__`; sees 0; cbz 1065 * short-circuit returns without iterating. 1066 * - __do_global_dtors loads `*(u64*)__DTOR_LIST__`; sees 0; cbz 1067 * short-circuit returns. 1068 * 1069 * For programs that emit real ctor/dtor sections this synth would 1070 * need to coordinate with .ctors/.dtors layout; v1 covers the empty 1071 * case (hello-world through mingw CRT). */ 1072 /* Registered as the COFF format's synth_inputs hook (src/obj/registry.c), so 1073 * it is only ever invoked for COFF targets — no obj==COFF guard needed. 
*/ 1074 void link_synth_coff_ctor_dtor_list(Linker* l) { 1075 ObjBuilder* ob; 1076 ObjSecId sid; 1077 static const u8 kZeros[16] = {0}; 1078 LinkInput* in; 1079 u32 idx; 1080 if (!l) return; 1081 ob = obj_new(l->c); 1082 if (!ob) return; 1083 sid = obj_section_ex( 1084 ob, pool_intern_slice(l->c->global, SLICE_LIT(".rdata$ctors")), 1085 SEC_RODATA, SSEM_PROGBITS, SF_ALLOC | SF_RETAIN, 16, 0u, 0u, 0u); 1086 obj_section_replace_bytes(ob, sid, kZeros, sizeof(kZeros)); 1087 obj_symbol_ex(ob, pool_intern_slice(l->c->global, SLICE_LIT("__CTOR_LIST__")), 1088 SB_GLOBAL, SV_DEFAULT, SK_OBJ, sid, 0, 0, 0); 1089 obj_symbol_ex(ob, pool_intern_slice(l->c->global, SLICE_LIT("__CTOR_END__")), 1090 SB_GLOBAL, SV_DEFAULT, SK_OBJ, sid, 0, 0, 0); 1091 obj_symbol_ex(ob, pool_intern_slice(l->c->global, SLICE_LIT("__DTOR_LIST__")), 1092 SB_GLOBAL, SV_DEFAULT, SK_OBJ, sid, 0, 0, 0); 1093 obj_symbol_ex(ob, pool_intern_slice(l->c->global, SLICE_LIT("__DTOR_END__")), 1094 SB_GLOBAL, SV_DEFAULT, SK_OBJ, sid, 0, 0, 0); 1095 /* __chkstk: synthesized only for arches whose link descriptor carries the 1096 * stub bytes (aarch64). x64 needs none — its codegen emits inline probes (or 1097 * links libmingwex's plain-object __chkstk). Driven by the descriptor, so no 1098 * arch identity is consulted here. 
*/ 1099 { 1100 const LinkArchDesc* la = link_arch_desc_for(l->c); 1101 if (la && la->coff_chkstk_bytes && la->coff_chkstk_len) { 1102 ObjSecId tsid = obj_section_ex( 1103 ob, pool_intern_slice(l->c->global, SLICE_LIT(".text$chkstk")), 1104 SEC_TEXT, SSEM_PROGBITS, SF_ALLOC | SF_EXEC | SF_RETAIN, 4, 0u, 0u, 1105 0u); 1106 obj_section_replace_bytes(ob, tsid, la->coff_chkstk_bytes, 1107 la->coff_chkstk_len); 1108 obj_symbol_ex(ob, pool_intern_slice(l->c->global, SLICE_LIT("__chkstk")), 1109 SB_GLOBAL, SV_DEFAULT, SK_FUNC, tsid, 0, 1110 la->coff_chkstk_len, 0); 1111 } 1112 } 1113 obj_finalize(ob); 1114 in = LinkInputs_push(&l->inputs, &idx); 1115 if (!in) 1116 compiler_panic(l->c, SRCLOC_NONE, "link: oom growing inputs (synth)"); 1117 in->id = (LinkInputId)(idx + 1u); 1118 in->kind = LINK_INPUT_OBJ_BYTES; 1119 in->order = l->next_input_order++; 1120 in->obj = ob; 1121 in->name = 1122 pool_intern_slice(l->c->global, SLICE_LIT("<kit-synth-coff-runtime>")); 1123 in->soname = 0; 1124 } 1125 1126 void link_ingest_archives(Linker* l) { 1127 u32 a, m; 1128 if (LinkArchives_count(&l->archives) == 0) return; 1129 1130 for (a = 0; a < LinkArchives_count(&l->archives); ++a) { 1131 LinkArchive* ar = LinkArchives_at(&l->archives, a); 1132 if (!ar->whole_archive) continue; 1133 for (m = 0; m < ar->nmembers; ++m) { 1134 /* obj==NULL is the long-form COFF head/trailer skip path 1135 * (set by link_add_archive_bytes). Drop them silently. 
*/ 1136 if (!ar->members[m].obj) continue; 1137 include_archive_member(l, ar, &ar->members[m]); 1138 } 1139 } 1140 1141 for (a = 0; a < LinkArchives_count(&l->archives); ++a) { 1142 LinkArchive* ar = LinkArchives_at(&l->archives, a); 1143 Sym want_ifunc_init = 0; 1144 if (ar->whole_archive) continue; 1145 if (l->emit_static_exe && inputs_have_defined_ifunc_before(l, ar->order)) 1146 want_ifunc_init = 1147 pool_intern_slice(l->c->global, SLICE_LIT("__kit_ifunc_init")); 1148 for (;;) { 1149 SymHash defined, undefs; 1150 int changed = 0; 1151 symhash_init(&defined, l->heap); 1152 symhash_init(&undefs, l->heap); 1153 scan_presence_before(l, ar->order, &defined, &undefs); 1154 if (want_ifunc_init != 0 && 1155 symhash_get(&defined, want_ifunc_init) == LINK_SYM_NONE) 1156 symhash_set(&undefs, want_ifunc_init, 1u); 1157 1158 int weak_undef_pulls = obj_format_weak_undef_pulls_archive_member(l->c); 1159 for (m = 0; m < ar->nmembers; ++m) { 1160 LinkArchiveMember* mem = &ar->members[m]; 1161 if (mem->included) continue; 1162 if (!mem->obj) continue; /* long-form skip (head/trailer) */ 1163 if (!member_satisfies(mem, &defined, &undefs, weak_undef_pulls)) 1164 continue; 1165 include_archive_member(l, ar, mem); 1166 changed = 1; 1167 } 1168 symhash_fini(&defined); 1169 symhash_fini(&undefs); 1170 if (!changed) break; 1171 } 1172 } 1173 }