link_layout.c (52399B)
1 /* link_layout.c — section bucketing, vaddr assignment, scripted layout, 2 * COMMON BSS allocation, segment-byte copying, and the top-level 3 * link_resolve orchestration function. 4 * 5 * Image-relative discipline: every vaddr / file_offset on the produced 6 * image treats the image as based at 0. Consumers (link_emit_elf, 7 * kit_jit_from_image) add their own runtime base before patching 8 * relocations or writing PT_LOAD headers. Segment byte buffers hold raw 9 * input section bytes — no relocations are applied here, in line with 10 * the incremental-link discipline (link.h:136). */ 11 12 #include <kit/core.h> 13 #include <kit/jit.h> 14 #include <string.h> 15 16 #include "core/buf.h" 17 #include "core/bytes.h" 18 #include "core/heap.h" 19 #include "core/metrics.h" 20 #include "core/pool.h" 21 #include "core/slice.h" 22 #include "core/util.h" 23 #include "core/vec.h" 24 #include "link/link.h" 25 #include "link/link_arch.h" 26 #include "link/link_internal.h" 27 #include "obj/format.h" 28 29 LinkImage* link_image_alloc(Compiler*); /* defined in link.c */ 30 31 #define LINK_ELF_SHF_COMPRESSED 0x800u 32 33 /* Page size used for ELF segment alignment. We pull from env->execmem 34 * when present (matches the eventual JIT mapping granularity) and fall 35 * back to 16 KiB otherwise — large enough for any current Linux/aarch64 36 * loader. A future cross-link with mismatched host/target page sizes 37 * will need a target-derived value here instead. */ 38 u64 link_layout_page_size(Linker* l) { 39 const KitExecMem* m = (l && l->jit_host) ? l->jit_host->execmem : NULL; 40 if (m && m->page_size) return (u64)m->page_size; 41 return 0x4000u; 42 } 43 44 /* Four-bucket segment partitioning: see SegBucket in link_internal.h. */ 45 46 int link_section_kept(const Section* s) { 47 /* This cut keeps allocatable progbits/nobits sections only. Debug, 48 * symtab/strtab, group, and note sections are dropped — none of 49 * them participate in a static ET_EXEC layout. 
*/ 50 if (!(s->flags & SF_ALLOC)) return 0; 51 if (s->sem == SSEM_PROGBITS || s->sem == SSEM_NOBITS || s->sem == SSEM_NOTE) 52 return 1; 53 if (s->sem == SSEM_INIT_ARRAY || s->sem == SSEM_FINI_ARRAY) return 1; 54 return 0; 55 } 56 57 int link_section_kept_fileonly(const Section* s) { 58 /* Non-allocatable .debug_* sections. They get no PT_LOAD segment but 59 * are carried through to the file so addr2line / gdb resolve 60 * file:line on the linked image. 61 * 62 * ELF SHF_COMPRESSED debug sections carry compressed bytes but relocation 63 * offsets refer to the uncompressed DWARF stream. Until the object model has 64 * a decompression/recompression path, dropping them is the only safe linked 65 * executable behavior. */ 66 return s && !s->removed && s->kind == SEC_DEBUG && 67 !(s->ext_kind == OBJ_EXT_ELF && 68 (s->ext_flags & LINK_ELF_SHF_COMPRESSED)); 69 } 70 71 SegBucket link_bucket_for(u16 flags) { 72 if (flags & SF_TLS) return SEG_TLS; 73 if (flags & SF_EXEC) return SEG_RX; 74 if (flags & SF_WRITE) return SEG_RW; 75 return SEG_R; 76 } 77 78 /* PIE `.data.rel.ro` placement: a read-only data section that carries an 79 * absolute (abs32/abs64) reloc cannot stay in a never-writable PT_LOAD. 80 * In PIE the linker rewrites those relocs into dynamic records — a 81 * RELATIVE for an internal target, a GLOB_DAT for an import — and the 82 * loader *writes* the resolved pointer into the slot at load time. A 83 * PF_R-only segment faults that store (manifesting as a SIGSEGV in the 84 * dynamic loader). Jump tables, @labeladdr arrays, and const pointer 85 * initializers all land here. Promote such sections to the writable 86 * segment; we forgo the post-relocation RELRO re-protection that a full 87 * toolchain would apply via PT_GNU_RELRO. 
*/ 88 static int link_pie_ro_section_needs_write(const ObjBuilder* ob, ObjSecId sid) { 89 u32 i, total = obj_reloc_total(ob); 90 for (i = 0; i < total; ++i) { 91 const Reloc* r = obj_reloc_at(ob, i); 92 if (!r || r->removed || r->section_id != sid) continue; 93 if (r->kind == R_ABS64 || r->kind == R_ABS32) return 1; 94 } 95 return 0; 96 } 97 98 /* ---- LinkImage growth helpers ---- 99 * 100 * syms / relocs back onto SegVec — pointers stay stable across pushes, 101 * so callers may stash LinkSymbol/LinkRelocApply references and 102 * re-enter mutation without invalidation. */ 103 104 static LinkSymbol* append_symbol_slot(LinkImage* img) { 105 u32 idx; 106 LinkSymbol* s = LinkSyms_push(&img->syms, &idx); 107 if (!s) compiler_panic(img->c, SRCLOC_NONE, "link: oom growing symbols"); 108 s->id = (LinkSymId)(idx + 1u); 109 return s; 110 } 111 112 LinkSymId link_append_symbol(LinkImage* img, const LinkSymbol* tmpl) { 113 LinkSymbol* s = append_symbol_slot(img); 114 LinkSymId id = s->id; 115 *s = *tmpl; 116 s->id = id; 117 return id; 118 } 119 120 LinkRelocApply* link_append_reloc_slot(LinkImage* img) { 121 LinkRelocApply* r = LinkRelocs_push(&img->relocs, NULL); 122 if (!r) compiler_panic(img->c, SRCLOC_NONE, "link: oom growing relocs"); 123 return r; 124 } 125 126 /* ---- pass 2: section assignment + segment layout ---- */ 127 128 typedef struct SecRef { 129 u32 input_idx; 130 ObjSecId obj_sec_id; 131 LinkSectionId link_sec_id; 132 } SecRef; 133 134 #define PLACE_NONE ((u32)~0u) 135 136 /* Within a bucket, input sections sharing a name are placed contiguously 137 * — the standard "merge sections by name" rule. Without this the .init 138 * prologue from crti.o and the matching epilogue from crtn.o (both in 139 * a .init section) get separated by intervening .text, and `_init` is 140 * no longer a contiguous function. Placement walk: 141 * 142 * 1. Build a flat list of (input_idx, obj_sec_id) for kept+live 143 * sections. 144 * 2. 
While collecting, append each section to an O(1)-expected lookup 145 * keyed by (bucket, name). The group array is append-only, so group 146 * order is still first occurrence; each group's linked list preserves 147 * input order. 148 * 3. Lay out groups in first-occurrence order. 149 */ 150 typedef struct PlaceEntry { 151 u32 input_idx; 152 ObjSecId obj_sec_id; 153 ObjAtomId obj_atom_id; 154 u32 obj_offset; 155 u32 size; 156 Sym name; 157 SegBucket bucket; 158 u32 next; 159 } PlaceEntry; 160 161 typedef struct PlaceGroup { 162 u32 head; 163 u32 tail; 164 } PlaceGroup; 165 166 static inline u32 place_group_hash_(u64 key) { return hash_u64(key); } 167 HASHMAP_DEFINE(PlaceGroupHash, u64, u32, place_group_hash_); 168 169 static u64 place_group_key(Sym name, SegBucket bucket) { 170 return (((u64)name + 1u) << 3) | ((u64)bucket + 1u); 171 } 172 173 static u32 place_group_hash_cap(u32 n) { 174 u32 cap = KIT_HASHMAP_INIT_CAP; 175 while (cap < 0x80000000u && (cap - cap / 4u) < n) cap <<= 1; 176 return cap; 177 } 178 179 static int live_section_units(const GcLive* g, const InputMap* m, u32 ii, 180 ObjBuilder* ob, ObjSecId sid) { 181 u32 n = 0, first, count, i; 182 if (link_input_section_has_atoms(m, sid)) { 183 link_input_section_atoms(m, sid, &first, &count); 184 for (i = 0; i < count; ++i) { 185 ObjAtomId aid = m->section_atom_ids[first + i]; 186 const ObjAtom* a = obj_atom_get(ob, aid); 187 if (!a || a->removed) continue; 188 if (link_gc_atom_live_get(g, ii, aid)) ++n; 189 } 190 return n; 191 } 192 return link_gc_live_get(g, ii, sid) ? 
1 : 0; 193 } 194 195 static void map_placed_unit(InputMap* m, ObjSecId sid, ObjAtomId aid, 196 LinkSectionId lsid) { 197 if (aid != OBJ_ATOM_NONE) { 198 m->atom[aid] = lsid; 199 if (m->section[sid] == LINK_SEC_NONE) m->section[sid] = lsid; 200 return; 201 } 202 m->section[sid] = lsid; 203 } 204 205 static void link_layout_sections_scripted(Linker* l, LinkImage* img, 206 const GcLive* g); 207 208 void link_layout_sections(Linker* l, LinkImage* img, const GcLive* g) { 209 if (l->script) { 210 link_layout_sections_scripted(l, img, g); 211 return; 212 } 213 Heap* h = img->heap; 214 u32 ii, j; 215 u32 total_kept = 0; 216 217 /* Pass 0: count kept sections (filtered by GC liveness). */ 218 for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { 219 ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; 220 for (j = 1; j < obj_section_count(ob); ++j) { 221 const Section* s = obj_section_get(ob, j); 222 InputMap* m = &img->input_maps[ii]; 223 if (s && link_section_kept(s) && !m->comdat_discarded[j]) 224 total_kept += live_section_units(g, m, ii, ob, j); 225 } 226 } 227 228 img->sections = total_kept ? (LinkSection*)h->alloc( 229 h, sizeof(*img->sections) * total_kept, 230 _Alignof(LinkSection)) 231 : NULL; 232 if (total_kept && !img->sections) 233 compiler_panic(img->c, SRCLOC_NONE, "link: oom on sections"); 234 235 /* Pass 1: collect kept sections into a flat list. */ 236 PlaceEntry* entries = 237 total_kept ? (PlaceEntry*)h->alloc(h, sizeof(*entries) * total_kept, 238 _Alignof(PlaceEntry)) 239 : NULL; 240 PlaceGroup* groups = 241 total_kept ? 
(PlaceGroup*)h->alloc(h, sizeof(*groups) * total_kept, 242 _Alignof(PlaceGroup)) 243 : NULL; 244 PlaceGroupHash group_map; 245 u32 ngroups = 0; 246 if (total_kept && !entries) 247 compiler_panic(img->c, SRCLOC_NONE, "link: oom on placement entries"); 248 if (total_kept && !groups) 249 compiler_panic(img->c, SRCLOC_NONE, "link: oom on placement groups"); 250 if (total_kept) { 251 PlaceGroupHash_init_cap(&group_map, h, place_group_hash_cap(total_kept)); 252 if (!group_map.slots) 253 compiler_panic(img->c, SRCLOC_NONE, "link: oom on placement group map"); 254 } 255 { 256 u32 e = 0; 257 for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { 258 ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; 259 InputMap* m = &img->input_maps[ii]; 260 for (j = 1; j < obj_section_count(ob); ++j) { 261 const Section* s = obj_section_get(ob, j); 262 u64 key; 263 u32* hit; 264 u32 group_idx; 265 if (!s || !link_section_kept(s) || m->comdat_discarded[j]) continue; 266 u32 first = 0, count = 1, ai; 267 int has_atoms = link_input_section_has_atoms(m, j); 268 if (has_atoms) link_input_section_atoms(m, j, &first, &count); 269 for (ai = 0; ai < count; ++ai) { 270 ObjAtomId aid = 271 has_atoms ? m->section_atom_ids[first + ai] : OBJ_ATOM_NONE; 272 const ObjAtom* a = has_atoms ? obj_atom_get(ob, aid) : NULL; 273 if (has_atoms) { 274 if (!a || a->removed) continue; 275 if (!link_gc_atom_live_get(g, ii, aid)) continue; 276 } else if (!link_gc_live_get(g, ii, j)) { 277 continue; 278 } 279 entries[e].input_idx = ii; 280 entries[e].obj_sec_id = j; 281 entries[e].obj_atom_id = has_atoms ? aid : OBJ_ATOM_NONE; 282 entries[e].obj_offset = has_atoms ? a->offset : 0u; 283 entries[e].size = has_atoms ? 
a->size : link_section_size_for_link(s); 284 entries[e].name = s->name; 285 entries[e].bucket = link_bucket_for(s->flags); 286 if (l->emit_pie && entries[e].bucket == SEG_R && 287 link_pie_ro_section_needs_write(ob, j)) 288 entries[e].bucket = SEG_RW; 289 entries[e].next = PLACE_NONE; 290 291 key = place_group_key(entries[e].name, entries[e].bucket); 292 hit = PlaceGroupHash_get(&group_map, key); 293 if (hit) { 294 group_idx = *hit - 1u; 295 } else { 296 group_idx = ngroups++; 297 groups[group_idx].head = PLACE_NONE; 298 groups[group_idx].tail = PLACE_NONE; 299 PlaceGroupHash_set(&group_map, key, group_idx + 1u); 300 } 301 if (groups[group_idx].tail == PLACE_NONE) { 302 groups[group_idx].head = e; 303 } else { 304 entries[groups[group_idx].tail].next = e; 305 } 306 groups[group_idx].tail = e; 307 ++e; 308 } 309 } 310 } 311 } 312 313 /* Four segment buckets; tracks per-bucket size during scan and 314 * per-section file_offset within the bucket. */ 315 u64 seg_size[SEG_NBUCKETS] = {0}; 316 u32 seg_align[SEG_NBUCKETS] = {1, 1, 1, 1}; 317 u32 seg_count[SEG_NBUCKETS] = {0}; 318 /* Track trailing NOBITS so segment mem_size > file_size: SEG_RW 319 * for .bss / COMMON, SEG_TLS for .tbss. */ 320 u64 seg_bss_extra[SEG_NBUCKETS] = {0}; 321 322 /* Pass 2: place sections, grouped by name within each bucket and 323 * in first-occurrence order across groups. NOBITS (.bss/.tbss) sections 324 * are placed in a second sub-pass so every bucket's file image stays a 325 * contiguous prefix: ELF requires bss to trail, and for TLS specifically 326 * a .tbss ahead of .tdata makes the loader copy garbage file bytes as the 327 * zero-init image (FreeBSD/riscv _init_tls then crashes on a stale TLS 328 * pointer). 
*/ 329 for (int bss_phase = 0; bss_phase < 2; ++bss_phase) { 330 for (u32 gi = 0; gi < ngroups; ++gi) { 331 for (u32 k = groups[gi].head; k != PLACE_NONE; k = entries[k].next) { 332 PlaceEntry* pe = &entries[k]; 333 SegBucket bucket = pe->bucket; 334 335 ObjBuilder* ob = LinkInputs_at(&l->inputs, pe->input_idx)->obj; 336 InputMap* m = &img->input_maps[pe->input_idx]; 337 const Section* s = obj_section_get(ob, pe->obj_sec_id); 338 u32 align = s->align ? s->align : 1u; 339 u64 ofs; 340 LinkSection* ls; 341 LinkSectionId lsid; 342 int is_bss = (s->sem == SSEM_NOBITS || s->kind == SEC_BSS); 343 344 if (is_bss != bss_phase) continue; 345 if (is_bss) { 346 u64 cursor = seg_size[bucket] + seg_bss_extra[bucket]; 347 cursor = ALIGN_UP(cursor, (u64)(align)); 348 seg_bss_extra[bucket] = cursor + (u64)pe->size - seg_size[bucket]; 349 ofs = cursor; 350 } else { 351 seg_size[bucket] += seg_bss_extra[bucket]; 352 seg_bss_extra[bucket] = 0; 353 ofs = ALIGN_UP(seg_size[bucket], (u64)(align)); 354 seg_size[bucket] = ofs + (u64)pe->size; 355 } 356 357 if (align > seg_align[bucket]) seg_align[bucket] = align; 358 seg_count[bucket]++; 359 360 lsid = (LinkSectionId)(img->nsections + 1u); 361 ls = &img->sections[img->nsections++]; 362 memset(ls, 0, sizeof(*ls)); 363 ls->id = lsid; 364 ls->input_id = LinkInputs_at(&l->inputs, pe->input_idx)->id; 365 ls->obj_section_id = pe->obj_sec_id; 366 ls->obj_atom_id = pe->obj_atom_id; 367 ls->segment_id = LINK_SEG_NONE; 368 ls->obj_offset = pe->obj_offset; 369 ls->input_offset = ofs; 370 ls->file_offset = ofs; 371 ls->vaddr = ofs; 372 ls->size = pe->size; 373 ls->flags = s->flags; 374 ls->align = align; 375 ls->name = s->name; 376 ls->sem = (s->kind == SEC_BSS) ? 
SSEM_NOBITS : s->sem; 377 ls->segment_id = (LinkSegmentId)(bucket + 1u); /* 1..3 sentinel */ 378 map_placed_unit(m, pe->obj_sec_id, pe->obj_atom_id, lsid); 379 } 380 } 381 } 382 383 if (total_kept) PlaceGroupHash_fini(&group_map); 384 if (groups) h->free(h, groups, sizeof(*groups) * total_kept); 385 if (entries) h->free(h, entries, sizeof(*entries) * total_kept); 386 387 /* Materialize one LinkSegment per non-empty bucket, then assign 388 * absolute (image-relative) vaddr/file_offset to each segment and 389 * fix up section.{vaddr,file_offset,segment_id}. */ 390 { 391 LinkSegmentId bucket_seg[SEG_NBUCKETS] = {0}; 392 u64 cursor = 0; 393 u32 b; 394 u32 nseg = 0; 395 for (b = 0; b < SEG_NBUCKETS; ++b) 396 if (seg_count[b]) ++nseg; 397 398 img->segments = 399 nseg ? (LinkSegment*)h->alloc(h, sizeof(*img->segments) * nseg, 400 _Alignof(LinkSegment)) 401 : NULL; 402 img->segment_bytes = 403 nseg ? (u8**)h->alloc(h, sizeof(*img->segment_bytes) * nseg, 404 _Alignof(u8*)) 405 : NULL; 406 img->segment_bytes_cap = 407 nseg ? (size_t*)h->alloc(h, sizeof(*img->segment_bytes_cap) * nseg, 408 _Alignof(size_t)) 409 : NULL; 410 if (nseg && 411 (!img->segments || !img->segment_bytes || !img->segment_bytes_cap)) 412 compiler_panic(img->c, SRCLOC_NONE, "link: oom on segments"); 413 if (nseg) { 414 memset(img->segment_bytes, 0, sizeof(*img->segment_bytes) * nseg); 415 memset(img->segment_bytes_cap, 0, sizeof(*img->segment_bytes_cap) * nseg); 416 } 417 418 for (b = 0; b < SEG_NBUCKETS; ++b) { 419 LinkSegment* seg; 420 u64 file_size, mem_size, align; 421 u32 nat_align; 422 u32 perms; 423 if (!seg_count[b]) continue; 424 nat_align = seg_align[b] ? 
seg_align[b] : 1u; 425 align = (u64)nat_align; 426 if (align < link_layout_page_size(l)) align = link_layout_page_size(l); 427 cursor = ALIGN_UP(cursor, (u64)(align)); 428 429 seg = &img->segments[img->nsegments]; 430 file_size = seg_size[b]; 431 mem_size = seg_size[b] + seg_bss_extra[b]; 432 perms = SF_ALLOC; 433 if (b == SEG_RX) perms |= SF_EXEC; 434 if (b == SEG_RW) perms |= SF_WRITE; 435 if (b == SEG_TLS) perms |= SF_TLS; 436 437 memset(seg, 0, sizeof(*seg)); 438 seg->id = (LinkSegmentId)(img->nsegments + 1u); 439 seg->flags = perms; 440 seg->file_offset = cursor; 441 seg->vaddr = cursor; 442 seg->mem_size = mem_size; 443 seg->file_size = file_size; 444 seg->align = (u32)align; 445 seg->nsections = seg_count[b]; 446 bucket_seg[b] = seg->id; 447 if (b == SEG_TLS) { 448 /* Record TLS image span for PT_TLS emission and TLSLE 449 * reloc apply. tls_align preserves the natural section 450 * alignment (PT_TLS p_align), distinct from the 451 * containing PT_LOAD's page align. */ 452 img->tls_vaddr = cursor; 453 img->tls_filesz = file_size; 454 img->tls_memsz = mem_size; 455 img->tls_align = nat_align; 456 } 457 cursor += mem_size; 458 img->nsegments++; 459 } 460 461 /* Allocate segment buffers and fix up section offsets/vaddrs. The 462 * JIT lane maps input section bytes directly into execmem, so ordinary 463 * segment payload buffers would be copied only to be copied again. 
*/ 464 for (b = 0; b < SEG_NBUCKETS; ++b) { 465 if (!bucket_seg[b]) continue; 466 { 467 LinkSegment* seg = &img->segments[bucket_seg[b] - 1]; 468 if (seg->file_size && !l->jit_mode) { 469 img->segment_bytes[bucket_seg[b] - 1] = 470 (u8*)h->alloc(h, (size_t)seg->file_size, 16); 471 if (!img->segment_bytes[bucket_seg[b] - 1]) 472 compiler_panic(img->c, SRCLOC_NONE, "link: oom on segment bytes"); 473 img->segment_bytes_cap[bucket_seg[b] - 1] = (size_t)seg->file_size; 474 memset(img->segment_bytes[bucket_seg[b] - 1], 0, 475 (size_t)seg->file_size); 476 } 477 } 478 } 479 480 for (j = 0; j < img->nsections; ++j) { 481 LinkSection* ls = &img->sections[j]; 482 u32 b2 = (u32)(ls->segment_id - 1u); /* sentinel-stash */ 483 LinkSegment* seg = &img->segments[bucket_seg[b2] - 1]; 484 ls->segment_id = seg->id; 485 ls->vaddr += seg->vaddr; 486 ls->file_offset += seg->file_offset; 487 } 488 } 489 } 490 491 /* ---- scripted layout (linker-script driven) ---- 492 * 493 * Walks the KitLinkScript's output sections in declaration order, 494 * placing matched input sections at the dot location counter. One 495 * LinkSegment per non-DISCARD output section maps 1:1 to a PT_LOAD on 496 * emit. Symbol assignments (top-level and in-section) materialize as 497 * defined LinkSymbol globals via link_emit_boundary_sym. 498 * 499 * Discard handling: `/DISCARD/` matches input sections by glob and 500 * leaves their per-input m->section[id] entry as LINK_SEC_NONE — the 501 * downstream emit_reloc_records / link_assign_symbol_vaddrs passes 502 * already treat that as "section dropped" so they're naturally 503 * excluded from segments, gc, and reloc apply. */ 504 505 /* `*` is the only metachar. Supported forms in the kernel.lds-style 506 * subset: trailing star (".text*"), leading star ("*COMMON" — not in 507 * kernel.lds but cheap), and exact literal. 
*/ 508 static int match_glob(const char* pat, const char* name) { 509 size_t plen, nlen; 510 if (!pat || !name) return 0; 511 plen = slice_from_cstr(pat).len; 512 nlen = slice_from_cstr(name).len; 513 if (plen == 1 && pat[0] == '*') return 1; 514 if (plen >= 2 && pat[plen - 1] == '*') { 515 if (nlen + 1 < plen) return 0; 516 return memcmp(pat, name, plen - 1) == 0; 517 } 518 if (plen >= 2 && pat[0] == '*') { 519 if (nlen + 1 < plen) return 0; 520 return memcmp(pat + 1, name + (nlen - (plen - 1)), plen - 1) == 0; 521 } 522 return plen == nlen && memcmp(pat, name, plen) == 0; 523 } 524 525 static u64 eval_link_expr(Linker* l, LinkImage* img, u64 dot, 526 const KitLinkExpr* e, int* err) { 527 if (!e) { 528 *err = 1; 529 return 0; 530 } 531 switch ((KitLinkExprKind)e->kind) { 532 case KIT_LE_INT: 533 return (u64)e->v.int_val; 534 case KIT_LE_DOT: 535 return dot; 536 case KIT_LE_SYM: { 537 Sym name = pool_intern_slice(l->c->global, e->v.name); 538 LinkSymId id = symhash_get(&img->globals, name); 539 if (id == LINK_SYM_NONE) { 540 compiler_panic(l->c, SRCLOC_NONE, 541 "linker script: undefined symbol '%.*s' in expression", 542 SLICE_ARG(pool_slice(l->c->global, name))); 543 } 544 return LinkSyms_at(&img->syms, id - 1)->vaddr; 545 } 546 case KIT_LE_ADD: 547 return eval_link_expr(l, img, dot, e->v.bin.lhs, err) + 548 eval_link_expr(l, img, dot, e->v.bin.rhs, err); 549 case KIT_LE_SUB: 550 return eval_link_expr(l, img, dot, e->v.bin.lhs, err) - 551 eval_link_expr(l, img, dot, e->v.bin.rhs, err); 552 case KIT_LE_MUL: 553 return eval_link_expr(l, img, dot, e->v.bin.lhs, err) * 554 eval_link_expr(l, img, dot, e->v.bin.rhs, err); 555 case KIT_LE_DIV: { 556 u64 rhs = eval_link_expr(l, img, dot, e->v.bin.rhs, err); 557 if (rhs == 0) { 558 *err = 1; 559 return 0; 560 } 561 return eval_link_expr(l, img, dot, e->v.bin.lhs, err) / rhs; 562 } 563 case KIT_LE_AND: 564 return eval_link_expr(l, img, dot, e->v.bin.lhs, err) & 565 eval_link_expr(l, img, dot, e->v.bin.rhs, err); 566 case 
KIT_LE_OR: 567 return eval_link_expr(l, img, dot, e->v.bin.lhs, err) | 568 eval_link_expr(l, img, dot, e->v.bin.rhs, err); 569 case KIT_LE_XOR: 570 return eval_link_expr(l, img, dot, e->v.bin.lhs, err) ^ 571 eval_link_expr(l, img, dot, e->v.bin.rhs, err); 572 case KIT_LE_SHL: 573 return eval_link_expr(l, img, dot, e->v.bin.lhs, err) 574 << eval_link_expr(l, img, dot, e->v.bin.rhs, err); 575 case KIT_LE_SHR: 576 return eval_link_expr(l, img, dot, e->v.bin.lhs, err) >> 577 eval_link_expr(l, img, dot, e->v.bin.rhs, err); 578 case KIT_LE_ALIGN: { 579 u64 v = eval_link_expr(l, img, dot, e->v.align.val, err); 580 u64 a = eval_link_expr(l, img, dot, e->v.align.align, err); 581 if (a == 0) return v; 582 return ALIGN_UP(v, a); 583 } 584 case KIT_LE_REGION_ORIGIN: 585 case KIT_LE_REGION_LENGTH: 586 case KIT_LE_MAX: 587 case KIT_LE_MIN: 588 default: 589 compiler_panic(l->c, SRCLOC_NONE, 590 "linker script: expression kind %u not supported", 591 (unsigned)e->kind); 592 return 0; 593 } 594 } 595 596 /* Format-aware C-symbol mangling for linker-synthesized boundaries. */ 597 static Sym boundary_name(Linker* l, const char* name) { 598 return obj_format_c_mangle(l->c, name); 599 } 600 601 /* Upsert a global boundary symbol. Satisfies any prior undef ref in place; fans 602 * out to per-input duplicate name slots. `section_id` is LINK_SEC_NONE for 603 * absolute boundaries, or the owning LinkSectionId for boundaries that must 604 * follow a format-specific section relayout. */ 605 static void link_emit_boundary_sym_ex(Linker* l, LinkImage* img, 606 const char* name, u64 vaddr, 607 LinkSectionId section_id, u64 value) { 608 Sym sym = boundary_name(l, name); 609 LinkSymId id = symhash_get(&img->globals, sym); 610 LinkSymId canonical_id = id; 611 LinkSymbol rec; 612 u8 kind = SK_OBJ; 613 int fmt_kind; 614 u32 i, n; 615 /* Some formats own specific boundary symbols with a fixed SymKind 616 * (PE/COFF `__ImageBase` / `_tls_used` are SK_ABS). 
Ask the format 617 * instead of matching names here. */ 618 if (obj_format_boundary_sym_kind(l->c, slice_from_cstr(name), &fmt_kind)) 619 kind = (u8)fmt_kind; 620 memset(&rec, 0, sizeof(rec)); 621 rec.name = sym; 622 rec.kind = kind; 623 rec.defined = 1; 624 rec.section_id = section_id; 625 rec.value = value; 626 rec.vaddr = vaddr; 627 rec.bind = SB_GLOBAL; 628 if (id != LINK_SYM_NONE) { 629 *LinkSyms_at(&img->syms, id - 1) = rec; 630 LinkSyms_at(&img->syms, id - 1)->id = id; 631 } else { 632 LinkSymId fresh = link_append_symbol(img, &rec); 633 symhash_insert(&img->globals, sym, fresh, &id); 634 canonical_id = fresh; 635 } 636 n = LinkSyms_count(&img->syms); 637 for (i = 0; i < n; ++i) { 638 LinkSymbol* s = LinkSyms_at(&img->syms, i); 639 if (s->name != sym) continue; 640 if (s->id == canonical_id) continue; 641 if (s->bind == SB_LOCAL) continue; 642 s->section_id = section_id; 643 s->value = value; 644 s->vaddr = vaddr; 645 s->kind = kind; 646 s->defined = 1; 647 s->imported = 0; 648 } 649 } 650 651 void link_emit_boundary_sym(Linker* l, LinkImage* img, const char* name, 652 u64 vaddr) { 653 link_emit_boundary_sym_ex(l, img, name, vaddr, LINK_SEC_NONE, 0); 654 } 655 656 void link_emit_section_boundary_sym(Linker* l, LinkImage* img, 657 const char* name, LinkSectionId section_id, 658 u64 value) { 659 const LinkSection* sec; 660 u64 vaddr; 661 if (section_id == LINK_SEC_NONE || section_id > img->nsections) 662 compiler_panic(img->c, SRCLOC_NONE, 663 "link: boundary symbol '%.*s' has no containing section", 664 SLICE_ARG(slice_from_cstr(name))); 665 sec = &img->sections[section_id - 1]; 666 vaddr = sec->vaddr + (value - sec->obj_offset); 667 link_emit_boundary_sym_ex(l, img, name, vaddr, section_id, value); 668 } 669 670 /* link_define_boundary: public alias used by link_dyn.c. 
*/ 671 void link_define_boundary(Linker* l, LinkImage* img, const char* name, 672 u64 vaddr) { 673 link_emit_boundary_sym(l, img, name, vaddr); 674 } 675 676 /* Upsert a global symbol (mirror of emit_boundary_sym, used by apply_asn). */ 677 static void upsert_global_sym(Linker* l, LinkImage* img, KitSlice name, 678 u64 vaddr) { 679 /* Script sym slices are arena-interned and NUL-terminated; the boundary 680 * emitter mangles via obj_format_c_mangle which needs a C string. */ 681 link_emit_boundary_sym(l, img, name.s, vaddr); 682 } 683 684 /* Apply one KitLinkAssignment. */ 685 static void apply_asn(Linker* l, LinkImage* img, u64* dot, 686 const KitLinkAssignment* asn) { 687 int err = 0; 688 u64 v = eval_link_expr(l, img, *dot, asn->expr, &err); 689 if (err) return; 690 switch ((KitLinkAsnKind)asn->kind) { 691 case KIT_LAS_DOT: 692 if (v < *dot) 693 compiler_panic(l->c, SRCLOC_NONE, 694 "linker script: dot moved backwards (%llu -> %llu)", 695 (unsigned long long)*dot, (unsigned long long)v); 696 *dot = v; 697 break; 698 case KIT_LAS_SYM: 699 case KIT_LAS_PROVIDE: 700 if (asn->sym.s) upsert_global_sym(l, img, asn->sym, v); 701 break; 702 } 703 } 704 705 static int input_match_section(const KitLinkInputMatch* m, const char* nm) { 706 /* section_pattern is an arena-interned, NUL-terminated span of the 707 * script text; match_glob scans it as a C string. 
*/ 708 return match_glob(m->section_pattern.s, nm); 709 } 710 711 static void link_layout_sections_scripted(Linker* l, LinkImage* img, 712 const GcLive* g) { 713 Heap* h = img->heap; 714 const KitLinkScript* script = l->script; 715 u64 dot = 0; 716 u64 file_cursor = 0; 717 u32 ii, j, k, si; 718 u32 total_kept = 0; 719 720 img->scripted = 1; 721 722 for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { 723 ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; 724 InputMap* m = &img->input_maps[ii]; 725 for (j = 1; j < obj_section_count(ob); ++j) { 726 const Section* s = obj_section_get(ob, j); 727 if (s && link_section_kept(s) && !m->comdat_discarded[j]) 728 total_kept += live_section_units(g, m, ii, ob, j); 729 } 730 } 731 732 img->sections = total_kept ? (LinkSection*)h->alloc( 733 h, sizeof(*img->sections) * total_kept, 734 _Alignof(LinkSection)) 735 : NULL; 736 if (total_kept && !img->sections) 737 compiler_panic(img->c, SRCLOC_NONE, "link: oom on sections"); 738 739 u8** claimed = NULL; 740 if (LinkInputs_count(&l->inputs)) { 741 u32 ni = LinkInputs_count(&l->inputs); 742 claimed = (u8**)h->alloc(h, sizeof(*claimed) * ni, _Alignof(u8*)); 743 if (!claimed) compiler_panic(img->c, SRCLOC_NONE, "link: oom on claim map"); 744 for (ii = 0; ii < ni; ++ii) { 745 ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; 746 u32 nsec = obj_section_count(ob); 747 claimed[ii] = (u8*)h->alloc(h, nsec, 1); 748 if (!claimed[ii]) 749 compiler_panic(img->c, SRCLOC_NONE, "link: oom on claim row"); 750 memset(claimed[ii], 0, nsec); 751 } 752 } 753 754 for (k = 0; k < script->ntop_asns; ++k) { 755 const KitLinkAssignment* a = &script->top_asns[k]; 756 if (a->kind == KIT_LAS_DOT) apply_asn(l, img, &dot, a); 757 } 758 759 u32 nseg_max = 0; 760 for (si = 0; si < script->nsections; ++si) 761 if (!slice_eq_cstr(script->sections[si].name, "/DISCARD/")) ++nseg_max; 762 img->segments = 763 nseg_max ? 
(LinkSegment*)h->alloc(h, sizeof(*img->segments) * nseg_max, 764 _Alignof(LinkSegment)) 765 : NULL; 766 img->segment_bytes = 767 nseg_max ? (u8**)h->alloc(h, sizeof(*img->segment_bytes) * nseg_max, 768 _Alignof(u8*)) 769 : NULL; 770 img->segment_bytes_cap = 771 nseg_max 772 ? (size_t*)h->alloc(h, sizeof(*img->segment_bytes_cap) * nseg_max, 773 _Alignof(size_t)) 774 : NULL; 775 if (nseg_max && 776 (!img->segments || !img->segment_bytes || !img->segment_bytes_cap)) 777 compiler_panic(img->c, SRCLOC_NONE, "link: oom on segments"); 778 if (nseg_max) { 779 memset(img->segment_bytes, 0, sizeof(*img->segment_bytes) * nseg_max); 780 memset(img->segment_bytes_cap, 0, 781 sizeof(*img->segment_bytes_cap) * nseg_max); 782 } 783 784 for (si = 0; si < script->nsections; ++si) { 785 const KitLinkOutputSection* os = &script->sections[si]; 786 int is_discard = slice_eq_cstr(os->name, "/DISCARD/"); 787 788 if (is_discard) { 789 u32 mi; 790 for (mi = 0; mi < os->ninputs; ++mi) { 791 const KitLinkInputMatch* im = &os->inputs[mi]; 792 for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { 793 ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; 794 for (j = 1; j < obj_section_count(ob); ++j) { 795 const Section* s; 796 const char* nm; 797 if (claimed[ii][j]) continue; 798 s = obj_section_get(ob, j); 799 if (!s) continue; 800 { 801 nm = pool_slice(l->c->global, s->name).s; 802 } 803 if (!nm) continue; 804 if (input_match_section(im, nm)) claimed[ii][j] = 1; 805 } 806 } 807 } 808 continue; 809 } 810 811 u64 sec_start_dot; 812 u32 perms = 0; 813 LinkSegmentId seg_id = (LinkSegmentId)(img->nsegments + 1u); 814 LinkSegment* seg; 815 u64 file_size_accum = 0; 816 u64 mem_size_accum = 0; 817 u32 align_max = 1; 818 u32 nsec_in_seg = 0; 819 u32 first_section_idx = img->nsections; 820 821 for (k = 0; k < os->nasns; ++k) { 822 apply_asn(l, img, &dot, &os->asns[k]); 823 } 824 sec_start_dot = dot; 825 826 { 827 u32 mi; 828 for (mi = 0; mi < os->ninputs; ++mi) { 829 const KitLinkInputMatch* im = 
&os->inputs[mi]; 830 for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { 831 ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; 832 InputMap* m = &img->input_maps[ii]; 833 for (j = 1; j < obj_section_count(ob); ++j) { 834 const Section* s; 835 const char* nm; 836 u32 align; 837 u32 first = 0, count = 1, ai; 838 int has_atoms; 839 if (claimed[ii][j]) continue; 840 if (m->comdat_discarded[j]) continue; 841 s = obj_section_get(ob, j); 842 if (!s || !link_section_kept(s)) continue; 843 { 844 nm = pool_slice(l->c->global, s->name).s; 845 } 846 if (!nm) continue; 847 if (!input_match_section(im, nm)) continue; 848 849 align = s->align ? s->align : 1u; 850 has_atoms = link_input_section_has_atoms(m, j); 851 if (has_atoms) link_input_section_atoms(m, j, &first, &count); 852 for (ai = 0; ai < count; ++ai) { 853 ObjAtomId aid = 854 has_atoms ? m->section_atom_ids[first + ai] : OBJ_ATOM_NONE; 855 const ObjAtom* atom = has_atoms ? obj_atom_get(ob, aid) : NULL; 856 u64 ofs; 857 LinkSection* ls; 858 LinkSectionId lsid; 859 u64 obj_offset; 860 u64 size; 861 if (has_atoms) { 862 if (!atom || atom->removed) continue; 863 if (!link_gc_atom_live_get(g, ii, aid)) continue; 864 obj_offset = atom->offset; 865 size = atom->size; 866 } else if (!link_gc_live_get(g, ii, j)) { 867 continue; 868 } else { 869 obj_offset = 0u; 870 size = link_section_size_for_link(s); 871 } 872 if (align > align_max) align_max = align; 873 dot = ALIGN_UP(dot, (u64)align); 874 ofs = dot; 875 876 lsid = (LinkSectionId)(img->nsections + 1u); 877 ls = &img->sections[img->nsections++]; 878 memset(ls, 0, sizeof(*ls)); 879 ls->id = lsid; 880 ls->input_id = LinkInputs_at(&l->inputs, ii)->id; 881 ls->obj_section_id = j; 882 ls->obj_atom_id = aid; 883 ls->segment_id = seg_id; 884 ls->obj_offset = obj_offset; 885 ls->vaddr = ofs; 886 ls->size = size; 887 ls->flags = s->flags; 888 ls->align = align; 889 ls->name = s->name; 890 ls->sem = (s->kind == SEC_BSS) ? 
SSEM_NOBITS : s->sem; 891 ls->file_offset = ofs - sec_start_dot; 892 ls->input_offset = ls->file_offset; 893 map_placed_unit(m, j, aid, lsid); 894 895 dot += ls->size; 896 mem_size_accum = dot - sec_start_dot; 897 if (ls->sem != SSEM_NOBITS) file_size_accum = dot - sec_start_dot; 898 perms |= (s->flags & (SF_EXEC | SF_WRITE | SF_TLS)); 899 ++nsec_in_seg; 900 } 901 claimed[ii][j] = 1; 902 } 903 } 904 } 905 } 906 907 if (nsec_in_seg == 0) { 908 continue; 909 } 910 911 seg = &img->segments[img->nsegments]; 912 memset(seg, 0, sizeof(*seg)); 913 seg->id = seg_id; 914 seg->flags = SF_ALLOC | perms; 915 seg->vaddr = sec_start_dot; 916 file_cursor = ALIGN_UP(file_cursor, (u64)PAGE_SIZE); 917 seg->file_offset = file_cursor; 918 seg->mem_size = mem_size_accum; 919 seg->file_size = file_size_accum; 920 seg->align = align_max; 921 seg->nsections = nsec_in_seg; 922 file_cursor += file_size_accum; 923 if (file_size_accum && !l->jit_mode) { 924 img->segment_bytes[img->nsegments] = 925 (u8*)h->alloc(h, (size_t)file_size_accum, 16); 926 if (!img->segment_bytes[img->nsegments]) 927 compiler_panic(img->c, SRCLOC_NONE, 928 "link: oom on scripted segment bytes"); 929 img->segment_bytes_cap[img->nsegments] = (size_t)file_size_accum; 930 memset(img->segment_bytes[img->nsegments], 0, (size_t)file_size_accum); 931 } 932 933 { 934 u32 fi; 935 for (fi = first_section_idx; fi < img->nsections; ++fi) { 936 LinkSection* ls = &img->sections[fi]; 937 ls->file_offset = seg->file_offset + (ls->file_offset); 938 } 939 } 940 941 img->nsegments++; 942 } 943 944 for (k = 0; k < script->ntop_asns; ++k) { 945 const KitLinkAssignment* a = &script->top_asns[k]; 946 if (a->kind == KIT_LAS_SYM || a->kind == KIT_LAS_PROVIDE) 947 apply_asn(l, img, &dot, a); 948 } 949 950 if (claimed) { 951 u32 ni = LinkInputs_count(&l->inputs); 952 for (ii = 0; ii < ni; ++ii) { 953 ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; 954 h->free(h, claimed[ii], obj_section_count(ob)); 955 } 956 h->free(h, claimed, 
sizeof(*claimed) * ni); 957 } 958 } 959 960 /* ---- pass 2b: COMMON symbol BSS allocation ---- */ 961 962 void link_layout_commons(Linker* l, LinkImage* img) { 963 u32 i; 964 (void)l; 965 LinkSegment* rw_seg = NULL; 966 967 for (i = 0; i < img->nsegments; ++i) { 968 if (img->segments[i].flags & SF_WRITE) { 969 rw_seg = &img->segments[i]; 970 break; 971 } 972 } 973 974 { 975 int has_common = 0; 976 for (i = 0; i < LinkSyms_count(&img->syms); ++i) 977 if (LinkSyms_at(&img->syms, i)->kind == SK_COMMON && 978 LinkSyms_at(&img->syms, i)->defined) { 979 has_common = 1; 980 break; 981 } 982 if (!has_common) return; 983 } 984 985 if (!rw_seg) { 986 /* No writable PT_LOAD yet — synthesize a zero-size BSS segment for the 987 * COMMON section. This is a NOBITS region (no byte buffer, file/mem 988 * size grow with the section below), so it adopts only the array- 989 * growth helper, not the fixed-size region builder. */ 990 u32 seg_idx = link_iplt_alloc_segments(img, 1u); 991 u64 vaddr = 0; 992 for (i = 0; i < img->nsegments; ++i) { 993 u64 end = img->segments[i].vaddr + img->segments[i].mem_size; 994 if (end > vaddr) vaddr = end; 995 } 996 vaddr = ALIGN_UP(vaddr, (u64)(link_layout_page_size(l))); 997 rw_seg = &img->segments[seg_idx]; 998 memset(rw_seg, 0, sizeof(*rw_seg)); 999 rw_seg->id = (LinkSegmentId)(seg_idx + 1u); 1000 rw_seg->flags = SF_ALLOC | SF_WRITE; 1001 rw_seg->vaddr = vaddr; 1002 rw_seg->file_offset = vaddr; 1003 rw_seg->file_size = 0; 1004 rw_seg->mem_size = 0; 1005 rw_seg->align = (u32)link_layout_page_size(l); 1006 img->segment_bytes[seg_idx] = NULL; 1007 img->segment_bytes_cap[seg_idx] = 0; 1008 img->nsegments++; 1009 } 1010 1011 { 1012 Heap* h = img->heap; 1013 u64 bss_start = rw_seg->vaddr + rw_seg->mem_size; 1014 u64 bss_cursor = bss_start; 1015 u32 max_align = 1u; 1016 LinkSection* commsec; 1017 LinkSectionId comm_lsid; 1018 1019 for (i = 0; i < LinkSyms_count(&img->syms); ++i) { 1020 LinkSymbol* s = LinkSyms_at(&img->syms, i); 1021 u32 align; 1022 if 
(s->kind != SK_COMMON || !s->defined) continue; 1023 align = s->common_align ? s->common_align : 1u; 1024 if (align > max_align) max_align = align; 1025 bss_cursor = ALIGN_UP(bss_cursor, (u64)(align)); 1026 s->value = bss_cursor - bss_start; 1027 bss_cursor += s->size ? s->size : 1u; 1028 } 1029 1030 { 1031 u32 new_nsec = img->nsections + 1u; 1032 LinkSection* nsec = (LinkSection*)h->realloc( 1033 h, img->sections, sizeof(*img->sections) * img->nsections, 1034 sizeof(*img->sections) * new_nsec, _Alignof(LinkSection)); 1035 if (!nsec) 1036 compiler_panic(img->c, SRCLOC_NONE, "link: oom on common section"); 1037 img->sections = nsec; 1038 } 1039 commsec = &img->sections[img->nsections]; 1040 memset(commsec, 0, sizeof(*commsec)); 1041 comm_lsid = (LinkSectionId)(img->nsections + 1u); 1042 commsec->id = comm_lsid; 1043 commsec->input_id = LINK_INPUT_NONE; 1044 commsec->obj_section_id = OBJ_SEC_NONE; 1045 commsec->segment_id = rw_seg->id; 1046 commsec->input_offset = 0; 1047 commsec->file_offset = bss_start; 1048 commsec->vaddr = bss_start; 1049 commsec->size = bss_cursor - bss_start; 1050 commsec->flags = SF_ALLOC | SF_WRITE; 1051 commsec->align = max_align; 1052 commsec->name = pool_intern_slice(img->c->global, SLICE_LIT(".bss.common")); 1053 commsec->sem = SSEM_NOBITS; 1054 img->nsections++; 1055 1056 for (i = 0; i < LinkSyms_count(&img->syms); ++i) { 1057 LinkSymbol* s = LinkSyms_at(&img->syms, i); 1058 if (s->kind != SK_COMMON || !s->defined) continue; 1059 s->section_id = comm_lsid; 1060 s->vaddr = bss_start + s->value; 1061 s->kind = SK_OBJ; 1062 } 1063 1064 rw_seg->mem_size = bss_cursor - rw_seg->vaddr; 1065 rw_seg->nsections++; 1066 } 1067 } 1068 1069 /* Copy each input section's bytes into its segment buffer. 
*/
/* For every LinkSection that came from an input object, reads `size` bytes
 * starting at the section's obj_offset in the input section's byte buffer
 * and writes them into the owning segment's buffer at
 * (file_offset - segment file_offset). No relocations are applied here.
 *
 * Skipped sections:
 *   - synthetic sections (input_id == LINK_INPUT_NONE);
 *   - sections that belong to no segment (segment_id == LINK_SEG_NONE),
 *     which would otherwise index the segment array out of bounds;
 *   - NOBITS / BSS input sections and empty sections;
 *   - sections whose segment has no byte buffer, so the copy never
 *     targets a NULL-based address. */
void link_emit_segment_bytes(Linker* l, LinkImage* img) {
  u32 j;
  for (j = 0; j < img->nsections; ++j) {
    const LinkSection* ls = &img->sections[j];
    const LinkSegment* seg;
    const Section* s;
    ObjBuilder* ob;
    u8* seg_buf;

    if (ls->input_id == LINK_INPUT_NONE) continue;
    /* Check the segment id before it is turned into an array index. */
    if (ls->segment_id == LINK_SEG_NONE) continue;

    ob = LinkInputs_at(&l->inputs, ls->input_id - 1)->obj;
    s = obj_section_get(ob, ls->obj_section_id);
    if (!s || s->sem == SSEM_NOBITS || s->kind == SEC_BSS) continue;
    if (ls->size == 0) continue;

    seg = &img->segments[ls->segment_id - 1];
    seg_buf = img->segment_bytes[seg->id - 1];
    if (!seg_buf) continue; /* segment carries no file bytes */

    buf_read(&s->bytes, (u32)ls->obj_offset,
             seg_buf + (size_t)(ls->file_offset - seg->file_offset),
             (size_t)ls->size);
  }
}

/* ---- pass 2c: file-only debug sections ----
 *
 * Carry every surviving .debug_* section through to the linked image as
 * a non-segment, file-resident LinkSection so addr2line / gdb resolve
 * file:line on the output. Contributions of the same name are assigned
 * a per-name cumulative `vaddr` (the DWARF-section-relative base: 0 for
 * the first input, size0 for the second, …); the ELF emitter merges
 * same-name contributions into one output .debug_X section, and the
 * per-input base makes SK_SECTION cross-section R_ABS32 offsets land in
 * the merged section. Each contribution keeps its own byte buffer in
 * the debug registry, with relocations applied in place at reloc-offset.
 *
 * Runs after link_emit_segment_bytes (so the segment-byte copy never
 * sees these segment-less sections) and before link_assign_symbol_vaddrs
 * (so the SK_SECTION debug symbols pick up their section_id + base). */

/* Per-output-name cumulative base tracker. Debug section names are few
 * (.debug_info/.debug_line/.debug_abbrev/.debug_str/...), so a linear
 * scan is fine.
*/ 1108 typedef struct DbgNameAcc { 1109 Sym name; 1110 u64 cum; /* running total size for this name */ 1111 } DbgNameAcc; 1112 1113 void link_layout_debug(Linker* l, LinkImage* img) { 1114 Heap* h = img->heap; 1115 u32 ii, j; 1116 u32 ndbg = 0; 1117 1118 /* Pass 0: count surviving debug contributions. */ 1119 for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { 1120 ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; 1121 InputMap* m = &img->input_maps[ii]; 1122 for (j = 1; j < obj_section_count(ob); ++j) { 1123 const Section* s = obj_section_get(ob, j); 1124 if (link_section_kept_fileonly(s) && !m->comdat_discarded[j] && 1125 link_section_size_for_link(s) > 0) 1126 ++ndbg; 1127 } 1128 } 1129 if (ndbg == 0) return; 1130 1131 /* Grow img->sections to hold the new file-only sections appended after 1132 * the existing allocatable + common sections. */ 1133 { 1134 u32 new_nsec = img->nsections + ndbg; 1135 LinkSection* ns = (LinkSection*)h->realloc( 1136 h, img->sections, sizeof(*img->sections) * img->nsections, 1137 sizeof(*img->sections) * new_nsec, _Alignof(LinkSection)); 1138 if (!ns) compiler_panic(img->c, SRCLOC_NONE, "link: oom on debug sections"); 1139 img->sections = ns; 1140 } 1141 1142 img->dbg_bytes = 1143 (u8**)h->alloc(h, sizeof(*img->dbg_bytes) * ndbg, _Alignof(u8*)); 1144 img->dbg_size = 1145 (u64*)h->alloc(h, sizeof(*img->dbg_size) * ndbg, _Alignof(u64)); 1146 DbgNameAcc* acc = 1147 (DbgNameAcc*)h->alloc(h, sizeof(*acc) * ndbg, _Alignof(DbgNameAcc)); 1148 if (!img->dbg_bytes || !img->dbg_size || !acc) 1149 compiler_panic(img->c, SRCLOC_NONE, "link: oom on debug registry"); 1150 memset(img->dbg_bytes, 0, sizeof(*img->dbg_bytes) * ndbg); 1151 memset(img->dbg_size, 0, sizeof(*img->dbg_size) * ndbg); 1152 1153 img->dbg_first_lsid = (LinkSectionId)(img->nsections + 1u); 1154 img->dbg_count = 0; 1155 u32 nacc = 0; 1156 1157 /* Pass 1: append one file-only LinkSection per contribution, assign 1158 * per-name cumulative base, and copy bytes into the 
registry. */ 1159 for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { 1160 ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; 1161 InputMap* m = &img->input_maps[ii]; 1162 for (j = 1; j < obj_section_count(ob); ++j) { 1163 const Section* s = obj_section_get(ob, j); 1164 u32 size; 1165 u64 base; 1166 u32 ai, slot; 1167 LinkSection* ls; 1168 LinkSectionId lsid; 1169 u8* buf; 1170 if (!link_section_kept_fileonly(s) || m->comdat_discarded[j]) continue; 1171 size = link_section_size_for_link(s); 1172 if (size == 0) continue; 1173 1174 /* Per-name cumulative base. */ 1175 for (ai = 0; ai < nacc; ++ai) 1176 if (acc[ai].name == s->name) break; 1177 if (ai == nacc) { 1178 acc[nacc].name = s->name; 1179 acc[nacc].cum = 0; 1180 ai = nacc++; 1181 } 1182 base = acc[ai].cum; 1183 acc[ai].cum += size; 1184 1185 slot = img->dbg_count; 1186 lsid = (LinkSectionId)(img->nsections + 1u); 1187 ls = &img->sections[img->nsections++]; 1188 memset(ls, 0, sizeof(*ls)); 1189 ls->id = lsid; 1190 ls->input_id = LinkInputs_at(&l->inputs, ii)->id; 1191 ls->obj_section_id = j; 1192 ls->obj_atom_id = OBJ_ATOM_NONE; 1193 ls->segment_id = LINK_SEG_NONE; 1194 ls->obj_offset = 0; 1195 ls->input_offset = 0; 1196 ls->file_offset = 0; /* assigned by the ELF emitter */ 1197 ls->vaddr = base; /* DWARF-section-relative base */ 1198 ls->size = size; 1199 ls->flags = s->flags; 1200 ls->align = s->align ? s->align : 1u; 1201 ls->name = s->name; 1202 ls->sem = SSEM_PROGBITS; 1203 ls->file_only = 1u; 1204 1205 /* Copy this contribution's bytes into its own registry buffer. 
*/ 1206 buf = (u8*)h->alloc(h, size, 1); 1207 if (!buf) compiler_panic(img->c, SRCLOC_NONE, "link: oom on debug bytes"); 1208 buf_read(&s->bytes, 0u, buf, (size_t)size); 1209 img->dbg_bytes[slot] = buf; 1210 img->dbg_size[slot] = size; 1211 img->dbg_count++; 1212 1213 /* Map the input's section id to this LinkSection so the SK_SECTION 1214 * debug symbol resolves (assign_symbol_vaddrs) and its relocations 1215 * route here (emit_relocations). */ 1216 map_placed_unit(m, j, OBJ_ATOM_NONE, lsid); 1217 } 1218 } 1219 1220 h->free(h, acc, sizeof(*acc) * ndbg); 1221 } 1222 1223 u8* link_fileonly_bytes(LinkImage* img, LinkSectionId id) { 1224 if (!img || id == LINK_SEC_NONE || id < img->dbg_first_lsid) return NULL; 1225 { 1226 u32 idx = (u32)(id - img->dbg_first_lsid); 1227 if (idx >= img->dbg_count) return NULL; 1228 return img->dbg_bytes[idx]; 1229 } 1230 } 1231 1232 /* ---- public orchestration ---- */ 1233 1234 LinkImage* link_resolve(Linker* l) { 1235 LinkImage* img; 1236 Heap* h; 1237 1238 metrics_scope_begin(l->c, "link.resolve.total"); 1239 /* Inject any format-owned synthetic input before symbol resolution. 1240 * Only the COFF format registers a hook (link_synth_coff_ctor_dtor_list); 1241 * the dispatcher is a no-op for every other format. */ 1242 obj_format_synth_inputs(l->c, l); 1243 metrics_scope_begin(l->c, "link.ingest_archives"); 1244 link_ingest_archives(l); 1245 metrics_scope_end(l->c, "link.ingest_archives"); 1246 1247 img = link_image_alloc(l->c); 1248 h = img->heap; 1249 img->linker = l; 1250 img->text_base_set = l->text_base_set; 1251 img->text_base = l->text_base; 1252 1253 img->ninput_maps = LinkInputs_count(&l->inputs); 1254 metrics_count(l->c, "link.inputs", img->ninput_maps); 1255 img->input_maps = 1256 LinkInputs_count(&l->inputs) 1257 ? 
(InputMap*)h->alloc( 1258 h, sizeof(*img->input_maps) * LinkInputs_count(&l->inputs), 1259 _Alignof(InputMap)) 1260 : NULL; 1261 if (LinkInputs_count(&l->inputs) && !img->input_maps) 1262 compiler_panic(l->c, SRCLOC_NONE, "link: oom on input maps"); 1263 if (LinkInputs_count(&l->inputs)) 1264 memset(img->input_maps, 0, 1265 sizeof(*img->input_maps) * LinkInputs_count(&l->inputs)); 1266 1267 metrics_scope_begin(l->c, "link.resolve_symbols"); 1268 link_resolve_symbols(l, img); 1269 metrics_scope_end(l->c, "link.resolve_symbols"); 1270 { 1271 GcLive g = {0}; 1272 metrics_scope_begin(l->c, "link.gc"); 1273 link_gc_live_alloc(&g, l, h); 1274 link_gc_compute(l, img, &g); 1275 metrics_scope_end(l->c, "link.gc"); 1276 metrics_scope_begin(l->c, "link.layout_sections"); 1277 link_layout_sections(l, img, &g); 1278 link_layout_commons(l, img); 1279 metrics_count(l->c, "link.sections", img->nsections); 1280 metrics_count(l->c, "link.segments", img->nsegments); 1281 metrics_scope_end(l->c, "link.layout_sections"); 1282 metrics_scope_begin(l->c, "link.emit_segment_bytes"); 1283 if (!l->jit_mode) link_emit_segment_bytes(l, img); 1284 metrics_scope_end(l->c, "link.emit_segment_bytes"); 1285 /* Carry .debug_* through as file-only sections. ELF places them in 1286 * non-alloc sections; Mach-O in a __DWARF segment. The JIT path 1287 * serves debug via kit_jit_view instead, and COFF emit doesn't yet 1288 * handle file-only sections. 
*/ 1289 metrics_scope_begin(l->c, "link.layout_debug"); 1290 if (!l->strip_debug && !l->jit_mode && 1291 obj_format_carries_file_only_debug(l->c)) 1292 link_layout_debug(l, img); 1293 metrics_scope_end(l->c, "link.layout_debug"); 1294 metrics_scope_begin(l->c, "link.assign_vaddrs"); 1295 link_assign_symbol_vaddrs(l, img); 1296 metrics_scope_end(l->c, "link.assign_vaddrs"); 1297 metrics_scope_begin(l->c, "link.emit_boundaries"); 1298 link_emit_array_boundaries(l, img); 1299 link_emit_tls_boundaries(l, img); 1300 link_emit_encoding_section_boundaries(l, img); 1301 link_emit_boundary_sym(l, img, "__dso_handle", 0); 1302 /* `_DYNAMIC` marks the dynamic section; in a static image it must be 1303 * absolute 0 so libc's static-vs-dynamic probe (FreeBSD's __libc_start1 1304 * gates _init_tls() on `&_DYNAMIC != NULL`) takes the static path. Only 1305 * define it for dynamic output, where layout_dyn places it at the real 1306 * .dynamic vaddr; for static, the weak undef from crt/libc already 1307 * resolved to SK_ABS 0, and defining it here as a rebased SK_OBJ symbol 1308 * would wrongly make `&_DYNAMIC` non-zero. */ 1309 if (l->emit_pie) link_emit_boundary_sym(l, img, "_DYNAMIC", 0); 1310 link_emit_boundary_sym(l, img, "_GLOBAL_OFFSET_TABLE_", 0); 1311 /* PE/COFF: mingw CRT references `__ImageBase` for ASLR-relative 1312 * addressing and base-relocation bookkeeping. The PE emitter 1313 * writes LINK_PE_IMAGE_BASE into the optional header; expose the 1314 * same value as a linker-defined symbol so input objects resolve. 1315 * Driven by the format claiming `__ImageBase` (the same hook that 1316 * fixes its SymKind) rather than a target.obj switch. 
*/ 1317 { 1318 int fmt_kind; 1319 if (obj_format_boundary_sym_kind(l->c, SLICE_LIT("__ImageBase"), 1320 &fmt_kind)) { 1321 link_emit_boundary_sym(l, img, "__ImageBase", LINK_PE_IMAGE_BASE); 1322 if (img->tls_memsz) link_emit_boundary_sym(l, img, "_tls_used", 0); 1323 } 1324 } 1325 { 1326 const LinkArchDesc* arch = link_arch_desc_for(l->c); 1327 u32 si; 1328 u64 gp_vaddr = 0; 1329 if (arch && arch->global_pointer_symbol) { 1330 for (si = 0; si < img->nsegments; ++si) { 1331 if (img->segments[si].flags & SF_WRITE) { 1332 gp_vaddr = img->segments[si].vaddr + arch->global_pointer_rw_offset; 1333 break; 1334 } 1335 } 1336 link_emit_boundary_sym(l, img, arch->global_pointer_symbol, gp_vaddr); 1337 } 1338 } 1339 metrics_scope_end(l->c, "link.emit_boundaries"); 1340 metrics_scope_begin(l->c, "link.resolve_undefs"); 1341 link_resolve_undefs(l, img); 1342 metrics_scope_end(l->c, "link.resolve_undefs"); 1343 metrics_scope_begin(l->c, "link.gc_drop_dead"); 1344 link_gc_drop_dead_globals(l, img, &g); 1345 metrics_scope_end(l->c, "link.gc_drop_dead"); 1346 metrics_scope_begin(l->c, "link.layout_iplt"); 1347 link_layout_iplt(l, img); 1348 if (img->niplt) link_emit_array_boundaries(l, img); 1349 metrics_scope_end(l->c, "link.layout_iplt"); 1350 { 1351 LinkSymId* got_map = NULL; 1352 LinkSymId* stub_map = NULL; 1353 u32 map_size = LinkSyms_count(&img->syms) + 1u; 1354 metrics_scope_begin(l->c, "link.layout_jit_stubs"); 1355 link_layout_jit_stubs(l, img, map_size, &stub_map); 1356 metrics_scope_end(l->c, "link.layout_jit_stubs"); 1357 metrics_scope_begin(l->c, "link.layout_got"); 1358 /* Skip the link-time static GOT only for formats that build their own 1359 * static GOT / non-lazy pointer table (Mach-O) in a static image. 
*/ 1360 if (!obj_format_builds_own_static_got(l->c) || !l->emit_static_exe) 1361 link_layout_got(l, img, map_size, &got_map); 1362 metrics_scope_end(l->c, "link.layout_got"); 1363 metrics_scope_begin(l->c, "link.emit_relocations"); 1364 link_emit_relocations(l, img, got_map, stub_map); 1365 metrics_count(l->c, "link.syms", LinkSyms_count(&img->syms)); 1366 metrics_count(l->c, "link.relocs", LinkRelocs_count(&img->relocs)); 1367 metrics_scope_end(l->c, "link.emit_relocations"); 1368 if (got_map) h->free(h, got_map, sizeof(*got_map) * map_size); 1369 if (stub_map) h->free(h, stub_map, sizeof(*stub_map) * map_size); 1370 } 1371 { 1372 const ObjFormatImpl* fmt = obj_format_lookup(l->c->target.obj); 1373 metrics_scope_begin(l->c, "link.layout_dyn"); 1374 if (fmt && fmt->layout_dyn) fmt->layout_dyn(l, img); 1375 metrics_scope_end(l->c, "link.layout_dyn"); 1376 } 1377 metrics_scope_begin(l->c, "link.resolve_entry"); 1378 link_resolve_entry(l, img); 1379 metrics_scope_end(l->c, "link.resolve_entry"); 1380 link_gc_live_free(&g, h); 1381 } 1382 1383 metrics_scope_begin(l->c, "link.capture_debug"); 1384 link_capture_debug_inputs(l, img); 1385 metrics_scope_end(l->c, "link.capture_debug"); 1386 1387 metrics_scope_end(l->c, "link.resolve.total"); 1388 return img; 1389 }