obj_secnames.c (16721B)
1 /* Format-aware canonical section names. 2 * 3 * The kit-internal section model (obj/obj.h) is format-neutral: every 4 * Section carries a single Sym name plus a SecKind tag. Most sections 5 * keep ELF-style dot-prefixed names ("`.text`", "`.data`", …) end-to-end 6 * because the per-format writer translates them as it emits headers. 7 * 8 * A handful of *synthetic* sections — built by the linker rather than 9 * the front end — diverge in name across formats. Their names need to 10 * be picked at synthesis time, before any writer sees them, because the 11 * linker uses the name to drive layout, symbol-boundary emission, and 12 * the writer's output-section bucketing. This TU centralizes that 13 * choice so callers don't sprinkle target-format switches through 14 * link_layout.c / link_dyn.c. 15 * 16 * Phase 1: ELF returns the historical name; Mach-O 17 * panics with a "TODO" until the macho writer lands in Phase 2/3. COFF 18 * panics in the same way and is filled in later. */ 19 20 #include <kit/cg.h> 21 #include <string.h> 22 23 #include "core/core.h" 24 #include "core/heap.h" 25 #include "core/pool.h" 26 #include "core/slice.h" 27 #include "obj/format.h" 28 #include "obj/obj.h" 29 30 /* The C-symbol prefix for the active object format, never NULL: a format 31 * row with a NULL c_label_prefix (or no format match) is treated as "". */ 32 const char* obj_format_c_label_prefix(const Compiler* c) { 33 const ObjFormatImpl* fmt = c ? obj_format_lookup(c->target.obj) : NULL; 34 const char* p = fmt ? fmt->c_label_prefix : NULL; 35 return p ? p : ""; 36 } 37 38 int obj_macho_debug_sectname(const char* name, size_t len, char out[17]) { 39 /* Only ".debug_*" sections translate here; ".eh_frame" lives in __TEXT 40 * and is handled by the writer's generic SecKind path and the reader's 41 * own candidate list, not this helper. */ 42 static const char kPrefix[] = ".debug_"; 43 const size_t plen = sizeof(kPrefix) - 1; /* 7 */ 44 size_t i; 45 if (!name || len < plen || memcmp(name, kPrefix, plen) != 0) return 0; 46 /* out = "__" + name-without-dot, capped at Mach-O's 16-byte sectname. 47 * The cap yields Apple's spelling for the one overlong DWARF5 name 48 * (".debug_str_offsets" -> "__debug_str_offs"). */ 49 out[0] = '_'; 50 out[1] = '_'; 51 for (i = 0; i + 1 < len && i < 14u; ++i) out[2 + i] = name[1 + i]; 52 out[2 + i] = '\0'; 53 return 1; 54 } 55 56 const char* obj_macho_canon_secname(SecKind kind) { 57 /* Mirrors the SecKind cases of name_to_seg_sect (src/obj/macho/emit.c): 58 * keep the two in lockstep so a section's text spelling and its binary 59 * header land in the same Mach-O (segment,section). */ 60 switch (kind) { 61 case SEC_TEXT: 62 return "__TEXT,__text"; 63 case SEC_RODATA: 64 return "__TEXT,__const"; 65 case SEC_DATA: 66 return "__DATA,__data"; 67 case SEC_BSS: 68 return "__DATA,__bss"; 69 default: /* SEC_OTHER / SEC_DEBUG: spelled from the section's own name. */ 70 return NULL; 71 } 72 } 73 74 /* Inverse of obj_macho_canon_secname: classify a Mach-O native 75 * "segname,sectname" spelling into a SecKind. Mirrors the per-segment 76 * rules of the Mach-O reader (sec_kind_from_seg_sect in macho/read.c) 77 * for the canonical names, but is name-only (no S_TYPE flags) so a 78 * format-neutral caller can classify without the raw section header. */ 79 int obj_macho_seckind_for_secname(const char* name, size_t len, SecKind* kind) { 80 const char* comma; 81 size_t seg_len, sect_off, sect_len; 82 if (!name || len == 0) return 0; 83 comma = (const char*)memchr(name, ',', len); 84 if (!comma) return 0; 85 seg_len = (size_t)(comma - name); 86 sect_off = seg_len + 1u; 87 sect_len = len - sect_off; 88 { 89 const char* seg = name; 90 const char* sect = name + sect_off; 91 SecKind k; 92 if (seg_len == 7 && memcmp(seg, "__DWARF", 7) == 0) { 93 k = SEC_DEBUG; 94 } else if (seg_len == 6 && memcmp(seg, "__TEXT", 6) == 0) { 95 k = (sect_len == 6 && memcmp(sect, "__text", 6) == 0) ? SEC_TEXT 96 : SEC_RODATA; 97 } else if (seg_len == 6 && memcmp(seg, "__DATA", 6) == 0) { 98 k = (sect_len == 5 && memcmp(sect, "__bss", 5) == 0) ? SEC_BSS : SEC_DATA; 99 } else { 100 return 0; 101 } 102 if (kind) *kind = k; 103 return 1; 104 } 105 } 106 107 /* Translate a kit-internal (ELF-spelled) section name to its Mach-O 108 * native "segname,sectname" spelling. Generalizes 109 * obj_macho_debug_sectname: the ".debug_*" DWARF case routes to 110 * "__DWARF,__debug_*" (truncated to Mach-O's 16-byte sectname), and 111 * ".eh_frame" routes to "__TEXT,__eh_frame". Returns 0 for any other 112 * name (caller falls back to its own spelling). */ 113 int obj_macho_native_secname(const char* name, size_t len, char out[40]) { 114 char ds[17]; 115 if (!name || len == 0) return 0; 116 if (obj_macho_debug_sectname(name, len, ds)) { 117 /* "__DWARF," + ds (already "__debug_*", <=16 chars). */ 118 size_t dl = slice_from_cstr(ds).len; 119 memcpy(out, "__DWARF,", 8); 120 memcpy(out + 8, ds, dl); 121 out[8 + dl] = '\0'; 122 return 1; 123 } 124 if (len == 9 && memcmp(name, ".eh_frame", 9) == 0) { 125 memcpy(out, "__TEXT,__eh_frame", 17); 126 out[17] = '\0'; 127 return 1; 128 } 129 return 0; 130 } 131 132 static Sym secname_panic_unimpl(Compiler* c, const char* which) { 133 SrcLoc l = {0, 0, 0}; 134 compiler_panic(c, l, 135 "obj section name '%.*s' for target obj=%u not yet " 136 "implemented", 137 SLICE_ARG(slice_from_cstr(which)), (unsigned)c->target.obj); 138 return 0; 139 } 140 141 Sym obj_secname_init_array(Compiler* c) { 142 switch (c->target.obj) { 143 case KIT_OBJ_ELF: 144 return pool_intern_slice(c->global, SLICE_LIT(".init_array")); 145 case KIT_OBJ_MACHO: 146 return pool_intern_slice(c->global, SLICE_LIT("__DATA,__mod_init_func")); 147 case KIT_OBJ_COFF: 148 /* CRT runtime scans `.CRT$X[A-Z]` for ctor/dtor tables; XCU is 149 * the user-constructor bucket. See doc/OBJ.md. */ 150 return pool_intern_slice(c->global, SLICE_LIT(".CRT$XCU")); 151 default: 152 return secname_panic_unimpl(c, ".init_array"); 153 } 154 } 155 156 Sym obj_secname_fini_array(Compiler* c) { 157 switch (c->target.obj) { 158 case KIT_OBJ_ELF: 159 return pool_intern_slice(c->global, SLICE_LIT(".fini_array")); 160 case KIT_OBJ_MACHO: 161 return pool_intern_slice(c->global, SLICE_LIT("__DATA,__mod_term_func")); 162 case KIT_OBJ_COFF: 163 /* `.CRT$XPA`/`XPZ` are markers; XPU is the user-destructor 164 * bucket. See doc/OBJ.md. */ 165 return pool_intern_slice(c->global, SLICE_LIT(".CRT$XPU")); 166 default: 167 return secname_panic_unimpl(c, ".fini_array"); 168 } 169 } 170 171 Sym obj_secname_preinit_array(Compiler* c) { 172 switch (c->target.obj) { 173 case KIT_OBJ_ELF: 174 return pool_intern_slice(c->global, SLICE_LIT(".preinit_array")); 175 case KIT_OBJ_MACHO: 176 /* Mach-O has no direct `.preinit_array` analogue — dyld runs 177 * S_MOD_INIT_FUNC_POINTERS only. Phase 3 of the linker will 178 * route preinit ctors through __mod_init_func; until then any 179 * caller hitting this on a MACHO target is doing something the 180 * platform can't represent. */ 181 return secname_panic_unimpl(c, ".preinit_array"); 182 case KIT_OBJ_COFF: 183 /* CRT's own setup runs in `.CRT$XI*`; user pre-init lives at 184 * XIA just after the CRT. See doc/OBJ.md. */ 185 return pool_intern_slice(c->global, SLICE_LIT(".CRT$XIA")); 186 default: 187 return secname_panic_unimpl(c, ".preinit_array"); 188 } 189 } 190 191 Sym obj_secname_tdata(Compiler* c) { 192 switch (c->target.obj) { 193 case KIT_OBJ_ELF: 194 return pool_intern_slice(c->global, SLICE_LIT(".tdata")); 195 case KIT_OBJ_MACHO: 196 return pool_intern_slice(c->global, SLICE_LIT("__DATA,__thread_data")); 197 case KIT_OBJ_COFF: 198 /* MSVC `.tls$` convention; linker concatenates `.tls$*` sorted 199 * by suffix. See doc/OBJ.md. */ 200 return pool_intern_slice(c->global, SLICE_LIT(".tls$")); 201 case KIT_OBJ_WASM: 202 /* Wasm has no thread-local storage model: a module instance owns a 203 * single linear memory, so a thread-local is just an ordinary 204 * data object. Keep the `.tdata` name (laid out like `.data`) and 205 * lower tls_addr_of to a plain symbol address. */ 206 return pool_intern_slice(c->global, SLICE_LIT(".tdata")); 207 default: 208 return secname_panic_unimpl(c, ".tdata"); 209 } 210 } 211 212 Sym obj_secname_tbss(Compiler* c) { 213 switch (c->target.obj) { 214 case KIT_OBJ_ELF: 215 return pool_intern_slice(c->global, SLICE_LIT(".tbss")); 216 case KIT_OBJ_MACHO: 217 return pool_intern_slice(c->global, SLICE_LIT("__DATA,__thread_bss")); 218 case KIT_OBJ_COFF: 219 /* sorted-alphabetically-last so it falls at the tail of the TLS 220 * image's zero-fill region. See doc/OBJ.md. */ 221 return pool_intern_slice(c->global, SLICE_LIT(".tls$ZZZ")); 222 case KIT_OBJ_WASM: 223 /* See obj_secname_tdata: wasm thread-locals are ordinary 224 * (zero-filled) data. */ 225 return pool_intern_slice(c->global, SLICE_LIT(".tbss")); 226 default: 227 return secname_panic_unimpl(c, ".tbss"); 228 } 229 } 230 231 int obj_format_extern_via_got(const Compiler* c) { 232 /* Mach-O always binds extern data through __got / non-lazy pointers 233 * — direct ADRP+ADD to an imported symbol isn't representable in 234 * ld64's reloc set. 235 * 236 * ELF static link: extern data is resolved at link time, so direct 237 * page-relative addressing works (linker patches the ADRP+ADD). 238 * 239 * ELF -fPIC / -fPIE: extern data may resolve to a symbol defined 240 * in a DSO at runtime; the codegen must route through the GOT so 241 * the loader can patch a single slot rather than touching .text. */ 242 /* Mach-O always binds extern data through its own static GOT / non-lazy 243 * pointers — same property the builds_own_static_got field records. */ 244 if (obj_format_builds_own_static_got(c)) return 1; 245 if (c->target.obj == KIT_OBJ_ELF && 246 (c->target.pic == KIT_PIC_PIC || c->target.pic == KIT_PIC_PIE)) 247 return 1; 248 return 0; 249 } 250 251 int obj_symbol_extern_via_got(const Compiler* c, ObjBuilder* obj, 252 ObjSymId sym) { 253 const ObjSym* s; 254 if (!obj_format_extern_via_got(c)) return 0; 255 s = obj_symbol_get(obj, sym); 256 return s && s->section_id == OBJ_SEC_NONE; 257 } 258 259 int obj_format_split_sections_as_atoms(const Compiler* c) { 260 const ObjFormatImpl* fmt; 261 if (!c) return 0; 262 fmt = obj_format_lookup(c->target.obj); 263 return fmt && fmt->split_sections_as_atoms; 264 } 265 266 /* C-symbol mangling for the active object format. Mach-O prepends a 267 * single `_` to every C source-level symbol on disk (matching Apple cc 268 * and decl.c): "main" → `_main`, "_start" → `__start`, 269 * "__init_array_start" → `___init_array_start`. ELF / COFF / Wasm 270 * intern verbatim. The temp buffer for the Mach-O case comes from 271 * `c->ctx->heap`, the same allocator the existing call sites 272 * (boundary_name, kit_jit_lookup, link_intern_c_name) already use. */ 273 Sym obj_format_c_mangle(Compiler* c, const char* name) { 274 size_t n, plen; 275 const char* prefix; 276 Heap* h; 277 char* buf; 278 Sym s; 279 SrcLoc loc = {0, 0, 0}; 280 if (!c || !name) return 0; 281 prefix = obj_format_c_label_prefix(c); 282 plen = slice_from_cstr(prefix).len; 283 if (plen == 0) return pool_intern_slice(c->global, slice_from_cstr(name)); 284 n = slice_from_cstr(name).len; 285 h = (Heap*)c->ctx->heap; 286 buf = (char*)h->alloc(h, n + plen + 1u, 1); 287 if (!buf) 288 compiler_panic(c, loc, "obj_format_c_mangle: oom prefixing '%.*s'", 289 SLICE_ARG(slice_from_cstr(name))); 290 memcpy(buf, prefix, plen); 291 memcpy(buf + plen, name, n); 292 buf[n + plen] = 0; 293 s = pool_intern_slice(c->global, (Slice){.s = buf, .len = (u32)(n + plen)}); 294 h->free(h, buf, n + plen + 1u); 295 return s; 296 } 297 298 /* Inverse of obj_format_c_mangle for diagnostic display. Strips the 299 * format's leading C-mangle byte from `*name` (advancing the pointer 300 * and decrementing `*len`) so panic text shows the source-level name 301 * regardless of target format. No-op for formats with no prefix. */ 302 void obj_format_demangle_c(const Compiler* c, const char** name, size_t* len) { 303 const char* prefix; 304 size_t plen; 305 if (!c || !name || !len || !*name) return; 306 prefix = obj_format_c_label_prefix(c); 307 plen = slice_from_cstr(prefix).len; 308 if (plen == 0 || *len < plen) return; 309 if (memcmp(*name, prefix, plen) != 0) return; 310 *name += plen; 311 *len -= plen; 312 } 313 314 /* Default entry symbol name baked into a freshly created Linker for 315 * this object format. Mach-O uses `_main` because LC_MAIN names main 316 * directly (dyld owns C runtime startup); ELF / COFF / Wasm use the 317 * historical `_start` produced by crt1.o. Returned as a NUL-terminated 318 * literal; caller interns. */ 319 const char* obj_format_default_entry_name(const Compiler* c) { 320 /* Mach-O: `_main` (LC_MAIN names main; dyld owns startup). 321 * COFF: `mainCRTStartup` (PE/Windows CRT entry sets up argc/argv and 322 * calls main; resolved against the user CRT archive, mingw's 323 * libmingwex.a — see doc/OBJ.md). 324 * ELF / Wasm: the historical `_start` produced by crt1.o. 325 * All driven by the per-format default_entry_name field; a row with a 326 * NULL field (or no format match) falls back to "_start". */ 327 const ObjFormatImpl* fmt = c ? obj_format_lookup(c->target.obj) : NULL; 328 const char* e = fmt ? fmt->default_entry_name : NULL; 329 return e ? e : "_start"; 330 } 331 332 int obj_format_carries_file_only_debug(const Compiler* c) { 333 const ObjFormatImpl* fmt = c ? obj_format_lookup(c->target.obj) : NULL; 334 return fmt && fmt->carries_file_only_debug; 335 } 336 337 int obj_format_builds_own_static_got(const Compiler* c) { 338 const ObjFormatImpl* fmt = c ? obj_format_lookup(c->target.obj) : NULL; 339 return fmt && fmt->builds_own_static_got; 340 } 341 342 int obj_format_weak_undef_pulls_archive_member(const Compiler* c) { 343 const ObjFormatImpl* fmt = c ? obj_format_lookup(c->target.obj) : NULL; 344 return fmt && fmt->weak_undef_pulls_archive_member; 345 } 346 347 int obj_format_weak_extern_underscore_alias(const Compiler* c) { 348 const ObjFormatImpl* fmt = c ? obj_format_lookup(c->target.obj) : NULL; 349 return fmt && fmt->weak_extern_underscore_alias; 350 } 351 352 int obj_format_supports_symbol_feature(const Compiler* c, int symfeat) { 353 /* The only format-divergent feature axis today is TLS access: only ELF and 354 * Mach-O can represent the ELF/Mach-O TLS-access features the CG layer mints. 355 * COFF (Windows TEB model) and Wasm cannot. Every other (non-TLS) feature is 356 * representable by every format. The per-format answer lives on the vtable. 357 */ 358 switch (symfeat) { 359 case KIT_CG_SYMFEAT_TLS_LOCAL_EXEC: 360 case KIT_CG_SYMFEAT_TLS_INITIAL_EXEC: 361 case KIT_CG_SYMFEAT_TLS_LOCAL_DYNAMIC: 362 case KIT_CG_SYMFEAT_TLS_GENERAL_DYNAMIC: { 363 const ObjFormatImpl* fmt = c ? obj_format_lookup(c->target.obj) : NULL; 364 return fmt && fmt->tls_symbol_features; 365 } 366 default: 367 return 1; 368 } 369 } 370 371 int obj_format_static_ifunc_via_rela_iplt(const Compiler* c) { 372 /* The single home for the (os == FREEBSD && obj == ELF) knowledge: 373 * FreeBSD's crt walks [__rela_iplt_start, __rela_iplt_end) of 374 * R_*_IRELATIVE relocs before main, so kit emits that table instead of 375 * the ctor-based __kit_ifunc_init path on FreeBSD/ELF. */ 376 return c && c->target.os == KIT_OS_FREEBSD && c->target.obj == KIT_OBJ_ELF; 377 } 378 379 u32 obj_format_static_ifunc_irelative_type(const Compiler* c) { 380 /* The R_*_IRELATIVE resolver wire type for the __rela_iplt table the 381 * predicate above selects. Resolves through the *target* format rather 382 * than a literal KIT_OBJ_ELF so the generic iplt pass names no format 383 * constant; non-ELF formats have no elf_arch and yield 0. */ 384 const ObjFormatImpl* fmt; 385 const ObjElfArchOps* ao; 386 if (!c) return 0u; 387 fmt = obj_format_lookup(c->target.obj); 388 ao = (fmt && fmt->elf_arch) ? fmt->elf_arch(c->target.arch) : NULL; 389 return ao ? ao->r_irelative : 0u; 390 } 391 392 u32 obj_format_elf_tls_tp_bias(const Compiler* c) { 393 const ObjFormatImpl* fmt; 394 const ObjElfArchOps* arch; 395 if (!c || c->target.obj != KIT_OBJ_ELF) return 0u; 396 fmt = obj_format_lookup(KIT_OBJ_ELF); 397 arch = (fmt && fmt->elf_arch) ? fmt->elf_arch(c->target.arch) : NULL; 398 return arch ? arch->tls_tp_bias : 0u; 399 } 400 401 int obj_format_boundary_sym_kind(const Compiler* c, KitSlice name, 402 int* symkind) { 403 /* PE/COFF owns two synthetic absolute globals the linker emits: 404 * `__ImageBase` (image base for ASLR-relative math) and `_tls_used` 405 * (the IMAGE_TLS_DIRECTORY anchor). Both are SK_ABS. Other formats 406 * own no boundary symbols here. */ 407 if (!c || c->target.obj != KIT_OBJ_COFF) return 0; 408 if (slice_eq_cstr(name, "__ImageBase") || slice_eq_cstr(name, "_tls_used")) { 409 if (symkind) *symkind = SK_ABS; 410 return 1; 411 } 412 return 0; 413 } 414 415 void obj_format_synth_inputs(const Compiler* c, Linker* l) { 416 const ObjFormatImpl* fmt = c ? obj_format_lookup(c->target.obj) : NULL; 417 if (fmt && fmt->synth_inputs) fmt->synth_inputs(l); 418 }