kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

obj_secnames.c (16721B)


      1 /* Format-aware canonical section names.
      2  *
      3  * The kit-internal section model (obj/obj.h) is format-neutral: every
      4  * Section carries a single Sym name plus a SecKind tag.  Most sections
      5  * keep ELF-style dot-prefixed names ("`.text`", "`.data`", …) end-to-end
      6  * because the per-format writer translates them as it emits headers.
      7  *
      8  * A handful of *synthetic* sections — built by the linker rather than
      9  * the front end — diverge in name across formats.  Their names need to
     10  * be picked at synthesis time, before any writer sees them, because the
     11  * linker uses the name to drive layout, symbol-boundary emission, and
     12  * the writer's output-section bucketing.  This TU centralizes that
     13  * choice so callers don't sprinkle target-format switches through
     14  * link_layout.c / link_dyn.c.
     15  *
     16  * ELF returns the historical dot-prefixed name; Mach-O and COFF return
     17  * their native spellings where one exists.  A format with no name for
     18  * the requested section panics with a "not yet implemented" message. */
     19 
     20 #include <kit/cg.h>
     21 #include <string.h>
     22 
     23 #include "core/core.h"
     24 #include "core/heap.h"
     25 #include "core/pool.h"
     26 #include "core/slice.h"
     27 #include "obj/format.h"
     28 #include "obj/obj.h"
     29 
     30 /* The C-symbol prefix for the active object format, never NULL: a format
     31  * row with a NULL c_label_prefix (or no format match) is treated as "". */
     32 const char* obj_format_c_label_prefix(const Compiler* c) {
     33   const ObjFormatImpl* fmt = c ? obj_format_lookup(c->target.obj) : NULL;
     34   const char* p = fmt ? fmt->c_label_prefix : NULL;
     35   return p ? p : "";
     36 }
     37 
     38 int obj_macho_debug_sectname(const char* name, size_t len, char out[17]) {
     39   /* Only ".debug_*" sections translate here; ".eh_frame" lives in __TEXT
     40    * and is handled by the writer's generic SecKind path and the reader's
     41    * own candidate list, not this helper. */
     42   static const char kPrefix[] = ".debug_";
     43   const size_t plen = sizeof(kPrefix) - 1; /* 7 */
     44   size_t i;
     45   if (!name || len < plen || memcmp(name, kPrefix, plen) != 0) return 0;
     46   /* out = "__" + name-without-dot, capped at Mach-O's 16-byte sectname.
     47    * The cap yields Apple's spelling for the one overlong DWARF5 name
     48    * (".debug_str_offsets" -> "__debug_str_offs"). */
     49   out[0] = '_';
     50   out[1] = '_';
     51   for (i = 0; i + 1 < len && i < 14u; ++i) out[2 + i] = name[1 + i];
     52   out[2 + i] = '\0';
     53   return 1;
     54 }
     55 
     56 const char* obj_macho_canon_secname(SecKind kind) {
     57   /* Mirrors the SecKind cases of name_to_seg_sect (src/obj/macho/emit.c):
     58    * keep the two in lockstep so a section's text spelling and its binary
     59    * header land in the same Mach-O (segment,section). */
     60   switch (kind) {
     61     case SEC_TEXT:
     62       return "__TEXT,__text";
     63     case SEC_RODATA:
     64       return "__TEXT,__const";
     65     case SEC_DATA:
     66       return "__DATA,__data";
     67     case SEC_BSS:
     68       return "__DATA,__bss";
     69     default: /* SEC_OTHER / SEC_DEBUG: spelled from the section's own name. */
     70       return NULL;
     71   }
     72 }
     73 
     74 /* Inverse of obj_macho_canon_secname: classify a Mach-O native
     75  * "segname,sectname" spelling into a SecKind.  Mirrors the per-segment
     76  * rules of the Mach-O reader (sec_kind_from_seg_sect in macho/read.c)
     77  * for the canonical names, but is name-only (no S_TYPE flags) so a
     78  * format-neutral caller can classify without the raw section header. */
     79 int obj_macho_seckind_for_secname(const char* name, size_t len, SecKind* kind) {
     80   const char* comma;
     81   size_t seg_len, sect_off, sect_len;
     82   if (!name || len == 0) return 0;
     83   comma = (const char*)memchr(name, ',', len);
     84   if (!comma) return 0;
     85   seg_len = (size_t)(comma - name);
     86   sect_off = seg_len + 1u;
     87   sect_len = len - sect_off;
     88   {
     89     const char* seg = name;
     90     const char* sect = name + sect_off;
     91     SecKind k;
     92     if (seg_len == 7 && memcmp(seg, "__DWARF", 7) == 0) {
     93       k = SEC_DEBUG;
     94     } else if (seg_len == 6 && memcmp(seg, "__TEXT", 6) == 0) {
     95       k = (sect_len == 6 && memcmp(sect, "__text", 6) == 0) ? SEC_TEXT
     96                                                             : SEC_RODATA;
     97     } else if (seg_len == 6 && memcmp(seg, "__DATA", 6) == 0) {
     98       k = (sect_len == 5 && memcmp(sect, "__bss", 5) == 0) ? SEC_BSS : SEC_DATA;
     99     } else {
    100       return 0;
    101     }
    102     if (kind) *kind = k;
    103     return 1;
    104   }
    105 }
    106 
    107 /* Translate a kit-internal (ELF-spelled) section name to its Mach-O
    108  * native "segname,sectname" spelling.  Generalizes
    109  * obj_macho_debug_sectname: the ".debug_*" DWARF case routes to
    110  * "__DWARF,__debug_*" (truncated to Mach-O's 16-byte sectname), and
    111  * ".eh_frame" routes to "__TEXT,__eh_frame".  Returns 0 for any other
    112  * name (caller falls back to its own spelling). */
    113 int obj_macho_native_secname(const char* name, size_t len, char out[40]) {
    114   char ds[17];
    115   if (!name || len == 0) return 0;
    116   if (obj_macho_debug_sectname(name, len, ds)) {
    117     /* "__DWARF," + ds (already "__debug_*", <=16 chars). */
    118     size_t dl = slice_from_cstr(ds).len;
    119     memcpy(out, "__DWARF,", 8);
    120     memcpy(out + 8, ds, dl);
    121     out[8 + dl] = '\0';
    122     return 1;
    123   }
    124   if (len == 9 && memcmp(name, ".eh_frame", 9) == 0) {
    125     memcpy(out, "__TEXT,__eh_frame", 17);
    126     out[17] = '\0';
    127     return 1;
    128   }
    129   return 0;
    130 }
    131 
    132 static Sym secname_panic_unimpl(Compiler* c, const char* which) {
    133   SrcLoc l = {0, 0, 0};
    134   compiler_panic(c, l,
    135                  "obj section name '%.*s' for target obj=%u not yet "
    136                  "implemented",
    137                  SLICE_ARG(slice_from_cstr(which)), (unsigned)c->target.obj);
    138   return 0;
    139 }
    140 
    141 Sym obj_secname_init_array(Compiler* c) {
    142   switch (c->target.obj) {
    143     case KIT_OBJ_ELF:
    144       return pool_intern_slice(c->global, SLICE_LIT(".init_array"));
    145     case KIT_OBJ_MACHO:
    146       return pool_intern_slice(c->global, SLICE_LIT("__DATA,__mod_init_func"));
    147     case KIT_OBJ_COFF:
    148       /* CRT runtime scans `.CRT$X[A-Z]` for ctor/dtor tables; XCU is
    149        * the user-constructor bucket.  See doc/OBJ.md. */
    150       return pool_intern_slice(c->global, SLICE_LIT(".CRT$XCU"));
    151     default:
    152       return secname_panic_unimpl(c, ".init_array");
    153   }
    154 }
    155 
    156 Sym obj_secname_fini_array(Compiler* c) {
    157   switch (c->target.obj) {
    158     case KIT_OBJ_ELF:
    159       return pool_intern_slice(c->global, SLICE_LIT(".fini_array"));
    160     case KIT_OBJ_MACHO:
    161       return pool_intern_slice(c->global, SLICE_LIT("__DATA,__mod_term_func"));
    162     case KIT_OBJ_COFF:
    163       /* `.CRT$XPA`/`XPZ` are markers; XPU is the user-destructor
    164        * bucket.  See doc/OBJ.md. */
    165       return pool_intern_slice(c->global, SLICE_LIT(".CRT$XPU"));
    166     default:
    167       return secname_panic_unimpl(c, ".fini_array");
    168   }
    169 }
    170 
    171 Sym obj_secname_preinit_array(Compiler* c) {
    172   switch (c->target.obj) {
    173     case KIT_OBJ_ELF:
    174       return pool_intern_slice(c->global, SLICE_LIT(".preinit_array"));
    175     case KIT_OBJ_MACHO:
    176       /* Mach-O has no direct `.preinit_array` analogue — dyld runs
    177        * S_MOD_INIT_FUNC_POINTERS only.  Phase 3 of the linker will
    178        * route preinit ctors through __mod_init_func; until then any
    179        * caller hitting this on a MACHO target is doing something the
    180        * platform can't represent. */
    181       return secname_panic_unimpl(c, ".preinit_array");
    182     case KIT_OBJ_COFF:
    183       /* CRT's own setup runs in `.CRT$XI*`; user pre-init lives at
    184        * XIA just after the CRT.  See doc/OBJ.md. */
    185       return pool_intern_slice(c->global, SLICE_LIT(".CRT$XIA"));
    186     default:
    187       return secname_panic_unimpl(c, ".preinit_array");
    188   }
    189 }
    190 
    191 Sym obj_secname_tdata(Compiler* c) {
    192   switch (c->target.obj) {
    193     case KIT_OBJ_ELF:
    194       return pool_intern_slice(c->global, SLICE_LIT(".tdata"));
    195     case KIT_OBJ_MACHO:
    196       return pool_intern_slice(c->global, SLICE_LIT("__DATA,__thread_data"));
    197     case KIT_OBJ_COFF:
    198       /* MSVC `.tls$` convention; linker concatenates `.tls$*` sorted
    199        * by suffix.  See doc/OBJ.md. */
    200       return pool_intern_slice(c->global, SLICE_LIT(".tls$"));
    201     case KIT_OBJ_WASM:
    202       /* Wasm has no thread-local storage model: a module instance owns a
    203        * single linear memory, so a thread-local is just an ordinary
    204        * data object. Keep the `.tdata` name (laid out like `.data`) and
    205        * lower tls_addr_of to a plain symbol address. */
    206       return pool_intern_slice(c->global, SLICE_LIT(".tdata"));
    207     default:
    208       return secname_panic_unimpl(c, ".tdata");
    209   }
    210 }
    211 
    212 Sym obj_secname_tbss(Compiler* c) {
    213   switch (c->target.obj) {
    214     case KIT_OBJ_ELF:
    215       return pool_intern_slice(c->global, SLICE_LIT(".tbss"));
    216     case KIT_OBJ_MACHO:
    217       return pool_intern_slice(c->global, SLICE_LIT("__DATA,__thread_bss"));
    218     case KIT_OBJ_COFF:
    219       /* sorted-alphabetically-last so it falls at the tail of the TLS
    220        * image's zero-fill region.  See doc/OBJ.md. */
    221       return pool_intern_slice(c->global, SLICE_LIT(".tls$ZZZ"));
    222     case KIT_OBJ_WASM:
    223       /* See obj_secname_tdata: wasm thread-locals are ordinary
    224        * (zero-filled) data. */
    225       return pool_intern_slice(c->global, SLICE_LIT(".tbss"));
    226     default:
    227       return secname_panic_unimpl(c, ".tbss");
    228   }
    229 }
    230 
    231 int obj_format_extern_via_got(const Compiler* c) {
    232   /* Mach-O always binds extern data through __got / non-lazy pointers
    233    * — direct ADRP+ADD to an imported symbol isn't representable in
    234    * ld64's reloc set.
    235    *
    236    * ELF static link: extern data is resolved at link time, so direct
    237    * page-relative addressing works (linker patches the ADRP+ADD).
    238    *
    239    * ELF -fPIC / -fPIE: extern data may resolve to a symbol defined
    240    * in a DSO at runtime; the codegen must route through the GOT so
    241    * the loader can patch a single slot rather than touching .text. */
    242   /* Mach-O always binds extern data through its own static GOT / non-lazy
    243    * pointers — same property the builds_own_static_got field records. */
    244   if (obj_format_builds_own_static_got(c)) return 1;
    245   if (c->target.obj == KIT_OBJ_ELF &&
    246       (c->target.pic == KIT_PIC_PIC || c->target.pic == KIT_PIC_PIE))
    247     return 1;
    248   return 0;
    249 }
    250 
    251 int obj_symbol_extern_via_got(const Compiler* c, ObjBuilder* obj,
    252                               ObjSymId sym) {
    253   const ObjSym* s;
    254   if (!obj_format_extern_via_got(c)) return 0;
    255   s = obj_symbol_get(obj, sym);
    256   return s && s->section_id == OBJ_SEC_NONE;
    257 }
    258 
    259 int obj_format_split_sections_as_atoms(const Compiler* c) {
    260   const ObjFormatImpl* fmt;
    261   if (!c) return 0;
    262   fmt = obj_format_lookup(c->target.obj);
    263   return fmt && fmt->split_sections_as_atoms;
    264 }
    265 
    266 /* C-symbol mangling for the active object format.  Mach-O prepends a
    267  * single `_` to every C source-level symbol on disk (matching Apple cc
    268  * and decl.c): "main" → `_main`, "_start" → `__start`,
    269  * "__init_array_start" → `___init_array_start`.  ELF / COFF / Wasm
    270  * intern verbatim.  The temp buffer for the Mach-O case comes from
    271  * `c->ctx->heap`, the same allocator the existing call sites
    272  * (boundary_name, kit_jit_lookup, link_intern_c_name) already use. */
    273 Sym obj_format_c_mangle(Compiler* c, const char* name) {
    274   size_t n, plen;
    275   const char* prefix;
    276   Heap* h;
    277   char* buf;
    278   Sym s;
    279   SrcLoc loc = {0, 0, 0};
    280   if (!c || !name) return 0;
    281   prefix = obj_format_c_label_prefix(c);
    282   plen = slice_from_cstr(prefix).len;
    283   if (plen == 0) return pool_intern_slice(c->global, slice_from_cstr(name));
    284   n = slice_from_cstr(name).len;
    285   h = (Heap*)c->ctx->heap;
    286   buf = (char*)h->alloc(h, n + plen + 1u, 1);
    287   if (!buf)
    288     compiler_panic(c, loc, "obj_format_c_mangle: oom prefixing '%.*s'",
    289                    SLICE_ARG(slice_from_cstr(name)));
    290   memcpy(buf, prefix, plen);
    291   memcpy(buf + plen, name, n);
    292   buf[n + plen] = 0;
    293   s = pool_intern_slice(c->global, (Slice){.s = buf, .len = (u32)(n + plen)});
    294   h->free(h, buf, n + plen + 1u);
    295   return s;
    296 }
    297 
    298 /* Inverse of obj_format_c_mangle for diagnostic display.  Strips the
    299  * format's leading C-mangle byte from `*name` (advancing the pointer
    300  * and decrementing `*len`) so panic text shows the source-level name
    301  * regardless of target format.  No-op for formats with no prefix. */
    302 void obj_format_demangle_c(const Compiler* c, const char** name, size_t* len) {
    303   const char* prefix;
    304   size_t plen;
    305   if (!c || !name || !len || !*name) return;
    306   prefix = obj_format_c_label_prefix(c);
    307   plen = slice_from_cstr(prefix).len;
    308   if (plen == 0 || *len < plen) return;
    309   if (memcmp(*name, prefix, plen) != 0) return;
    310   *name += plen;
    311   *len -= plen;
    312 }
    313 
    314 /* Default entry symbol name baked into a freshly created Linker for
    315  * this object format.  Mach-O uses `_main` because LC_MAIN names main
    316  * directly (dyld owns C runtime startup); ELF / COFF / Wasm use the
    317  * historical `_start` produced by crt1.o.  Returned as a NUL-terminated
    318  * literal; caller interns. */
    319 const char* obj_format_default_entry_name(const Compiler* c) {
    320   /* Mach-O: `_main` (LC_MAIN names main; dyld owns startup).
    321    * COFF: `mainCRTStartup` (PE/Windows CRT entry sets up argc/argv and
    322    *   calls main; resolved against the user CRT archive, mingw's
    323    *   libmingwex.a — see doc/OBJ.md).
    324    * ELF / Wasm: the historical `_start` produced by crt1.o.
    325    * All driven by the per-format default_entry_name field; a row with a
    326    * NULL field (or no format match) falls back to "_start". */
    327   const ObjFormatImpl* fmt = c ? obj_format_lookup(c->target.obj) : NULL;
    328   const char* e = fmt ? fmt->default_entry_name : NULL;
    329   return e ? e : "_start";
    330 }
    331 
    332 int obj_format_carries_file_only_debug(const Compiler* c) {
    333   const ObjFormatImpl* fmt = c ? obj_format_lookup(c->target.obj) : NULL;
    334   return fmt && fmt->carries_file_only_debug;
    335 }
    336 
    337 int obj_format_builds_own_static_got(const Compiler* c) {
    338   const ObjFormatImpl* fmt = c ? obj_format_lookup(c->target.obj) : NULL;
    339   return fmt && fmt->builds_own_static_got;
    340 }
    341 
    342 int obj_format_weak_undef_pulls_archive_member(const Compiler* c) {
    343   const ObjFormatImpl* fmt = c ? obj_format_lookup(c->target.obj) : NULL;
    344   return fmt && fmt->weak_undef_pulls_archive_member;
    345 }
    346 
    347 int obj_format_weak_extern_underscore_alias(const Compiler* c) {
    348   const ObjFormatImpl* fmt = c ? obj_format_lookup(c->target.obj) : NULL;
    349   return fmt && fmt->weak_extern_underscore_alias;
    350 }
    351 
    352 int obj_format_supports_symbol_feature(const Compiler* c, int symfeat) {
    353   /* The only format-divergent feature axis today is TLS access: only ELF and
    354    * Mach-O can represent the ELF/Mach-O TLS-access features the CG layer mints.
    355    * COFF (Windows TEB model) and Wasm cannot. Every other (non-TLS) feature is
    356    * representable by every format. The per-format answer lives on the vtable.
    357    */
    358   switch (symfeat) {
    359     case KIT_CG_SYMFEAT_TLS_LOCAL_EXEC:
    360     case KIT_CG_SYMFEAT_TLS_INITIAL_EXEC:
    361     case KIT_CG_SYMFEAT_TLS_LOCAL_DYNAMIC:
    362     case KIT_CG_SYMFEAT_TLS_GENERAL_DYNAMIC: {
    363       const ObjFormatImpl* fmt = c ? obj_format_lookup(c->target.obj) : NULL;
    364       return fmt && fmt->tls_symbol_features;
    365     }
    366     default:
    367       return 1;
    368   }
    369 }
    370 
    371 int obj_format_static_ifunc_via_rela_iplt(const Compiler* c) {
    372   /* The single home for the (os == FREEBSD && obj == ELF) knowledge:
    373    * FreeBSD's crt walks [__rela_iplt_start, __rela_iplt_end) of
    374    * R_*_IRELATIVE relocs before main, so kit emits that table instead of
    375    * the ctor-based __kit_ifunc_init path on FreeBSD/ELF. */
    376   return c && c->target.os == KIT_OS_FREEBSD && c->target.obj == KIT_OBJ_ELF;
    377 }
    378 
    379 u32 obj_format_static_ifunc_irelative_type(const Compiler* c) {
    380   /* The R_*_IRELATIVE resolver wire type for the __rela_iplt table the
    381    * predicate above selects. Resolves through the *target* format rather
    382    * than a literal KIT_OBJ_ELF so the generic iplt pass names no format
    383    * constant; non-ELF formats have no elf_arch and yield 0. */
    384   const ObjFormatImpl* fmt;
    385   const ObjElfArchOps* ao;
    386   if (!c) return 0u;
    387   fmt = obj_format_lookup(c->target.obj);
    388   ao = (fmt && fmt->elf_arch) ? fmt->elf_arch(c->target.arch) : NULL;
    389   return ao ? ao->r_irelative : 0u;
    390 }
    391 
    392 u32 obj_format_elf_tls_tp_bias(const Compiler* c) {
    393   const ObjFormatImpl* fmt;
    394   const ObjElfArchOps* arch;
    395   if (!c || c->target.obj != KIT_OBJ_ELF) return 0u;
    396   fmt = obj_format_lookup(KIT_OBJ_ELF);
    397   arch = (fmt && fmt->elf_arch) ? fmt->elf_arch(c->target.arch) : NULL;
    398   return arch ? arch->tls_tp_bias : 0u;
    399 }
    400 
    401 int obj_format_boundary_sym_kind(const Compiler* c, KitSlice name,
    402                                  int* symkind) {
    403   /* PE/COFF owns two synthetic absolute globals the linker emits:
    404    * `__ImageBase` (image base for ASLR-relative math) and `_tls_used`
    405    * (the IMAGE_TLS_DIRECTORY anchor).  Both are SK_ABS.  Other formats
    406    * own no boundary symbols here. */
    407   if (!c || c->target.obj != KIT_OBJ_COFF) return 0;
    408   if (slice_eq_cstr(name, "__ImageBase") || slice_eq_cstr(name, "_tls_used")) {
    409     if (symkind) *symkind = SK_ABS;
    410     return 1;
    411   }
    412   return 0;
    413 }
    414 
    415 void obj_format_synth_inputs(const Compiler* c, Linker* l) {
    416   const ObjFormatImpl* fmt = c ? obj_format_lookup(c->target.obj) : NULL;
    417   if (fmt && fmt->synth_inputs) fmt->synth_inputs(l);
    418 }