kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

emit.c (29236B)


/* ELF ET_REL writer. Walks a finalized ObjBuilder and emits a
 * little-endian relocatable object via the supplied Writer. The ELF
 * class follows Compiler.target.ptr_size: 8 selects ELFCLASS64, 4
 * selects ELFCLASS32.
 *
 * Layout strategy:
 *   1. plan ELF section headers (one per live obj section, one
 *      synthesized SHT_GROUP per ObjGroup, one .rela.<name> per obj
 *      section that carries relocations, plus .symtab and a single
 *      .strtab);
 *   2. build .symtab + .strtab content (locals first, then
 *      globals/weaks; section symbols are emitted only when they are
 *      present in the input — none are synthesized);
 *   3. build .rela.* content using the per-arch reloc map (selected
 *      by Compiler.target.arch);
 *   4. append the section names to .strtab, which doubles as the
 *      section-name table (e_shstrndx points at it; there is no
 *      separate .shstrtab);
 *   5. assign file offsets sequentially, respecting per-section
 *      addralign;
 *   6. write Ehdr, then each section's bytes (seeking to its sh_offset),
 *      then the section header table.
 *
 * Little-endian only. Per-arch reloc tables (elf_reloc_<arch>.c)
 * supply the RelocKind -> ELF type mapping; e_machine is selected from
 * Compiler.target.arch. Big-endian targets, and pointer sizes other
 * than 4 or 8, panic at entry.
 *
 * See doc/DESIGN.md §5.5 for the round-trip invariant: read_elf of this
 * output must produce an ObjBuilder shape-equivalent to the input,
 * modulo section ordering. STT_SECTION symbols are not a source of
 * difference: the writer only emits the section symbols that are
 * already present in the input. */
     27 
     28 #include <string.h>
     29 
     30 #include "core/arena.h"
     31 #include "core/buf.h"
     32 #include "core/heap.h"
     33 #include "core/pool.h"
     34 #include "core/slice.h"
     35 #include "core/util.h"
     36 #include "obj/elf/elf.h"
     37 #include "obj/format.h"
     38 
     39 /* ---- per-ELF-section plan record ---- */
     40 
     41 /* Internal section descriptor used during planning. Mirrors Elf64_Shdr
     42  * but with an explicit pointer to the source bytes (either an obj
     43  * Section's chunked Buf or a synthesized linear buffer). NOBITS sections
     44  * have no source bytes and consume no file space. */
     45 typedef struct ElfSec {
     46   /* Final shdr fields (little-endian-encoded at write time). */
     47   u32 sh_name; /* offset into shstrtab */
     48   u32 sh_type;
     49   u64 sh_flags;
     50   u64 sh_addr; /* always 0 for ET_REL */
     51   u64 sh_offset;
     52   u64 sh_size;
     53   u32 sh_link;
     54   u32 sh_info;
     55   u64 sh_addralign;
     56   u64 sh_entsize;
     57 
     58   /* Section name. The name string lives in scratch (synthesized) or in
     59    * the global pool (obj-section names); buf-source is set for sections
     60    * carrying obj-section bytes, raw_bytes for synthesized. */
     61   const char* name;
     62   u32 name_len;
     63 
     64   const Buf* obj_bytes; /* one of these three is set: */
     65   const u8* raw_bytes;  /*                            */
     66   int is_nobits;        /*                            */
     67 } ElfSec;
     68 
     69 /* ---- emit ---- */
     70 
     71 static u32 sec_flags_to_elf(u16 flags) {
     72   u64 r = 0;
     73   if (flags & SF_ALLOC) r |= SHF_ALLOC;
     74   if (flags & SF_EXEC) r |= SHF_EXECINSTR;
     75   if (flags & SF_WRITE) r |= SHF_WRITE;
     76   if (flags & SF_TLS) r |= SHF_TLS;
     77   if (flags & SF_MERGE) r |= SHF_MERGE;
     78   if (flags & SF_STRINGS) r |= SHF_STRINGS;
     79   if (flags & SF_GROUP) r |= SHF_GROUP;
     80   if (flags & SF_LINK_ORDER) r |= SHF_LINK_ORDER;
     81   if (flags & SF_RETAIN) r |= SHF_GNU_RETAIN;
     82   return (u32)r;
     83 }
     84 
     85 static u32 sec_sem_to_elf(u16 sem) {
     86   switch (sem) {
     87     case SSEM_PROGBITS:
     88       return SHT_PROGBITS;
     89     case SSEM_NOBITS:
     90       return SHT_NOBITS;
     91     case SSEM_SYMTAB:
     92       return SHT_SYMTAB;
     93     case SSEM_STRTAB:
     94       return SHT_STRTAB;
     95     case SSEM_RELA:
     96       return SHT_RELA;
     97     case SSEM_REL:
     98       return SHT_REL;
     99     case SSEM_NOTE:
    100       return SHT_NOTE;
    101     case SSEM_INIT_ARRAY:
    102       return SHT_INIT_ARRAY;
    103     case SSEM_FINI_ARRAY:
    104       return SHT_FINI_ARRAY;
    105     case SSEM_PREINIT_ARRAY:
    106       return SHT_PREINIT_ARRAY;
    107     case SSEM_GROUP:
    108       return SHT_GROUP;
    109     default:
    110       return SHT_PROGBITS;
    111   }
    112 }
    113 
    114 static u8 sym_bind_to_elf(u16 bind) { return elf_st_bind((u8)bind); }
    115 
    116 /* SK_COMMON -> STT_OBJECT: real ELF emitters (clang, gcc, GNU as) write
    117  * tentative definitions as STT_OBJECT with shndx=SHN_COMMON. STT_COMMON
    118  * is a near-extinct convention that llvm-readelf renders as the literal
    119  * type name "COMMON" — emitting it breaks roundtrip against any
    120  * toolchain-produced .o. The shared elf.h table encodes this directly. */
    121 static u8 sym_kind_to_elf(u16 kind) { return elf_st_type((u8)kind); }
    122 
    123 static u8 sym_vis_to_elf(u8 vis) { return elf_st_other(vis); }
    124 
    125 static u16 sym_shndx(const ObjSym* s, const u32* obj_to_elf, u32 nsec) {
    126   if (s->kind == SK_COMMON) return (u16)SHN_COMMON;
    127   if (s->kind == SK_ABS) return (u16)SHN_ABS;
    128   /* STT_FILE conventionally carries SHN_ABS as its shndx — its value
    129    * field is not an address. Match clang/binutils. */
    130   if (s->kind == SK_FILE) return (u16)SHN_ABS;
    131   if (s->section_id == OBJ_SEC_NONE) return (u16)SHN_UNDEF;
    132   if (s->section_id >= nsec) return (u16)SHN_UNDEF;
    133   return (u16)obj_to_elf[s->section_id];
    134 }
    135 
    136 static const char* sym_to_str(Compiler* c, Sym n, u32* len_out) {
    137   Slice sl = pool_slice(c->global, n);
    138   const char* s = sl.s;
    139   if (!s) {
    140     *len_out = 0;
    141     return "";
    142   }
    143   *len_out = (u32)sl.len;
    144   return s;
    145 }
    146 
    147 /* Append `len` bytes of `s` followed by a single NUL to `b`, return
    148  * the offset at which `s` was placed.
    149  *
    150  * If `s` already exists at some offset (as a NUL-terminated substring
    151  * starting at any offset), reuse that offset — clang/binutils both
    152  * dedupe trivially identical strings, and matching the convention
    153  * keeps our strtab the same size as theirs. The dedupe is linear in
    154  * the strtab; section + symbol counts are small enough that this is
    155  * fine without a hash. */
    156 static u32 strtab_add(Buf* b, const char* s, u32 len) {
    157   /* Empty string: always at offset 0 (the leading NUL). */
    158   if (len == 0) return 0;
    159 
    160   /* Linear search for an existing copy. We must scan chunk-by-chunk
    161    * because Buf is segmented; flatten to a temp scratch buffer first
    162    * if non-empty and search there. For our tiny strtabs, the cost is
    163    * dominated by the writes anyway. */
    164   u32 total = buf_pos(b);
    165   if (total > len) {
    166     /* Flatten just to search — not optimal but the strtab here is
    167      * always small (low kilobytes at most). */
    168     u8 stack[256];
    169     u8* tmp =
    170         total <= sizeof stack ? stack : (u8*)b->heap->alloc(b->heap, total, 1);
    171     if (tmp) {
    172       buf_flatten(b, tmp);
    173       for (u32 i = 0; i + len < total; ++i) {
    174         if (tmp[i + len] == 0 && memcmp(tmp + i, s, len) == 0) {
    175           if (tmp != stack) b->heap->free(b->heap, tmp, total);
    176           return i;
    177         }
    178       }
    179       if (tmp != stack) b->heap->free(b->heap, tmp, total);
    180     }
    181   }
    182 
    183   u32 off = total;
    184   buf_write(b, s, len);
    185   {
    186     u8 z = 0;
    187     buf_write(b, &z, 1);
    188   }
    189   return off;
    190 }
    191 
    192 void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) {
    193   Heap* h = (Heap*)c->ctx->heap;
    194 
    195   /* Run the tombstone sweep before any iteration: cascades removed
    196    * sections into their defining symbols, drops dangling relocs,
    197    * compacts groups, and absorbs the historical UNDEF prune. After this
    198    * call every direct ID-based access below must skip entries whose
    199    * `removed` bit is set. */
    200   obj_sweep_dead(ob);
    201 
    202   /* ---- target validation ------------------------------------------ */
    203   const ObjFormatImpl* fmt = obj_format_lookup(KIT_OBJ_ELF);
    204   const ObjElfArchOps* elf =
    205       fmt && fmt->elf_arch ? fmt->elf_arch(c->target.arch) : NULL;
    206   u32 e_machine;
    207   u32 (*reloc_to)(u32);
    208   if (!elf || !elf->reloc_to) {
    209     compiler_panic(c, SRCLOC_NONE, "emit_elf: unsupported target arch %u",
    210                    (u32)c->target.arch);
    211   }
    212   e_machine = elf->e_machine;
    213   reloc_to = elf->reloc_to;
    214   if (c->target.big_endian) {
    215     compiler_panic(c, SRCLOC_NONE, "emit_elf: big-endian ELF not supported");
    216   }
    217   /* is32 selects ELFCLASS32 (RV32) record widths/layouts everywhere
    218    * below; ptr_size==8 is the established ELFCLASS64 path. */
    219   if (c->target.ptr_size != 8 && c->target.ptr_size != 4) {
    220     compiler_panic(c, SRCLOC_NONE, "emit_elf: ptr_size %u (expected 4 or 8)",
    221                    (u32)c->target.ptr_size);
    222   }
    223   int is32 = (c->target.ptr_size == 4);
    224   u32 sym_size = is32 ? ELF32_SYM_SIZE : ELF64_SYM_SIZE;
    225   u32 rela_size = is32 ? ELF32_RELA_SIZE : ELF64_RELA_SIZE;
    226   u32 ehdr_size = is32 ? ELF32_EHDR_SIZE : ELF64_EHDR_SIZE;
    227   u32 shdr_size = is32 ? ELF32_SHDR_SIZE : ELF64_SHDR_SIZE;
    228 
    229   /* ---- pass 1: plan ELF section list ------------------------------ */
    230 
    231   u32 nobjsec = obj_section_count(ob);
    232 
    233   u32 nobjgrp = obj_group_count(ob);
    234   /* Upper bound on ELF section count:
    235    *   1 (SHN_UNDEF)
    236    * + nobjsec - 1 (one ELF entry per real obj section)
    237    * + nobjsec - 1 (worst case: a .rela.<name> per obj section)
    238    * + nobjgrp - 1 (one synthesized SHT_GROUP per ObjGroup)
    239    * + 3 (.symtab, .strtab, .shstrtab)
    240    */
    241   u32 max_secs =
    242       1 + (nobjsec - 1) + (nobjsec - 1) + (nobjgrp ? nobjgrp - 1 : 0) + 3;
    243   if (max_secs < 4) max_secs = 4;
    244   ElfSec* secs = arena_array(c->scratch, ElfSec, max_secs);
    245   u32 nsecs = 0;
    246   memset(&secs[nsecs++], 0, sizeof secs[0]); /* index 0 = SHN_UNDEF */
    247 
    248   /* Map obj section id -> ELF section index. */
    249   u32* obj_to_elf = arena_zarray(c->scratch, u32, nobjsec);
    250 
    251   for (u32 i = 1; i < nobjsec; ++i) {
    252     const Section* s = obj_section_get(ob, i);
    253     if (s->removed) continue; /* tombstone — see obj_sweep_dead */
    254     ElfSec* es = &secs[nsecs];
    255     memset(es, 0, sizeof *es);
    256     u32 nlen;
    257     es->name = sym_to_str(c, s->name, &nlen);
    258     es->name_len = nlen;
    259     /* Honor format-specific overrides preserved by the reader for
    260      * sh_type/sh_flags bits the canonical SecSem/SecFlag enums
    261      * don't model (e.g. SHT_LLVM_ADDRSIG, SHF_EXCLUDE). */
    262     es->sh_type = (s->ext_kind == OBJ_EXT_ELF && s->ext_type)
    263                       ? s->ext_type
    264                       : sec_sem_to_elf(s->sem);
    265     es->sh_flags = sec_flags_to_elf(s->flags);
    266     if (s->ext_kind == OBJ_EXT_ELF) es->sh_flags |= s->ext_flags;
    267     es->sh_addr = 0;
    268     es->sh_addralign = s->align ? s->align : 1;
    269     es->sh_entsize = s->entsize;
    270     es->sh_link = 0;
    271     es->sh_info = 0;
    272     if (s->sem == SSEM_NOBITS) {
    273       es->is_nobits = 1;
    274       es->sh_size = s->bss_size;
    275     } else {
    276       es->obj_bytes = &s->bytes;
    277       es->sh_size = s->bytes.total;
    278     }
    279     obj_to_elf[i] = nsecs++;
    280   }
    281 
    282   /* ---- pass 2: build .symtab + .strtab content -------------------- */
    283 
    284   /* .strtab: leading NUL byte. Then a name per emitted symbol. */
    285   Buf strtab;
    286   buf_init(&strtab, h);
    287   {
    288     u8 z = 0;
    289     buf_write(&strtab, &z, 1);
    290   }
    291 
    292   /* The .symtab is built into a contiguous arena buffer of fixed-size
    293    * 24-byte records. We don't know the count up front; bound by
    294    * (nobjsec section symbols) + (obj symbol count). */
    295   u32 nobjsym = 0;
    296   {
    297     ObjSymIter* it = obj_symiter_new(ob);
    298     ObjSymEntry e;
    299     while (obj_symiter_next(it, &e)) ++nobjsym;
    300     obj_symiter_free(it);
    301   }
    302   u32 max_syms = 1 + (nobjsec - 1) + nobjsym;
    303   u8* symtab =
    304       (u8*)arena_alloc(c->scratch, (size_t)sym_size * max_syms, _Alignof(u64));
    305   u32 nsyms = 0;
    306   memset(&symtab[nsyms * sym_size], 0, sym_size);
    307   nsyms = 1; /* index 0: STN_UNDEF */
    308 
    309 /* Helper to emit one symbol record at index `idx` into symtab.
    310  * Elf64_Sym (24B) and Elf32_Sym (16B) REORDER fields: ELF32 places
    311  * st_value/st_size BEFORE st_info/st_other/st_shndx, so select the byte
    312  * layout by `is32` rather than just narrowing widths. */
    313 #define WRITE_SYM(idx, st_name, st_info, st_other, st_shndx, st_value, \
    314                   st_size)                                             \
    315   do {                                                                 \
    316     u8* slot = &symtab[(idx) * sym_size];                              \
    317     if (is32) {                                                        \
    318       slot[0] = (u8)((st_name));                                       \
    319       slot[1] = (u8)((st_name) >> 8);                                  \
    320       slot[2] = (u8)((st_name) >> 16);                                 \
    321       slot[3] = (u8)((st_name) >> 24);                                 \
    322       for (int _b = 0; _b < 4; ++_b)                                   \
    323         slot[4 + _b] = (u8)((u64)(st_value) >> (_b * 8));              \
    324       for (int _b = 0; _b < 4; ++_b)                                   \
    325         slot[8 + _b] = (u8)((u64)(st_size) >> (_b * 8));               \
    326       slot[12] = (u8)((st_info));                                      \
    327       slot[13] = (u8)((st_other));                                     \
    328       slot[14] = (u8)((st_shndx));                                     \
    329       slot[15] = (u8)((st_shndx) >> 8);                                \
    330     } else {                                                           \
    331       slot[0] = (u8)((st_name));                                       \
    332       slot[1] = (u8)((st_name) >> 8);                                  \
    333       slot[2] = (u8)((st_name) >> 16);                                 \
    334       slot[3] = (u8)((st_name) >> 24);                                 \
    335       slot[4] = (u8)((st_info));                                       \
    336       slot[5] = (u8)((st_other));                                      \
    337       slot[6] = (u8)((st_shndx));                                      \
    338       slot[7] = (u8)((st_shndx) >> 8);                                 \
    339       for (int _b = 0; _b < 8; ++_b)                                   \
    340         slot[8 + _b] = (u8)((u64)(st_value) >> (_b * 8));              \
    341       for (int _b = 0; _b < 8; ++_b)                                   \
    342         slot[16 + _b] = (u8)((u64)(st_size) >> (_b * 8));              \
    343     }                                                                  \
    344   } while (0)
    345 
    346   /* No automatic STT_SECTION synthesis. Section symbols are emitted
    347    * iff they are present in the input ObjBuilder (typically as
    348    * SK_SECTION ObjSyms preserved by read_elf, or added explicitly by
    349    * a hand-built caller that needs to reference a section by sym).
    350    * This matches clang's output: only sections referenced by section
    351    * symbols carry one. */
    352 
    353   /* Map obj symbol id -> elf symbol index. */
    354   u32* sym_to_elf = arena_zarray(c->scratch, u32, nobjsym + 2);
    355 
    356   /* Two passes over obj symbols: locals, then globals/weak. */
    357   for (int pass = 0; pass < 2; ++pass) {
    358     ObjSymIter* it = obj_symiter_new(ob);
    359     ObjSymEntry e;
    360     while (obj_symiter_next(it, &e)) {
    361       const ObjSym* s = e.sym;
    362       if (s->removed) continue; /* spurious-UNDEF prune + explicit removal */
    363       int is_local = (s->bind == SB_LOCAL);
    364       if ((pass == 0) != is_local) continue;
    365       u32 nlen;
    366       const char* nm = sym_to_str(c, s->name, &nlen);
    367       u32 nameoff = nlen ? strtab_add(&strtab, nm, nlen) : 0;
    368       u8 info =
    369           ELF64_ST_INFO(sym_bind_to_elf(s->bind), sym_kind_to_elf(s->kind));
    370       u8 other = sym_vis_to_elf(s->vis);
    371       u16 shndx = sym_shndx(s, obj_to_elf, nobjsec);
    372       u64 value = (s->kind == SK_COMMON) ? s->common_align : s->value;
    373       WRITE_SYM(nsyms, nameoff, info, other, shndx, value, s->size);
    374       sym_to_elf[e.id] = nsyms;
    375       nsyms++;
    376     }
    377     obj_symiter_free(it);
    378   }
    379 #undef WRITE_SYM
    380 
    381   /* sh_info on .symtab is the index of the first non-local symbol.
    382    * Locals = 1 (STN_UNDEF) + count of input-side LOCAL obj symbols. */
    383   u32 nlocals = 1;
    384   {
    385     ObjSymIter* it = obj_symiter_new(ob);
    386     ObjSymEntry e;
    387     while (obj_symiter_next(it, &e)) {
    388       if (e.sym->removed) continue;
    389       if (e.sym->bind == SB_LOCAL) ++nlocals;
    390     }
    391     obj_symiter_free(it);
    392   }
    393 
    394   /* Append .symtab + .strtab + .shstrtab planning records.
    395    * sh_link/sh_info for .symtab and .rela.* are filled in once we know
    396    * each section's elf index. */
    397   u32 idx_symtab = 0, idx_strtab = 0, idx_shstrtab = 0;
    398 
    399   /* ---- pass 2.5: synthesize SHT_GROUP sections from ObjGroups ----
    400    * Append one SHT_GROUP section per ObjGroup. The body is a 4-byte LE
    401    * flags word followed by the elf section index of each member.
    402    * Placed before relas so the file layout has data sections, then
    403    * groups, then relas/symtab/strtab — matching clang's ordering and
    404    * keeping data-section offsets independent of group presence. */
    405   u32* group_elf_idx =
    406       nobjgrp > 1 ? arena_array(c->scratch, u32, nobjgrp) : NULL;
    407   if (group_elf_idx) memset(group_elf_idx, 0, sizeof(u32) * nobjgrp);
    408   for (u32 gi = 1; gi < nobjgrp; ++gi) {
    409     const ObjGroup* g = obj_group_get(ob, gi);
    410     if (!g || g->removed) continue;
    411 
    412     u32 body_size = 4u + 4u * g->nsections;
    413     u8* body = (u8*)arena_alloc(c->scratch, body_size, _Alignof(u32));
    414     u32 gflags = g->flags ? g->flags : 1u; /* GRP_COMDAT default */
    415     body[0] = (u8)(gflags);
    416     body[1] = (u8)(gflags >> 8);
    417     body[2] = (u8)(gflags >> 16);
    418     body[3] = (u8)(gflags >> 24);
    419     for (u32 j = 0; j < g->nsections; ++j) {
    420       ObjSecId sid = g->sections[j];
    421       u32 eidx = (sid && sid < nobjsec) ? obj_to_elf[sid] : 0;
    422       u8* slot = body + 4 + j * 4;
    423       slot[0] = (u8)(eidx);
    424       slot[1] = (u8)(eidx >> 8);
    425       slot[2] = (u8)(eidx >> 16);
    426       slot[3] = (u8)(eidx >> 24);
    427     }
    428 
    429     u32 nlen;
    430     const char* gname = sym_to_str(c, g->name, &nlen);
    431     if (nlen == 0) {
    432       gname = ".group";
    433       nlen = 6;
    434     }
    435 
    436     ElfSec* es = &secs[nsecs];
    437     memset(es, 0, sizeof *es);
    438     es->name = gname;
    439     es->name_len = nlen;
    440     es->sh_type = SHT_GROUP;
    441     es->sh_flags = 0;
    442     es->sh_addralign = 4;
    443     es->sh_entsize = 4;
    444     es->sh_info = (g->signature && g->signature < nobjsym + 2)
    445                       ? sym_to_elf[g->signature]
    446                       : 0;
    447     /* sh_link patched below once idx_symtab is known. */
    448     es->raw_bytes = body;
    449     es->sh_size = body_size;
    450     group_elf_idx[gi] = nsecs;
    451     nsecs++;
    452   }
    453 
    454   /* ---- pass 3: build .rela.<name> contents ------------------------ */
    455 
    456   /* Allocate one .rela section per obj section that has any relocs. */
    457   u32 total_relocs = obj_reloc_total(ob);
    458 
    459   typedef struct RelaPlan {
    460     u32 obj_section; /* obj section the rela applies to */
    461     u8* bytes;       /* arena-allocated rela bytes */
    462     u32 size;        /* bytes count = nrelocs * rela_size (24 or 12) */
    463   } RelaPlan;
    464 
    465   RelaPlan* rela_plans = arena_zarray(c->scratch, RelaPlan, nobjsec);
    466   u32 nrela_plans = 0;
    467 
    468   for (u32 si = 1; si < nobjsec; ++si) {
    469     const Section* host = obj_section_get(ob, si);
    470     if (!host || host->removed) continue;
    471     u32 nr = obj_reloc_count(ob, si);
    472     if (!nr) continue;
    473     u8* buf =
    474         (u8*)arena_alloc(c->scratch, (size_t)rela_size * nr, _Alignof(u64));
    475     u32 j = 0;
    476     for (u32 i = 0; i < total_relocs; ++i) {
    477       const Reloc* r = obj_reloc_at(ob, i);
    478       if (r->removed) continue;
    479       if (r->section_id != si) continue;
    480       u32 etype = reloc_to(r->kind);
    481       if (etype == ELF_R_AARCH64_NONE /* == ELF_R_X86_64_NONE == 0 */ &&
    482           r->kind != R_NONE) {
    483         compiler_panic(c, SRCLOC_NONE,
    484                        "emit_elf: unsupported relocation kind %u for arch %u",
    485                        (u32)r->kind, (u32)c->target.arch);
    486       }
    487       u32 sym_elf_idx;
    488       if (r->sym == OBJ_SYM_NONE) {
    489         /* Reloc against a section: use the synthesized
    490          * STT_SECTION symbol if the obj reloc carries a
    491          * section_id-equivalent; otherwise 0. */
    492         sym_elf_idx = 0;
    493       } else {
    494         sym_elf_idx = sym_to_elf[r->sym];
    495       }
    496       /* Elf32_Rela (12B): r_offset@0, r_info@4 (ELF32_R_INFO, 8-bit
    497        * type), r_addend@8 — all 4-byte. Elf64_Rela (24B): all 8-byte. */
    498       u8* slot = &buf[j * rela_size];
    499       if (is32) {
    500         for (int b = 0; b < 4; ++b) slot[b] = (u8)((u32)r->offset >> (b * 8));
    501         u32 info = ELF32_R_INFO(sym_elf_idx, etype);
    502         for (int b = 0; b < 4; ++b) slot[4 + b] = (u8)(info >> (b * 8));
    503         for (int b = 0; b < 4; ++b)
    504           slot[8 + b] = (u8)((u32)r->addend >> (b * 8));
    505       } else {
    506         for (int b = 0; b < 8; ++b) slot[b] = (u8)((u64)r->offset >> (b * 8));
    507         u64 info = ELF64_R_INFO(sym_elf_idx, etype);
    508         for (int b = 0; b < 8; ++b) slot[8 + b] = (u8)(info >> (b * 8));
    509         for (int b = 0; b < 8; ++b)
    510           slot[16 + b] = (u8)((u64)r->addend >> (b * 8));
    511       }
    512       ++j;
    513     }
    514     rela_plans[nrela_plans].obj_section = si;
    515     rela_plans[nrela_plans].bytes = buf;
    516     rela_plans[nrela_plans].size = nr * rela_size;
    517     nrela_plans++;
    518   }
    519 
    520   /* Append ElfSec entries for each .rela.<name>. Names are ".rela" +
    521    * the obj section name; allocate in scratch. */
    522   u32* rela_elf_idx = arena_array(c->scratch, u32, nrela_plans + 1);
    523   for (u32 ri = 0; ri < nrela_plans; ++ri) {
    524     u32 si = rela_plans[ri].obj_section;
    525     const Section* s = obj_section_get(ob, si);
    526     u32 base_len;
    527     const char* base = sym_to_str(c, s->name, &base_len);
    528     u32 nlen = 5 + base_len; /* ".rela" + base */
    529     char* nm = (char*)arena_alloc(c->scratch, nlen + 1, 1);
    530     memcpy(nm, ".rela", 5);
    531     memcpy(nm + 5, base, base_len);
    532     nm[nlen] = 0;
    533 
    534     ElfSec* es = &secs[nsecs];
    535     memset(es, 0, sizeof *es);
    536     es->name = nm;
    537     es->name_len = nlen;
    538     es->sh_type = SHT_RELA;
    539     es->sh_flags = SHF_INFO_LINK;
    540     es->sh_addralign = is32 ? 4 : 8;
    541     es->sh_entsize = rela_size;
    542     es->sh_info = obj_to_elf[si]; /* section the relas apply to */
    543     /* sh_link filled below once we know symtab's elf index. */
    544     es->raw_bytes = rela_plans[ri].bytes;
    545     es->sh_size = rela_plans[ri].size;
    546     rela_elf_idx[ri] = nsecs;
    547     nsecs++;
    548   }
    549 
    550   /* Append .symtab. */
    551   {
    552     ElfSec* es = &secs[nsecs];
    553     memset(es, 0, sizeof *es);
    554     es->name = ".symtab";
    555     es->name_len = 7;
    556     es->sh_type = SHT_SYMTAB;
    557     es->sh_flags = 0;
    558     es->sh_addralign = is32 ? 4 : 8;
    559     es->sh_entsize = sym_size;
    560     es->raw_bytes = symtab;
    561     es->sh_size = (u64)nsyms * sym_size;
    562     es->sh_info = nlocals; /* first non-local symbol */
    563     idx_symtab = nsecs;
    564     nsecs++;
    565   }
    566 
    567   /* Patch sh_link on each .rela section now that we have idx_symtab. */
    568   for (u32 ri = 0; ri < nrela_plans; ++ri) {
    569     secs[rela_elf_idx[ri]].sh_link = idx_symtab;
    570   }
    571   /* SHT_GROUP also points its sh_link at .symtab (the symtab the
    572    * signature symbol's index in sh_info refers to). */
    573   for (u32 gi = 1; gi < nobjgrp; ++gi) {
    574     if (group_elf_idx && group_elf_idx[gi]) {
    575       secs[group_elf_idx[gi]].sh_link = idx_symtab;
    576     }
    577   }
    578 
    579   /* ---- pass 4: append section names to the same strtab and emit it.
    580    *
    581    * clang reuses .strtab for both symbol names and section names —
    582    * e_shstrndx and .symtab.sh_link both point at it. Match that
    583    * convention: continue appending into `strtab` (which already
    584    * contains the symbol names), then emit one STRTAB section. */
    585 
    586   /* secs[0] (SHN_UNDEF) carries name "" → offset 0. */
    587   secs[0].sh_name = 0;
    588   for (u32 i = 1; i < nsecs; ++i) {
    589     secs[i].sh_name = strtab_add(&strtab, secs[i].name, secs[i].name_len);
    590   }
    591 
    592   /* Append the .strtab section record itself; its own name lands in
    593    * the same buffer (so the strtab is self-describing). */
    594   {
    595     const char* nm = ".strtab";
    596     u32 nlen = 7;
    597     u32 nameoff = strtab_add(&strtab, nm, nlen);
    598     u32 sz = buf_pos(&strtab);
    599     u8* flat = (u8*)arena_alloc(c->scratch, sz, 1);
    600     buf_flatten(&strtab, flat);
    601     buf_fini(&strtab);
    602 
    603     ElfSec* es = &secs[nsecs];
    604     memset(es, 0, sizeof *es);
    605     es->name = nm;
    606     es->name_len = nlen;
    607     es->sh_name = nameoff;
    608     es->sh_type = SHT_STRTAB;
    609     es->sh_addralign = 1;
    610     es->raw_bytes = flat;
    611     es->sh_size = sz;
    612     idx_strtab = nsecs;
    613     idx_shstrtab = nsecs; /* same section serves both roles */
    614     nsecs++;
    615   }
    616   secs[idx_symtab].sh_link = idx_strtab;
    617 
    618   /* ---- pass 5: assign file offsets -------------------------------- */
    619 
    620   u64 cur = ehdr_size;
    621   for (u32 i = 1; i < nsecs; ++i) {
    622     ElfSec* es = &secs[i];
    623     if (es->is_nobits) {
    624       /* sh_offset for NOBITS is conventionally where the next
    625        * non-NOBITS section begins; we set it to cur without
    626        * advancing. */
    627       es->sh_offset = cur;
    628       continue;
    629     }
    630     u64 a = es->sh_addralign ? es->sh_addralign : 1;
    631     cur = ALIGN_UP(cur, a);
    632     es->sh_offset = cur;
    633     cur += es->sh_size;
    634   }
    635   /* ELF32 toolchains conventionally align the SHT to 4; ELF64 to 8. */
    636   cur = ALIGN_UP(cur, (u64)(is32 ? 4 : 8));
    637   u64 e_shoff = cur;
    638 
    639   /* ---- pass 6: write Ehdr ----------------------------------------- */
    640 
    641   u8 ident[EI_NIDENT] = {0};
    642   ident[EI_MAG0] = ELFMAG0;
    643   ident[EI_MAG1] = ELFMAG1;
    644   ident[EI_MAG2] = ELFMAG2;
    645   ident[EI_MAG3] = ELFMAG3;
    646   ident[EI_CLASS] = is32 ? ELFCLASS32 : ELFCLASS64;
    647   ident[EI_DATA] = ELFDATA2LSB;
    648   ident[EI_VERSION] = EV_CURRENT;
    649   /* SysV is the canonical OSABI for Linux relocatable .o files. Targets that
    650    * would otherwise be ambiguous after object detection get explicit badges:
    651    * freestanding uses kit's private STANDALONE byte, and FreeBSD uses the
    652    * standard FreeBSD OSABI so `kit ld` can select FreeBSD runtime/link policy
    653    * from a plain relocatable input.
    654    *
    655    * GNU extensions (STT_GNU_IFUNC, SHF_GNU_RETAIN, ...) upgrade Linux/SysV and
    656    * freestanding objects to ELFOSABI_GNU below. FreeBSD keeps its OSABI badge;
    657    * GNU-flavored symbol/section kinds do not make the target Linux. */
    658   {
    659     Compiler* osc = obj_compiler(ob);
    660     if (osc && osc->target.os == KIT_OS_FREESTANDING)
    661       ident[EI_OSABI] = ELFOSABI_STANDALONE;
    662     else if (osc && osc->target.os == KIT_OS_FREEBSD)
    663       ident[EI_OSABI] = ELFOSABI_FREEBSD;
    664     else
    665       ident[EI_OSABI] = ELFOSABI_NONE;
    666   }
    667   {
    668     ObjSymIter* it = obj_symiter_new(ob);
    669     ObjSymEntry e;
    670     u32 nsec = obj_section_count(ob), si;
    671     while (obj_symiter_next(it, &e)) {
    672       if (e.sym->removed) continue;
    673       if (e.sym->kind == SK_IFUNC) {
    674         if (ident[EI_OSABI] != ELFOSABI_FREEBSD) ident[EI_OSABI] = ELFOSABI_GNU;
    675         break;
    676       }
    677     }
    678     obj_symiter_free(it);
    679     if (ident[EI_OSABI] != ELFOSABI_GNU &&
    680         ident[EI_OSABI] != ELFOSABI_FREEBSD) {
    681       for (si = 1; si < nsec; ++si) {
    682         const Section* sec = obj_section_get(ob, si);
    683         if (sec && !sec->removed && (sec->flags & SF_RETAIN)) {
    684           ident[EI_OSABI] = ELFOSABI_GNU;
    685           break;
    686         }
    687       }
    688     }
    689   }
    690   /* e_flags: prefer the value preserved from a prior read (round-trip);
    691    * else synthesize a sensible per-arch default. RV64 kit targets the
    692    * Linux psABI's lp64d soft-relax convention (RVC + double-float ABI). */
    693   u32 e_flags;
    694   if (!obj_get_elf_e_flags(ob, &e_flags)) {
    695     e_flags = elf->e_flags;
    696     /* rv32 (ptr_size 4): ilp32 and ilp32f share KIT_ARCH_RV32, so the static
    697      * descriptor's float-ABI bits (a placeholder SINGLE) cannot be right for
    698      * both. Derive them from -mabi (float_abi); RVC and other descriptor bits
    699      * are kept. rv64 (ptr_size 8) is left untouched, preserving its e_flags. */
    700     if (e_machine == EM_RISCV) {
    701       Compiler* ec = obj_compiler(ob);
    702       if (ec && ec->target.ptr_size == 4u) {
    703         u32 fa = EF_RISCV_FLOAT_ABI_SOFT;
    704         if (ec->target.float_abi == KIT_FLOAT_ABI_SINGLE)
    705           fa = EF_RISCV_FLOAT_ABI_SINGLE;
    706         else if (ec->target.float_abi == KIT_FLOAT_ABI_DOUBLE)
    707           fa = EF_RISCV_FLOAT_ABI_DOUBLE;
    708         else if (ec->target.float_abi == KIT_FLOAT_ABI_DEFAULT)
    709           fa = EF_RISCV_FLOAT_ABI_SINGLE; /* rv32 default profile is ilp32f */
    710         e_flags = (e_flags & ~(u32)EF_RISCV_FLOAT_ABI_MASK) | fa;
    711       }
    712     }
    713   }
    714 
    715   kit_writer_seek(w, 0);
    716   kit_writer_write(w, ident, EI_NIDENT);
    717   elf_wr_u16(w, ET_REL);
    718   elf_wr_u16(w, (u16)e_machine);
    719   elf_wr_u32(w, EV_CURRENT);
    720   /* e_entry/e_phoff/e_shoff are native-width (4B on ELF32, 8B on ELF64);
    721    * the field ORDER is identical, only the widths shrink. */
    722   elf_wr_addr(w, is32, 0);          /* e_entry */
    723   elf_wr_addr(w, is32, 0);          /* e_phoff */
    724   elf_wr_addr(w, is32, e_shoff);    /* e_shoff */
    725   elf_wr_u32(w, e_flags);           /* e_flags */
    726   elf_wr_u16(w, (u16)ehdr_size);    /* e_ehsize */
    727   elf_wr_u16(w, 0);                 /* e_phentsize */
    728   elf_wr_u16(w, 0);                 /* e_phnum */
    729   elf_wr_u16(w, (u16)shdr_size);    /* e_shentsize */
    730   elf_wr_u16(w, (u16)nsecs);        /* e_shnum */
    731   elf_wr_u16(w, (u16)idx_shstrtab); /* e_shstrndx */
    732 
    733   /* ---- pass 7: write each section's bytes ------------------------- */
    734 
    735   for (u32 i = 1; i < nsecs; ++i) {
    736     ElfSec* es = &secs[i];
    737     if (es->is_nobits || es->sh_size == 0) continue;
    738     kit_writer_seek(w, es->sh_offset);
    739     if (es->obj_bytes) {
    740       u32 sz = es->obj_bytes->total;
    741       u8* tmp = (u8*)h->alloc(h, sz ? sz : 1, 1);
    742       if (sz) buf_flatten(es->obj_bytes, tmp);
    743       kit_writer_write(w, tmp, sz);
    744       h->free(h, tmp, sz ? sz : 1);
    745     } else if (es->raw_bytes) {
    746       kit_writer_write(w, es->raw_bytes, (size_t)es->sh_size);
    747     }
    748   }
    749 
    750   /* ---- pass 8: write section header table ------------------------- */
    751 
    752   kit_writer_seek(w, e_shoff);
    753   for (u32 i = 0; i < nsecs; ++i) {
    754     const ElfSec* es = &secs[i];
    755     /* Elf32_Shdr (40B) and Elf64_Shdr (64B) share field ORDER; only
    756      * sh_flags/sh_addr/sh_offset/sh_size/sh_addralign/sh_entsize narrow
    757      * from u64 to u32 under is32. */
    758     elf_wr_u32(w, es->sh_name);
    759     elf_wr_u32(w, es->sh_type);
    760     elf_wr_addr(w, is32, es->sh_flags);
    761     elf_wr_addr(w, is32, es->sh_addr);
    762     elf_wr_addr(w, is32, es->sh_offset);
    763     elf_wr_addr(w, is32, es->sh_size);
    764     elf_wr_u32(w, es->sh_link);
    765     elf_wr_u32(w, es->sh_info);
    766     elf_wr_addr(w, is32, es->sh_addralign);
    767     elf_wr_addr(w, is32, es->sh_entsize);
    768   }
    769 }