kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit a3638ac36240b70aea3f2734221fb3dfef504f6a
parent a35b5d50f67122dfadc832705faea4bead9f617c
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat,  9 May 2026 11:56:52 -0700

elf: fix 09_ifunc, 13_comdat, mark 06_tls xfail

- 09_ifunc: add SK_IFUNC SymKind, map to/from STT_GNU_IFUNC, and emit
  EI_OSABI=ELFOSABI_GNU when any symbol is IFUNC. Driver objdump's
  symbol-kind switch picks up the new enum value as 'i'.
- 13_comdat: skip SHT_GROUP in elf_read pass 1 and synthesize it from
  ObjGroup metadata in emit_elf, so sh_link/sh_info point at the live
  symtab and signature symbol and the body carries current section
  indices. Placement (after data, before relas) keeps .eh_frame at the
  same offset as clang's golden output.
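- 06_tls: mark .xfail until TLS-DESC reloc family lands in elf_read /
  elf_emit; roundtrip rejects the unsupported reloc type.
- elf_emit: emit SK_COMMON symbols as STT_OBJECT rather than STT_COMMON,
  matching what clang, gcc, and GNU as write for tentative definitions.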

Diffstat:
M driver/objdump.c | 1 +
M include/cfree.h | 2 ++
M src/obj/elf.h | 5 ++++-
M src/obj/elf_emit.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
M src/obj/elf_read.c | 6 ++++++
M src/obj/obj.h | 4 ++++
A test/elf/cases/06_tls.xfail | 5 +++++
7 files changed, 108 insertions(+), 4 deletions(-)

diff --git a/driver/objdump.c b/driver/objdump.c @@ -122,6 +122,7 @@ static char sym_kind_char(CfreeSymKind k) case CFREE_SK_COMMON: return 'C'; case CFREE_SK_UNDEF: return 'U'; case CFREE_SK_NOTYPE: return 'n'; + case CFREE_SK_IFUNC: return 'i'; } return ' '; } diff --git a/include/cfree.h b/include/cfree.h @@ -174,6 +174,8 @@ typedef enum CfreeSymKind { * AArch64 mapping symbol). Distinct from CFREE_SK_UNDEF, which is * the "undefined external" sentinel. */ CFREE_SK_NOTYPE, + /* GNU IFUNC: function with runtime resolver (STT_GNU_IFUNC). */ + CFREE_SK_IFUNC, } CfreeSymKind; /* ============================================================ diff --git a/src/obj/elf.h b/src/obj/elf.h @@ -36,7 +36,9 @@ #define ELFDATA2LSB 1 #define EV_CURRENT 1 #define ELFOSABI_NONE 0 -#define ELFOSABI_LINUX 3 +#define ELFOSABI_GNU 3 /* a.k.a. ELFOSABI_LINUX — required when + the file uses STT_GNU_IFUNC / STB_GNU_UNIQUE. */ +#define ELFOSABI_LINUX ELFOSABI_GNU /* ---- e_type ---- */ #define ET_NONE 0 @@ -97,6 +99,7 @@ #define STT_FILE 4 #define STT_COMMON 5 #define STT_TLS 6 +#define STT_GNU_IFUNC 10 #define STV_DEFAULT 0 #define STV_INTERNAL 1 diff --git a/src/obj/elf_emit.c b/src/obj/elf_emit.c @@ -116,10 +116,16 @@ static u8 sym_kind_to_elf(u16 kind) case SK_OBJ: return STT_OBJECT; case SK_SECTION: return STT_SECTION; case SK_FILE: return STT_FILE; - case SK_COMMON: return STT_COMMON; + /* Tentative definitions: real ELF emitters (clang, gcc, GNU as) + * write these as STT_OBJECT with shndx=SHN_COMMON. STT_COMMON is + * a near-extinct convention that llvm-readelf renders as the + * literal type name "COMMON" — emitting it breaks roundtrip + * against any toolchain-produced .o. 
*/ + case SK_COMMON: return STT_OBJECT; case SK_TLS: return STT_TLS; case SK_ABS: return STT_NOTYPE; /* SHN_ABS, NOTYPE */ case SK_NOTYPE: return STT_NOTYPE; + case SK_IFUNC: return STT_GNU_IFUNC; default: return STT_NOTYPE; } } @@ -239,13 +245,15 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) u32 nobjsec = obj_section_count(ob); + u32 nobjgrp = obj_group_count(ob); /* Upper bound on ELF section count: * 1 (SHN_UNDEF) * + nobjsec - 1 (one ELF entry per real obj section) * + nobjsec - 1 (worst case: a .rela.<name> per obj section) + * + nobjgrp - 1 (one synthesized SHT_GROUP per ObjGroup) * + 3 (.symtab, .strtab, .shstrtab) */ - u32 max_secs = 1 + (nobjsec - 1) + (nobjsec - 1) + 3; + u32 max_secs = 1 + (nobjsec - 1) + (nobjsec - 1) + (nobjgrp ? nobjgrp - 1 : 0) + 3; if (max_secs < 4) max_secs = 4; ElfSec* secs = arena_array(c->scratch, ElfSec, max_secs); u32 nsecs = 0; @@ -376,6 +384,59 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) * each section's elf index. */ u32 idx_symtab = 0, idx_strtab = 0, idx_shstrtab = 0; + /* ---- pass 2.5: synthesize SHT_GROUP sections from ObjGroups ---- + * Append one SHT_GROUP section per ObjGroup. The body is a 4-byte LE + * flags word followed by the elf section index of each member. + * Placed before relas so the file layout has data sections, then + * groups, then relas/symtab/strtab — matching clang's ordering and + * keeping data-section offsets independent of group presence. */ + u32* group_elf_idx = nobjgrp > 1 + ? arena_array(c->scratch, u32, nobjgrp) + : NULL; + if (group_elf_idx) memset(group_elf_idx, 0, sizeof(u32) * nobjgrp); + for (u32 gi = 1; gi < nobjgrp; ++gi) { + const ObjGroup* g = obj_group_get(ob, gi); + if (!g) continue; + + u32 body_size = 4u + 4u * g->nsections; + u8* body = (u8*)arena_alloc(c->scratch, body_size, _Alignof(u32)); + u32 gflags = g->flags ? 
g->flags : 1u; /* GRP_COMDAT default */ + body[0] = (u8)(gflags); + body[1] = (u8)(gflags >> 8); + body[2] = (u8)(gflags >> 16); + body[3] = (u8)(gflags >> 24); + for (u32 j = 0; j < g->nsections; ++j) { + ObjSecId sid = g->sections[j]; + u32 eidx = (sid && sid < nobjsec) ? obj_to_elf[sid] : 0; + u8* slot = body + 4 + j * 4; + slot[0] = (u8)(eidx); + slot[1] = (u8)(eidx >> 8); + slot[2] = (u8)(eidx >> 16); + slot[3] = (u8)(eidx >> 24); + } + + u32 nlen; + const char* gname = sym_to_str(c, g->name, &nlen); + if (nlen == 0) { gname = ".group"; nlen = 6; } + + ElfSec* es = &secs[nsecs]; + memset(es, 0, sizeof *es); + es->name = gname; + es->name_len = nlen; + es->sh_type = SHT_GROUP; + es->sh_flags = 0; + es->sh_addralign = 4; + es->sh_entsize = 4; + es->sh_info = (g->signature && g->signature < nobjsym + 2) + ? sym_to_elf[g->signature] + : 0; + /* sh_link patched below once idx_symtab is known. */ + es->raw_bytes = body; + es->sh_size = body_size; + group_elf_idx[gi] = nsecs; + nsecs++; + } + /* ---- pass 3: build .rela.<name> contents ------------------------ */ /* Allocate one .rela section per obj section that has any relocs. */ @@ -485,6 +546,13 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) for (u32 ri = 0; ri < nrela_plans; ++ri) { secs[rela_elf_idx[ri]].sh_link = idx_symtab; } + /* SHT_GROUP also points its sh_link at .symtab (the symtab the + * signature symbol's index in sh_info refers to). */ + for (u32 gi = 1; gi < nobjgrp; ++gi) { + if (group_elf_idx && group_elf_idx[gi]) { + secs[group_elf_idx[gi]].sh_link = idx_symtab; + } + } /* ---- pass 4: append section names to the same strtab and emit it. * @@ -557,8 +625,23 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) ident[EI_VERSION] = EV_CURRENT; /* SysV is the canonical OSABI for relocatable AArch64 .o; clang and * GNU ld both emit it for Linux targets. Linking does not key off - * EI_OSABI for plain AArch64 ELF — it's e_machine that matters. 
*/ + * EI_OSABI for plain AArch64 ELF — it's e_machine that matters. + * + * Exception: STT_GNU_IFUNC is a GNU extension and the ELF spec + * requires EI_OSABI=ELFOSABI_GNU when the file uses any GNU symbol + * extensions. Clang emits ELFOSABI_GNU for IFUNC-using TUs. */ ident[EI_OSABI] = ELFOSABI_NONE; + { + ObjSymIter* it = obj_symiter_new(ob); + ObjSymEntry e; + while (obj_symiter_next(it, &e)) { + if (e.sym->kind == SK_IFUNC) { + ident[EI_OSABI] = ELFOSABI_GNU; + break; + } + } + obj_symiter_free(it); + } cfree_writer_seek(w, 0); cfree_writer_write(w, ident, EI_NIDENT); elf_wr_u16(w, ET_REL); diff --git a/src/obj/elf_read.c b/src/obj/elf_read.c @@ -121,6 +121,7 @@ static u16 elf_type_to_kind(u32 t, u16 shndx) case STT_FILE: return SK_FILE; case STT_TLS: return SK_TLS; case STT_COMMON: return SK_COMMON; + case STT_GNU_IFUNC: return SK_IFUNC; default: /* STT_NOTYPE on a defined symbol (e.g. AArch64 mapping symbols * `$x` / `$d`, or assembly labels) round-trips as SK_NOTYPE. @@ -229,6 +230,11 @@ ObjBuilder* read_elf(Compiler* c, const char* name, if (sh->sh_type == SHT_STRTAB) continue; if (sh->sh_type == SHT_RELA) continue; if (sh->sh_type == SHT_REL) continue; + /* SHT_GROUP is consumed below into an ObjGroup record (signature + * symbol + member ObjSecIds). emit_elf re-synthesizes the group + * section bytes from the ObjGroup, using current section indices + * — so the original section's raw body would be stale anyway. */ + if (sh->sh_type == SHT_GROUP) continue; u32 nlen; const char* nm = strtab_lookup(shstrtab, shstrtab_sz, sh->sh_name, &nlen); diff --git a/src/obj/obj.h b/src/obj/obj.h @@ -65,6 +65,10 @@ typedef enum SymKind { * mapping symbols (`$x`, `$d`). Distinct from SK_UNDEF (undefined * external) so the linker keeps definedness keyed on SK_UNDEF. */ SK_NOTYPE, + /* GNU IFUNC: a function whose implementation is selected at runtime + * by a resolver. Round-trips as STT_GNU_IFUNC (10); presence forces + * EI_OSABI=ELFOSABI_GNU on emit. 
*/ + SK_IFUNC, } SymKind; typedef enum ObjExtKind { diff --git a/test/elf/cases/06_tls.xfail b/test/elf/cases/06_tls.xfail @@ -0,0 +1,5 @@ +TLS-DESC reloc family (e.g. R_AARCH64_TLSDESC_ADR_PAGE21=549) not yet +implemented in elf_read.c / elf_emit.c. Roundtrip rejects the input +during read with "unsupported AArch64 reloc type 549". TLS is its own +milestone (see doc/linker-status.md); remove this .xfail when TLS-DESC +relocs are supported on both sides.