kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit db7be7bfe28562663f14b4d20757abbc5bb7b4aa
parent f8282a97550097c40abbfa3653c41dd0f26e4273
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat,  9 May 2026 12:24:56 -0700

obj: preserve format-specific sh_type/sh_flags across roundtrip

Adds Section.ext_type and Section.ext_flags so the ELF reader can
stash raw sh_type values and sh_flags bits the canonical SecSem /
SecFlag enums don't model.  emit_elf uses ext_type in place of the
SecSem mapping when it is nonzero, and ORs ext_flags into the flags
derived from SecFlag.

Concrete cases unblocked: .llvm_addrsig (SHT_LLVM_ADDRSIG +
SHF_EXCLUDE) and .ARM.attributes (LOPROC+0x3) — both previously
papered over by the test normalizer's drop list, which is now
emptied.  Any future SHT_LOOS/LOPROC section round-trips for free.

Other sh_flags bits that SecFlag doesn't model, such as SHF_INFO_LINK
and SHF_COMPRESSED, also now ride through instead of being silently
dropped (ext_flags is a u32, so only the low 32 bits are kept).

Diffstat:
Msrc/obj/elf_emit.c | 8+++++++-
Msrc/obj/elf_read.c | 32+++++++++++++++++++++++++++++---
Msrc/obj/obj.c | 10++++++++++
Msrc/obj/obj.h | 13+++++++++++++
Mtest/elf/normalize.py | 15+++++----------
5 files changed, 64 insertions(+), 14 deletions(-)

diff --git a/src/obj/elf_emit.c b/src/obj/elf_emit.c @@ -270,8 +270,14 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) u32 nlen; es->name = sym_to_str(c, s->name, &nlen); es->name_len = nlen; - es->sh_type = sec_sem_to_elf(s->sem); + /* Honor format-specific overrides preserved by the reader for + * sh_type/sh_flags bits the canonical SecSem/SecFlag enums + * don't model (e.g. SHT_LLVM_ADDRSIG, SHF_EXCLUDE). */ + es->sh_type = (s->ext_kind == OBJ_EXT_ELF && s->ext_type) + ? s->ext_type + : sec_sem_to_elf(s->sem); es->sh_flags = sec_flags_to_elf(s->flags); + if (s->ext_kind == OBJ_EXT_ELF) es->sh_flags |= s->ext_flags; es->sh_addr = 0; es->sh_addralign = s->align ? s->align : 1; es->sh_entsize = s->entsize; diff --git a/src/obj/elf_read.c b/src/obj/elf_read.c @@ -48,6 +48,15 @@ static void parse_shdr(const u8* p, ShdrRec* out) /* ---- mappers ---- */ +/* The bits this function maps to SecFlag — anything outside this mask is + * treated as opaque and stashed in Section.ext_flags by the caller so the + * emitter can write it back unchanged. Examples of bits left over: + * SHF_EXCLUDE (0x80000000) on .llvm_addrsig, SHF_COMPRESSED (0x800) on + * compressed .debug_*, SHF_INFO_LINK (0x40) on .rela.* sections. */ +#define ELF_KNOWN_FLAGS_MASK \ + ((u64)(SHF_ALLOC | SHF_EXECINSTR | SHF_WRITE | SHF_TLS | \ + SHF_MERGE | SHF_STRINGS | SHF_GROUP | SHF_LINK_ORDER)) + static u16 elf_flags_to_obj(u64 f) { u16 r = 0; @@ -62,8 +71,13 @@ static u16 elf_flags_to_obj(u64 f) return r; } -static u16 elf_type_to_sem(u32 t) +/* Map ELF sh_type -> SecSem. Sets *known to 1 if the value is one of + * the canonical types the cfree model knows about; 0 means the caller + * fell through to the SSEM_PROGBITS fallback and should preserve the + * raw sh_type via Section.ext_type so emit_elf can write it back. 
*/ +static u16 elf_type_to_sem(u32 t, int* known) { + *known = 1; switch (t) { case SHT_PROGBITS: return SSEM_PROGBITS; case SHT_NOBITS: return SSEM_NOBITS; @@ -76,7 +90,7 @@ static u16 elf_type_to_sem(u32 t) case SHT_FINI_ARRAY: return SSEM_FINI_ARRAY; case SHT_PREINIT_ARRAY: return SSEM_PREINIT_ARRAY; case SHT_GROUP: return SSEM_GROUP; - default: return SSEM_PROGBITS; + default: *known = 0; return SSEM_PROGBITS; } } @@ -241,7 +255,8 @@ ObjBuilder* read_elf(Compiler* c, const char* name, Sym sym = pool_intern(c->global, nm, nlen); u16 sec_kind = elf_kind_from_name(nm, nlen, sh->sh_flags, sh->sh_type); - u16 sec_sem = elf_type_to_sem(sh->sh_type); + int type_known; + u16 sec_sem = elf_type_to_sem(sh->sh_type, &type_known); u16 flags = elf_flags_to_obj(sh->sh_flags); u32 align = sh->sh_addralign ? (u32)sh->sh_addralign : 1; @@ -253,6 +268,17 @@ ObjBuilder* read_elf(Compiler* c, const char* name, "read_elf: obj_section_ex failed for '%s'", nm); elf_to_obj[i] = id; + /* Preserve format-specific bits the canonical SecSem/SecFlag + * mapping can't represent so emit_elf can write them back + * verbatim. ext_type only set when the sh_type fell through + * to the "unknown" path. */ + u32 leftover = (u32)(sh->sh_flags & ~ELF_KNOWN_FLAGS_MASK); + if (!type_known || leftover) { + obj_section_set_ext(ob, id, OBJ_EXT_ELF, + type_known ? 0 : sh->sh_type, + leftover); + } + /* Body bytes. 
*/ if (sh->sh_type == SHT_NOBITS) { obj_reserve_bss(ob, id, (u32)sh->sh_size, align); diff --git a/src/obj/obj.c b/src/obj/obj.c @@ -210,6 +210,16 @@ void obj_section_set_align(ObjBuilder* ob, ObjSecId id, u32 align) void obj_section_set_group(ObjBuilder* ob, ObjSecId id, ObjGroupId gid) { if (id != OBJ_SEC_NONE && id < ob->nsections) ob->sections[id].group_id = gid; } +void obj_section_set_ext(ObjBuilder* ob, ObjSecId id, ObjExtKind ek, + u32 ext_type, u32 ext_flags) +{ + if (id == OBJ_SEC_NONE || id >= ob->nsections) return; + Section* s = &ob->sections[id]; + s->ext_kind = (u16)ek; + s->ext_type = ext_type; + s->ext_flags = ext_flags; +} + void obj_write(ObjBuilder* ob, ObjSecId id, const void* data, size_t n) { if (id == OBJ_SEC_NONE || id >= ob->nsections) return; diff --git a/src/obj/obj.h b/src/obj/obj.h @@ -133,6 +133,15 @@ typedef struct Section { u32 info; /* section-format dependent, typed by sem/ext_kind */ ObjGroupId group_id; /* OBJ_GROUP_NONE if not in a COMDAT/group */ u32 bss_size; /* nonzero only for SEC_BSS */ + /* Format-specific raw section type (ELF sh_type, COFF Characteristics + * subfield, etc.). Set by .o readers when the canonical SecSem + * mapping is lossy — e.g., SHT_LLVM_ADDRSIG (0x6FFF4C03) and + * SHT_ARM_ATTRIBUTES (0x70000003) collapse to SSEM_PROGBITS but + * the emitter must write back the original value to round-trip. + * Zero means "no override; derive from sem". */ + u32 ext_type; + u32 ext_flags; /* same idea for format-specific sh_flags bits + not represented in SecFlag (e.g. SHF_EXCLUDE) */ Buf bytes; } Section; @@ -198,6 +207,10 @@ ObjSecId obj_section_ex(ObjBuilder*, Sym name, SecKind, SecSem, u16 flags, void obj_section_set_flags(ObjBuilder*, ObjSecId, u16 flags); void obj_section_set_align(ObjBuilder*, ObjSecId, u32 align); void obj_section_set_group(ObjBuilder*, ObjSecId, ObjGroupId); +/* Set format-specific raw sh_type/sh_flags overrides (see Section.ext_type + * comment). Zero ext_type means "no override". 
*/ +void obj_section_set_ext(ObjBuilder*, ObjSecId, ObjExtKind, + u32 ext_type, u32 ext_flags); void obj_write (ObjBuilder*, ObjSecId section_id, const void* data, size_t n); u8* obj_reserve(ObjBuilder*, ObjSecId section_id, size_t n); void obj_reserve_bss(ObjBuilder*, ObjSecId section_id, u32 size, u32 align); diff --git a/test/elf/normalize.py b/test/elf/normalize.py @@ -70,16 +70,11 @@ _RELA_LINE = re.compile( # meaningful. Each entry here is paired with a comment in the C # implementation noting why; remove from this set when the model is # extended. -_DROP_SHDR_NAMES = { - # SHT_LLVM_ADDRSIG (0x6FFF4C03) + SHF_EXCLUDE (0x80000000) — LLVM - # address-significance hint; cfree collapses unknown sh_types to - # SSEM_PROGBITS and has no SF_EXCLUDE in the SecFlag enum. - ".llvm_addrsig", - # SHT_ARM_ATTRIBUTES (0x70000003 = LOPROC+0x3) — ARM build-attribute - # table. Same root cause: cfree collapses unknown sh_types to - # SSEM_PROGBITS so the sh_type field doesn't round-trip yet. - ".ARM.attributes", -} +_DROP_SHDR_NAMES = set() +# When the cfree data model can't preserve a section across roundtrip, +# add its name here with a comment pointing at the underlying gap. +# Sections previously listed (.llvm_addrsig, .ARM.attributes) now +# round-trip via Section.ext_type / Section.ext_flags. def _normalize_shdr(line):