commit db7be7bfe28562663f14b4d20757abbc5bb7b4aa
parent f8282a97550097c40abbfa3653c41dd0f26e4273
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 9 May 2026 12:24:56 -0700
obj: preserve format-specific sh_type/sh_flags across roundtrip
Adds Section.ext_type and Section.ext_flags so the ELF reader can
stash raw sh_type values and sh_flags bits the canonical SecSem /
SecFlag enums don't model. emit_elf prefers the override when set,
falling back to the SecSem mapping otherwise.
Concrete cases unblocked: .llvm_addrsig (SHT_LLVM_ADDRSIG +
SHF_EXCLUDE) and .ARM.attributes (LOPROC+0x3) — both previously
papered over by the test normalizer's drop list, which is now
emptied. Any future SHT_LOOS/LOPROC section round-trips for free.
Other unknown bits like SHF_INFO_LINK and SHF_COMPRESSED also now
ride through verbatim instead of being silently dropped.
Diffstat:
5 files changed, 64 insertions(+), 14 deletions(-)
diff --git a/src/obj/elf_emit.c b/src/obj/elf_emit.c
@@ -270,8 +270,14 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w)
u32 nlen;
es->name = sym_to_str(c, s->name, &nlen);
es->name_len = nlen;
- es->sh_type = sec_sem_to_elf(s->sem);
+ /* Honor format-specific overrides preserved by the reader for
+ * sh_type/sh_flags bits the canonical SecSem/SecFlag enums
+ * don't model (e.g. SHT_LLVM_ADDRSIG, SHF_EXCLUDE). */
+ es->sh_type = (s->ext_kind == OBJ_EXT_ELF && s->ext_type)
+ ? s->ext_type
+ : sec_sem_to_elf(s->sem);
es->sh_flags = sec_flags_to_elf(s->flags);
+ if (s->ext_kind == OBJ_EXT_ELF) es->sh_flags |= s->ext_flags;
es->sh_addr = 0;
es->sh_addralign = s->align ? s->align : 1;
es->sh_entsize = s->entsize;
diff --git a/src/obj/elf_read.c b/src/obj/elf_read.c
@@ -48,6 +48,15 @@ static void parse_shdr(const u8* p, ShdrRec* out)
/* ---- mappers ---- */
+/* The sh_flags bits elf_flags_to_obj() maps to SecFlag. Anything outside
+ * this mask is treated as opaque and stashed in Section.ext_flags by the
+ * caller so the emitter can write it back unchanged. Leftover examples:
+ * SHF_EXCLUDE (0x80000000) on .llvm_addrsig, SHF_COMPRESSED (0x800) on
+ * compressed .debug_*, SHF_INFO_LINK (0x40) on .rela.* sections. */
+#define ELF_KNOWN_FLAGS_MASK \
+ ((u64)(SHF_ALLOC | SHF_EXECINSTR | SHF_WRITE | SHF_TLS | \
+ SHF_MERGE | SHF_STRINGS | SHF_GROUP | SHF_LINK_ORDER))
+
static u16 elf_flags_to_obj(u64 f)
{
u16 r = 0;
@@ -62,8 +71,13 @@ static u16 elf_flags_to_obj(u64 f)
return r;
}
-static u16 elf_type_to_sem(u32 t)
+/* Map ELF sh_type -> SecSem. Sets *known to 1 if the value is one of
+ * the canonical types the cfree model knows about, or to 0 if it hit
+ * the SSEM_PROGBITS fallback; in that case the caller should preserve
+ * the raw sh_type via Section.ext_type so emit_elf can write it back. */
+static u16 elf_type_to_sem(u32 t, int* known)
{
+ *known = 1;
switch (t) {
case SHT_PROGBITS: return SSEM_PROGBITS;
case SHT_NOBITS: return SSEM_NOBITS;
@@ -76,7 +90,7 @@ static u16 elf_type_to_sem(u32 t)
case SHT_FINI_ARRAY: return SSEM_FINI_ARRAY;
case SHT_PREINIT_ARRAY: return SSEM_PREINIT_ARRAY;
case SHT_GROUP: return SSEM_GROUP;
- default: return SSEM_PROGBITS;
+ default: *known = 0; return SSEM_PROGBITS;
}
}
@@ -241,7 +255,8 @@ ObjBuilder* read_elf(Compiler* c, const char* name,
Sym sym = pool_intern(c->global, nm, nlen);
u16 sec_kind = elf_kind_from_name(nm, nlen, sh->sh_flags, sh->sh_type);
- u16 sec_sem = elf_type_to_sem(sh->sh_type);
+ int type_known;
+ u16 sec_sem = elf_type_to_sem(sh->sh_type, &type_known);
u16 flags = elf_flags_to_obj(sh->sh_flags);
u32 align = sh->sh_addralign ? (u32)sh->sh_addralign : 1;
@@ -253,6 +268,17 @@ ObjBuilder* read_elf(Compiler* c, const char* name,
"read_elf: obj_section_ex failed for '%s'", nm);
elf_to_obj[i] = id;
+ /* Preserve format-specific bits the canonical SecSem/SecFlag
+ * mapping can't represent so emit_elf can write them back
+ * verbatim. ext_type is only set when the sh_type fell through
+ * to the "unknown" path. */
+ u32 leftover = (u32)(sh->sh_flags & ~ELF_KNOWN_FLAGS_MASK);
+ if (!type_known || leftover) {
+ obj_section_set_ext(ob, id, OBJ_EXT_ELF,
+ type_known ? 0 : sh->sh_type,
+ leftover);
+ }
+
/* Body bytes. */
if (sh->sh_type == SHT_NOBITS) {
obj_reserve_bss(ob, id, (u32)sh->sh_size, align);
diff --git a/src/obj/obj.c b/src/obj/obj.c
@@ -210,6 +210,16 @@ void obj_section_set_align(ObjBuilder* ob, ObjSecId id, u32 align)
void obj_section_set_group(ObjBuilder* ob, ObjSecId id, ObjGroupId gid)
{ if (id != OBJ_SEC_NONE && id < ob->nsections) ob->sections[id].group_id = gid; }
+void obj_section_set_ext(ObjBuilder* ob, ObjSecId id, ObjExtKind ek,
+ u32 ext_type, u32 ext_flags)
+{
+ if (id == OBJ_SEC_NONE || id >= ob->nsections) return;
+ Section* s = &ob->sections[id];
+ s->ext_kind = (u16)ek;
+ s->ext_type = ext_type;
+ s->ext_flags = ext_flags;
+}
+
void obj_write(ObjBuilder* ob, ObjSecId id, const void* data, size_t n)
{
if (id == OBJ_SEC_NONE || id >= ob->nsections) return;
diff --git a/src/obj/obj.h b/src/obj/obj.h
@@ -133,6 +133,15 @@ typedef struct Section {
u32 info; /* section-format dependent, typed by sem/ext_kind */
ObjGroupId group_id; /* OBJ_GROUP_NONE if not in a COMDAT/group */
u32 bss_size; /* nonzero only for SEC_BSS */
+ /* Format-specific raw section type (ELF sh_type, COFF Characteristics
+ * subfield, etc.). Set by .o readers when the canonical SecSem
+ * mapping is lossy — e.g., SHT_LLVM_ADDRSIG (0x6FFF4C03) and
+ * SHT_ARM_ATTRIBUTES (0x70000003) collapse to SSEM_PROGBITS but
+ * the emitter must write back the original value to round-trip.
+ * Zero means "no override; derive from sem". */
+ u32 ext_type;
+ u32 ext_flags; /* same idea for format-specific sh_flags bits
+ not represented in SecFlag (e.g. SHF_EXCLUDE) */
Buf bytes;
} Section;
@@ -198,6 +207,10 @@ ObjSecId obj_section_ex(ObjBuilder*, Sym name, SecKind, SecSem, u16 flags,
void obj_section_set_flags(ObjBuilder*, ObjSecId, u16 flags);
void obj_section_set_align(ObjBuilder*, ObjSecId, u32 align);
void obj_section_set_group(ObjBuilder*, ObjSecId, ObjGroupId);
+/* Set format-specific raw sh_type/sh_flags overrides (see Section.ext_type
+ * comment). Zero ext_type means "no override". */
+void obj_section_set_ext(ObjBuilder*, ObjSecId, ObjExtKind,
+ u32 ext_type, u32 ext_flags);
void obj_write (ObjBuilder*, ObjSecId section_id, const void* data, size_t n);
u8* obj_reserve(ObjBuilder*, ObjSecId section_id, size_t n);
void obj_reserve_bss(ObjBuilder*, ObjSecId section_id, u32 size, u32 align);
diff --git a/test/elf/normalize.py b/test/elf/normalize.py
@@ -70,16 +70,11 @@ _RELA_LINE = re.compile(
# meaningful. Each entry here is paired with a comment in the C
# implementation noting why; remove from this set when the model is
# extended.
-_DROP_SHDR_NAMES = {
- # SHT_LLVM_ADDRSIG (0x6FFF4C03) + SHF_EXCLUDE (0x80000000) — LLVM
- # address-significance hint; cfree collapses unknown sh_types to
- # SSEM_PROGBITS and has no SF_EXCLUDE in the SecFlag enum.
- ".llvm_addrsig",
- # SHT_ARM_ATTRIBUTES (0x70000003 = LOPROC+0x3) — ARM build-attribute
- # table. Same root cause: cfree collapses unknown sh_types to
- # SSEM_PROGBITS so the sh_type field doesn't round-trip yet.
- ".ARM.attributes",
-}
+_DROP_SHDR_NAMES = set()
+# When the cfree data model can't preserve a section across roundtrip,
+# add its name here with a comment pointing at the underlying gap.
+# Sections previously listed (.llvm_addrsig, .ARM.attributes) now
+# round-trip via Section.ext_type / Section.ext_flags.
def _normalize_shdr(line):