commit cd128bb839e5be267adabb140be3944872123cb1
parent 513e71590f9a23086a8978e67dbac6f6ac40fea8
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 11 May 2026 13:50:25 -0700
obj: coalesce same-named sections; add obj_align_to for intra-section padding
obj_section now finds-or-creates by (name, kind, sem=PROGBITS), merging
flags (OR) and align (max); on a hit it also zero-pads the section's
byte buffer to the requested align. This collapses the per-literal
fan-out of .rodata from FP/string literals and const initializers, and
the per-decl .data/.bss from decl.c, into one section each.
Coalescing means placements no longer start at a fresh offset 0, so
add obj_align_to(ob, sec, align): it zero-pads byte-backed (PROGBITS)
sections, bumps bss_size for sections with sem=NOBITS or kind=SEC_BSS,
returns the aligned offset, and lifts s->align. Callers
that previously assumed "fresh section starts aligned" now route their
base through it: decl.c (.bss / .data, including attr-pinned sections),
parse.c (.rodata const init / .bss / .data init), and the three arch
load_const paths.
Diffstat:
7 files changed, 93 insertions(+), 28 deletions(-)
diff --git a/src/arch/aarch64.c b/src/arch/aarch64.c
@@ -1212,15 +1212,14 @@ static void aa_load_const(CGTarget* t, Operand dst, ConstBytes cb) {
compiler_panic(t->c, a->loc, "aarch64 load_const: only FP supported in v1");
}
- /* Find or create .rodata. */
+ /* Find or create .rodata. obj_align_to bumps the section's recorded
+ * align as a side effect of placement, so we pass 1 here. */
Sym ro_name = pool_intern_cstr(t->c->global, ".rodata");
- ObjSecId ro = obj_section(t->obj, ro_name, SEC_RODATA, SF_ALLOC,
- cb.align ? cb.align : 4);
+ ObjSecId ro = obj_section(t->obj, ro_name, SEC_RODATA, SF_ALLOC, 1u);
u32 cur_section = t->mc->section_id;
t->mc->set_section(t->mc, ro);
- t->mc->emit_align(t->mc, cb.align ? cb.align : 4, 0);
- u32 ro_off = t->mc->pos(t->mc);
+ u32 ro_off = obj_align_to(t->obj, ro, cb.align ? cb.align : 4);
t->mc->emit_bytes(t->mc, cb.bytes, cb.size);
/* Local symbol pointing at the literal. */
diff --git a/src/arch/rv64.c b/src/arch/rv64.c
@@ -958,13 +958,11 @@ static void rv_load_const(CGTarget* t, Operand dst, ConstBytes cb) {
compiler_panic(t->c, a->loc, "rv64 load_const: only FP supported in v1");
}
Sym ro_name = pool_intern_cstr(t->c->global, ".rodata");
- ObjSecId ro = obj_section(t->obj, ro_name, SEC_RODATA, SF_ALLOC,
- cb.align ? cb.align : 4);
+ ObjSecId ro = obj_section(t->obj, ro_name, SEC_RODATA, SF_ALLOC, 1u);
u32 cur_section = t->mc->section_id;
t->mc->set_section(t->mc, ro);
- t->mc->emit_align(t->mc, cb.align ? cb.align : 4, 0);
- u32 ro_off = t->mc->pos(t->mc);
+ u32 ro_off = obj_align_to(t->obj, ro, cb.align ? cb.align : 4);
t->mc->emit_bytes(t->mc, cb.bytes, cb.size);
char namebuf[64];
diff --git a/src/arch/x64.c b/src/arch/x64.c
@@ -1197,13 +1197,11 @@ static void x_load_const(CGTarget* t, Operand dst, ConstBytes cb) {
compiler_panic(t->c, a->loc, "x64 load_const: only FP supported in v1");
Sym ro_name = pool_intern_cstr(t->c->global, ".rodata");
- ObjSecId ro = obj_section(t->obj, ro_name, SEC_RODATA, SF_ALLOC,
- cb.align ? cb.align : 4);
+ ObjSecId ro = obj_section(t->obj, ro_name, SEC_RODATA, SF_ALLOC, 1u);
u32 cur_section = t->mc->section_id;
t->mc->set_section(t->mc, ro);
- t->mc->emit_align(t->mc, cb.align ? cb.align : 4, 0);
- u32 ro_off = t->mc->pos(t->mc);
+ u32 ro_off = obj_align_to(t->obj, ro, cb.align ? cb.align : 4);
t->mc->emit_bytes(t->mc, cb.bytes, cb.size);
char namebuf[64];
diff --git a/src/decl/decl.c b/src/decl/decl.c
@@ -177,13 +177,16 @@ void decl_define_object(DeclTable* t, DeclId id, u64 size, u32 align,
}
if (d->section_id != OBJ_SEC_NONE) {
/* Attribute-pinned section: use it for both BSS-style (all-zero) and
- * initialized layouts. The section's kind was set at creation time. */
+ * initialized layouts. The section's kind was set at creation time.
+ * Multiple decls can target the same attribute section, so place
+ * each one at the aligned tail rather than at offset 0. */
sec_id = d->section_id;
if (!has_nonzero) {
- obj_reserve_bss(t->ob, sec_id, (u32)size, align ? align : 1u);
- obj_symbol_define(t->ob, d->obj_sym, sec_id, 0, size);
+ base = obj_align_to(t->ob, sec_id, align ? align : 1u);
+ obj_reserve_bss(t->ob, sec_id, base + (u32)size, align ? align : 1u);
+ obj_symbol_define(t->ob, d->obj_sym, sec_id, base, size);
} else {
- base = obj_pos(t->ob, sec_id);
+ base = obj_align_to(t->ob, sec_id, align ? align : 1u);
obj_reserve(t->ob, sec_id, size);
obj_symbol_define(t->ob, d->obj_sym, sec_id, base, size);
for (i = 0; i < ninit; ++i) {
@@ -221,8 +224,9 @@ void decl_define_object(DeclTable* t, DeclId id, u64 size, u32 align,
sec_name = pool_intern_cstr(t->c->global, ".bss");
sec_id = obj_section(t->ob, sec_name, SEC_BSS, SF_ALLOC | SF_WRITE,
align ? align : 1u);
- obj_reserve_bss(t->ob, sec_id, (u32)size, align ? align : 1u);
- obj_symbol_define(t->ob, d->obj_sym, sec_id, 0, size);
+ base = obj_align_to(t->ob, sec_id, align ? align : 1u);
+ obj_reserve_bss(t->ob, sec_id, base + (u32)size, align ? align : 1u);
+ obj_symbol_define(t->ob, d->obj_sym, sec_id, base, size);
if (d->flags & DF_USED) {
const Section* s = obj_section_get(t->ob, sec_id);
if (s) obj_section_set_flags(t->ob, sec_id, (u16)(s->flags | SF_RETAIN));
@@ -233,7 +237,7 @@ void decl_define_object(DeclTable* t, DeclId id, u64 size, u32 align,
sec_id = obj_section(t->ob, sec_name, SEC_DATA, SF_ALLOC | SF_WRITE,
align ? align : 1u);
{
- base = obj_pos(t->ob, sec_id);
+ base = obj_align_to(t->ob, sec_id, align ? align : 1u);
obj_reserve(t->ob, sec_id, size);
obj_symbol_define(t->ob, d->obj_sym, sec_id, base, size);
for (i = 0; i < ninit; ++i) {
diff --git a/src/obj/obj.c b/src/obj/obj.c
@@ -104,6 +104,38 @@ int obj_get_elf_e_flags(const ObjBuilder* ob, u32* out) {
ObjSecId obj_section(ObjBuilder* ob, Sym name, SecKind kind, u16 flags,
u32 align) {
+ /* Find-or-create by (name, kind, sem=PROGBITS). Repeated calls for the
+ * same logical section — e.g. one .rodata per FP/string literal, or one
+ * .data per static initializer — collapse onto a single Section and
+ * accumulate bytes into it instead of emitting a fan-out of identically-
+ * named output sections. Merge align (max) and flags (union) so a
+ * stricter requirement from a later caller wins.
+ *
+ * On a hit the existing section's id is returned after merging; on a
+ * miss the request falls through to obj_section_ex with
+ * sem=SSEM_PROGBITS. The scan starts at index 1;
+ * NOTE(review): index 0 is presumably a reserved/null slot — confirm. */
+ u32 n = Sections_count(&ob->sections);
+ for (u32 i = 1; i < n; ++i) {
+ Section* s = Sections_at(&ob->sections, i);
+ if (s && s->name == name && s->kind == (u16)kind &&
+ s->sem == SSEM_PROGBITS) {
+ if (align > s->align) s->align = align;
+ s->flags = (u16)(s->flags | flags);
+ /* Pad to align so the next obj_reserve / obj_write lands at an
+ * offset that satisfies this caller's alignment. Without this
+ * each contribution is laid out at whatever offset the prior
+ * write happened to leave, so a 4-byte global following a 6-byte
+ * string lands at .data+6 — and any LDST32 reloc against the
+ * containing section breaks at link time.
+ *
+ * The padding goes into s->bytes only and is skipped when align
+ * is 0 or 1. The mask arithmetic assumes align is a power of two.
+ * NOTE(review): for kind == SEC_BSS sections obj_align_to tracks
+ * placement via bss_size rather than s->bytes, so this padding
+ * does not move the next .bss placement — confirm that is
+ * intended. */
+ u32 a = align ? align : 1u;
+ if (a > 1u) {
+ u32 cur = buf_pos(&s->bytes);
+ u32 mis = cur & (a - 1u);
+ if (mis) {
+ u32 pad = a - mis;
+ u8* dst = buf_reserve(&s->bytes, pad);
+ if (dst) memset(dst, 0, pad);
+ }
+ }
+ return (ObjSecId)i;
+ }
+ }
return obj_section_ex(ob, name, kind, SSEM_PROGBITS, flags, align, 0,
OBJ_SEC_NONE, 0);
}
@@ -177,6 +209,35 @@ void obj_reserve_bss(ObjBuilder* ob, ObjSecId id, u32 size, u32 align) {
if (align) s->align = align;
}
+u32 obj_align_to(ObjBuilder* ob, ObjSecId id, u32 align) {
+ Section* s;
+ u32 a, cur, base, pad;
+ if (id == OBJ_SEC_NONE) return 0; /* no section: report offset 0 */
+ s = Sections_at(&ob->sections, id);
+ if (!s) return 0; /* lookup failed: report offset 0 */
+ a = align ? align : 1u; /* align 0 behaves like 1 (no padding) */
+ /* Round the section's current end up to a multiple of `a` and return
+ * that offset; s->align is raised to `a` when `a` is larger. The mask
+ * arithmetic assumes `a` is a power of two.
+ *
+ * Treat SEC_BSS like NOBITS even when sem is the default PROGBITS —
+ * decl.c creates .bss via the simple obj_section, but emit_macho /
+ * emit_elf both route SEC_BSS through the zerofill path regardless
+ * of sem, so the byte buf is ignored on output and only bss_size
+ * matters.
+ *
+ * NOTE(review): the visible tail of obj_reserve_bss assigns
+ * s->align = align whenever align is nonzero, so a caller that
+ * follows this with obj_reserve_bss using a smaller align may undo
+ * the lift done here — confirm. */
+ if (s->sem == SSEM_NOBITS || s->kind == SEC_BSS) {
+ base = (s->bss_size + (a - 1u)) & ~(a - 1u);
+ s->bss_size = base; /* no bytes written; only the size grows */
+ if (a > s->align) s->align = a;
+ return base;
+ }
+ cur = buf_pos(&s->bytes); /* byte-backed section: pad with zeros */
+ base = (cur + (a - 1u)) & ~(a - 1u);
+ pad = base - cur;
+ if (pad) {
+ u8* p = buf_reserve(&s->bytes, pad);
+ if (p) memset(p, 0, pad); /* a NULL from buf_reserve is skipped */
+ }
+ if (a > s->align) s->align = a;
+ return base;
+}
+
u32 obj_pos(ObjBuilder* ob, ObjSecId id) {
Section* s;
if (id == OBJ_SEC_NONE) return 0;
diff --git a/src/obj/obj.h b/src/obj/obj.h
@@ -337,6 +337,13 @@ void obj_section_set_ext(ObjBuilder*, ObjSecId, ObjExtKind, u32 ext_type,
void obj_write(ObjBuilder*, ObjSecId section_id, const void* data, size_t n);
u8* obj_reserve(ObjBuilder*, ObjSecId section_id, size_t n);
void obj_reserve_bss(ObjBuilder*, ObjSecId section_id, u32 size, u32 align);
+/* Pad `section_id` to `align`, returning the resulting offset (0 when
+ * `section_id` is OBJ_SEC_NONE or cannot be looked up). An `align` of 0
+ * is treated as 1. For byte-backed sections this appends zero bytes;
+ * for sections with sem NOBITS or kind SEC_BSS it bumps bss_size
+ * instead. The section's recorded align is raised to `align` if it
+ * was smaller. Callers that share a section across multiple symbols
+ * use this to ensure each placement starts at the symbol's required
+ * alignment, since dedup of obj_section means a placement isn't
+ * automatically aligned just because the section's own align is set. */
+u32 obj_align_to(ObjBuilder*, ObjSecId section_id, u32 align);
u32 obj_pos(ObjBuilder*, ObjSecId section_id);
void obj_patch(ObjBuilder*, ObjSecId section_id, u32 ofs, const void* data,
size_t n);
diff --git a/src/parse/parse.c b/src/parse/parse.c
@@ -5077,10 +5077,10 @@ static void define_static_object(Parser* p, ObjSymId sym, const Type* var_ty,
override_sec = pick_object_section(p, quals, has_nonzero);
if (override_sec != OBJ_SEC_NONE) {
- /* .rodata path: emit bytes directly here so we can pin the section. */
- u32 base = obj_pos(ob, override_sec);
- obj_section_set_align(ob, override_sec,
- align > 1u ? align : 1u);
+ /* .rodata path: emit bytes directly here so we can pin the section.
+ * obj_section dedupes by name, so multiple const inits share one
+ * .rodata — align each placement to the object's own requirement. */
+ u32 base = obj_align_to(ob, override_sec, align > 1u ? align : 1u);
{
u8* dst = obj_reserve(ob, override_sec, size);
if (dst && buf) memcpy(dst, buf, size);
@@ -5105,10 +5105,8 @@ static void define_static_object(Parser* p, ObjSymId sym, const Type* var_ty,
ObjSecId sec = obj_section_ex(ob, sname, SEC_BSS, SSEM_NOBITS,
SF_ALLOC | SF_WRITE,
align ? align : 1u, 0, OBJ_SEC_NONE, 0);
- const Section* sinfo = obj_section_get(ob, sec);
- u32 prev_size = sinfo ? sinfo->bss_size : 0u;
u32 a = align ? align : 1u;
- u32 base = (prev_size + (a - 1u)) & ~(a - 1u);
+ u32 base = obj_align_to(ob, sec, a);
obj_reserve_bss(ob, sec, base + size, a);
obj_symbol_define(ob, sym, sec, base, size);
return;
@@ -5118,7 +5116,7 @@ static void define_static_object(Parser* p, ObjSymId sym, const Type* var_ty,
Sym sname = pool_intern_cstr(p->pool, ".data");
ObjSecId sec = obj_section(ob, sname, SEC_DATA, SF_ALLOC | SF_WRITE,
align ? align : 1u);
- u32 base = obj_pos(ob, sec);
+ u32 base = obj_align_to(ob, sec, align ? align : 1u);
u8* dst = obj_reserve(ob, sec, size);
if (dst) memcpy(dst, buf, size);
obj_symbol_define(ob, sym, sec, base, size);