commit faac4a4e8df891391b8695fe464799333c1c5754
parent 7ef74a962f314f7d01416856d4484aff2ed07079
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 29 May 2026 23:07:11 -0700
asm: emit .bss as NOBITS (close the last round-trip skip)
The standalone assembler created .bss via obj_section (SSEM_PROGBITS) and
filled it with real zero bytes via emit_fill, tracking position by byte
count. So a zero-init global landed at offset 0 in an SHT_PROGBITS section,
and the round-tripped .bss loaded read-only in the JIT image — a store to
the global faulted (L0/L1 passed, L2 aborted: roundtrip/glob_bss_write).
Match codegen: the assembler now creates .bss as SSEM_NOBITS, and
obj_write/obj_pos track the bss_size cursor for a NOBITS section (reserve
space, store nothing) instead of a byte buffer the ELF emitter drops. So a
.zero fill advances bss_size, label offsets are read from it, and the
section emits SHT_NOBITS.
Codegen is unaffected — it reserves BSS via obj_reserve_bss and its own
counter, never obj_write/obj_pos on a BSS section.
Round-trip aa64 now 858/0/0 (was 852/1-skip). link, elf, macho, smoke
x64/rv64 green.
Diffstat:
4 files changed, 37 insertions(+), 18 deletions(-)
diff --git a/doc/ASM_ROUNDTRIP_TESTING.md b/doc/ASM_ROUNDTRIP_TESTING.md
@@ -12,9 +12,8 @@ unary, conversions (incl. bitcast), loads/stores of every width, control flow,
switch (compare-chain + jump-table), indirect/recursive/stack-arg calls,
aggregates (struct by-val/by-ref, bitfields, unions), globals/static-locals, and
atomics (RMW / compare-exchange via the exclusive-monitor sequence) — and the
-round-trip passes all three lanes at `-O0` and `-O1`: **852 lane-checks pass,
-1 skip** (the lone skip, `glob_bss_write`, is the assembler `.bss`-NOBITS
-follow-up below). L0+L1 are wired into the default `make test` via
+round-trip passes all three lanes at `-O0` and `-O1`: **858 lane-checks pass,
+0 skips**. L0+L1 are wired into the default `make test` via
`test-asm-roundtrip`; L2 stays opt-in (`test-asm-roundtrip-exec`, native arch).
### Implemented so far (aa64)
@@ -37,6 +36,12 @@ follow-up below). L0+L1 are wired into the default `make test` via
unscaled family (`sturb`/`ldurb`/`sturh`/`ldurh`/`ldursb`/`ldursh`/`ldursw`);
the disassembler decodes the signed unscaled loads (keying Wt/Xt on opc). This
unblocked every FP spill and conversion case.
+- **Assembler `.bss` NOBITS (DONE).** The standalone assembler emitted `.bss` as
+ `SHT_PROGBITS`, so a store to a zero-init global faulted in the JIT image. The
+ assembler now creates `.bss` as `SSEM_NOBITS`, and `obj_write`/`obj_pos` track
+ the `bss_size` cursor for a NOBITS section: `.zero` reserves space and labels
+ read it, with no byte buffer for the emitters to drop. (Codegen is unaffected:
+ it never calls `obj_write`/`obj_pos` on a BSS section.) Closed the last corpus skip.
- **Exclusive / acquire-release atomic decode (DONE).** The assembler already
encoded `ldxr`/`ldaxr`/`stxr`/`stlxr`/`ldar`/`stlr` (+ b/h), but the
disassembler rendered them `.inst`, so the atomic RMW sequence codegen emits
@@ -97,17 +102,6 @@ the baseline (`bash test/asm/symmetry.sh --update`).
### Remaining (tracked here)
-- **Assembler `.bss` is PROGBITS, not NOBITS (the one corpus skip).** `cc -S`
- renders a zero-init global as `.section .bss` + `.zero N`; `as` writes real
- zero bytes and tracks position by byte count, so the symbol lands at offset 0
- and the section emits `SHT_PROGBITS`. The round-tripped `.bss` then loads
- read-only in the JIT image and a store faults (L0/L1 pass, L2 aborts —
- `roundtrip/glob_bss_write`). Fix: NOBITS position-tracking in the assembler —
- a `SEC_BSS`/`SSEM_NOBITS` section's symbol offsets and `.zero`/`.skip`/`.align`
- must advance `bss_size` instead of `obj_write`ing bytes (the obj layer already
- treats `SEC_BSS` specially in `obj_align_to`; `obj_pos`/`m_emit_fill`/
- `process_label` need the matching NOBITS path). `glob_rw` covers the
- global-write path via a `.data` global meanwhile.
- **FP register-offset + 128-bit `q` decode.** The assembler now *encodes* FP
register-offset (`str d0,[x,x,lsl#3]`) and `q` ldr/str, but the disassembler
decodes neither (renders `.inst`). Codegen emits neither for scalar C (FP
diff --git a/src/asm/asm.c b/src/asm/asm.c
@@ -145,8 +145,17 @@ static int starts_with(AsmDriver* d, Sym s, const char* prefix) {
static ObjSecId ensure_section(AsmDriver* d, Sym name, SecKind kind, u16 flags,
u32 align) {
ObjSecId* hit = SymSecMap_get(&d->sec_map, name);
+ ObjSecId id;
if (hit) return *hit;
- ObjSecId id = obj_section(d->ob, name, kind, flags, align);
+ /* A .bss section is NOBITS: it stores no bytes, only a size. Create it that
+ * way (codegen does the same via obj_section_ex) so the ELF emitter writes
+ * SHT_NOBITS and `.zero`/labels track bss_size, not a byte buffer — matching
+ * `cc -c`, so the round-tripped .bss isn't a PROGBITS section loaded read-only. */
+ if (kind == SEC_BSS)
+ id = obj_section_ex(d->ob, name, kind, SSEM_NOBITS, flags, align, 0,
+ OBJ_SEC_NONE, 0);
+ else
+ id = obj_section(d->ob, name, kind, flags, align);
SymSecMap_set(&d->sec_map, name, id);
return id;
}
diff --git a/src/obj/obj.c b/src/obj/obj.c
@@ -412,11 +412,27 @@ void obj_section_set_ext(ObjBuilder* ob, ObjSecId id, ObjExtKind ek,
s->ext_flags = ext_flags;
}
+/* A NOBITS section (.bss / .tbss) stores no bytes — only a size. decl.c and
+ * obj_align_to already treat SEC_BSS this way regardless of sem; obj_write and
+ * obj_pos must agree so the MCEmitter path (the standalone assembler's
+ * `.zero`/`.skip` fills and label positions) advances and reports the bss_size
+ * cursor instead of a byte buffer that the emitters then ignore. Codegen never
+ * writes/positions a BSS section through these (it uses obj_reserve_bss and its
+ * own counter), so this only affects the assembler's path. */
+static int sec_is_nobits(const Section* s) {
+ return s->sem == SSEM_NOBITS || s->kind == SEC_BSS;
+}
+
void obj_write(ObjBuilder* ob, ObjSecId id, const void* data, size_t n) {
Section* s;
if (id == OBJ_SEC_NONE) return;
s = Sections_at(&ob->sections, id);
- if (s) buf_write(&s->bytes, data, n);
+ if (!s) return;
+ if (sec_is_nobits(s)) {
+ s->bss_size += (u32)n; /* reserve zero-fill space; store nothing */
+ return;
+ }
+ buf_write(&s->bytes, data, n);
}
u8* obj_reserve(ObjBuilder* ob, ObjSecId id, size_t n) {
@@ -468,7 +484,8 @@ u32 obj_pos(ObjBuilder* ob, ObjSecId id) {
Section* s;
if (id == OBJ_SEC_NONE) return 0;
s = Sections_at(&ob->sections, id);
- return s ? buf_pos(&s->bytes) : 0;
+ if (!s) return 0;
+ return sec_is_nobits(s) ? s->bss_size : buf_pos(&s->bytes);
}
void obj_patch(ObjBuilder* ob, ObjSecId id, u32 ofs, const void* data,
diff --git a/test/asm/roundtrip/glob_bss_write.skip b/test/asm/roundtrip/glob_bss_write.skip
@@ -1 +0,0 @@
-assembler .bss is PROGBITS, not NOBITS: cc -S emits `.section .bss` + `.zero N`, but `as` writes real zero bytes and tracks position by byte count, so the symbol lands at offset 0 and the section emits SHT_PROGBITS. The round-tripped .bss loads read-only in the JIT image and a store faults. L0/L1 pass; L2 aborts. Needs NOBITS position-tracking in the assembler (symbol offsets + .zero/.skip/.align advance bss_size). Tracked in doc/ASM_ROUNDTRIP_TESTING.md. glob_rw covers the write path via a .data global.