commit 68782ef83e884f5df4fdc516ad044a14cc6f7d10
parent 0980aa9bd5b779ecfa4c0e35d2f5c5b18a4306b6
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 30 May 2026 11:08:46 -0700
asm: round-trip SEC_OTHER named sections; toy /ld exec lane
cc -S dropped every SEC_OTHER section (sec_directive returned NULL), so a global
in an explicit named section -- e.g. .section(".rodata.toy.merge") with
merge/strings/retain attrs -- lost its label and bytes; the re-assembled object
then carried a dangling reference and failed to link.
Emit (src/api/asm_emit.c): emit_section_header writes SEC_OTHER as
`.section <name>, "<flags>", @<type>[, <entsize>]` in GNU-as syntax, mapping
SF_* -> a/w/x/M/S/T/R. Canonical kinds emit their fixed directive unchanged.
Assemble (src/asm/asm.c): the .section parser reads the optional
"flags"/@type/entsize operands and reconstructs flags/kind/entsize (was skipped
to EOL). Adds obj_section_set_entsize (src/obj/obj.{c,h}). The round-tripped
object's section table is byte-identical to a direct cc -c object.
test/asm/roundtrip_toy.sh: add a native /ld lane (cfree ld -> native exec,
exit-code oracle) alongside the existing /run JIT lane; closes the
118_decl_extra_attrs skip. 141_threadlocal_mutate (new TLS case from the interp
merge) is quarantined in SKIP -- a separate, pre-existing cc -S TLS-symbolization
gap (unsymbolized adrp x,0x0), not a named-section issue.
Verified from a clean build at this base:
roundtrip-toy 624 pass / 0 fail / 1 skip (141)
test-asm-roundtrip-exec 858/0/0, test-asm, test-isa,
test-toy 1338/0/24, diff-llvm agrees (271 agree / 32 reloc-equiv / 0 differ).
Diffstat:
6 files changed, 175 insertions(+), 40 deletions(-)
diff --git a/doc/ASM_ROUNDTRIP_TESTING.md b/doc/ASM_ROUNDTRIP_TESTING.md
@@ -150,11 +150,14 @@ and four classes of gap the hand corpus never reached:
`sym+0` and `br` dispatched into hyperspace. It now writes the addend into the
data like codegen (harmless on ELF, where RELA overwrites it with S+A).
-The lone remaining skip (`118_decl_extra_attrs`): a global with an explicit
-`__attribute__((section(...)))` + merge attributes lands in a `SEC_OTHER`
-section that `cc -S` doesn't emit. Round-tripping arbitrary named sections with
-their flags (without also emitting `.eh_frame`/debug) is a section-attribute
-feature beyond this lane's scope. Also still open: `smulh`/`umulh` (and the
+Named sections now round-trip (closed `118_decl_extra_attrs`): a global in an
+explicit `__attribute__((section(...)))` with merge/strings/retain attributes
+lands in a `SEC_OTHER` section, which `cc -S` now emits as
+`.section <name>, "<flags>", @<type>[, <entsize>]` (GNU-as) and `as`
+reconstructs — the round-tripped section table matches a direct `cc -c` object.
+The remaining skip is `141_threadlocal_mutate`, a new case blocked on TLS
+symbolization (`cc -S` emits an unsymbolized `adrp x,0x0` for a thread-local
+access; tracked separately). Also still open: `smulh`/`umulh` (and the
`*L` long multiplies) DP3 *decode* — correctness is restored by the `.inst`
fix, but `-S` still shows `.inst` for them.
diff --git a/src/api/asm_emit.c b/src/api/asm_emit.c
@@ -196,21 +196,69 @@ static CfreeStatus emit_size_directives(Writer* w, Compiler* c, ObjBuilder* ob,
return st;
}
-static const char* sec_directive(const Section* sec) {
+/* GNU-as flag letters for a named (SEC_OTHER) section's `, "flags"` operand.
+ * The assembler's .section parser (src/asm/asm.c) is the inverse mapping.
+ *
+ * Writes one letter to `w` for each SF_* bit set in `flags`, always in the
+ * fixed order a, w, x, M, S, T, R (ALLOC, WRITE, EXEC, MERGE, STRINGS, TLS,
+ * RETAIN). Bits that have no letter here are not emitted, and an all-clear
+ * `flags` writes nothing. The surrounding double quotes are written by the
+ * caller, not by this helper. */
+static void w_secflags(Writer* w, u16 flags) {
+ if (flags & SF_ALLOC) w_str(w, "a");
+ if (flags & SF_WRITE) w_str(w, "w");
+ if (flags & SF_EXEC) w_str(w, "x");
+ if (flags & SF_MERGE) w_str(w, "M");
+ if (flags & SF_STRINGS) w_str(w, "S");
+ if (flags & SF_TLS) w_str(w, "T");
+ if (flags & SF_RETAIN) w_str(w, "R");
+}
+
+/* Emit the section directive line for `sec`, with indentation and trailing
+ * newline. Returns 1 when a directive was written and 0 to signal "skip this
+ * section"; nothing is written to `w` on the skip path. Skipped are:
+ *   - SEC_RODATA / SEC_DATA / SEC_BSS / SEC_OTHER sections carrying SF_TLS
+ *     (SEC_TEXT is emitted without a TLS check),
+ *   - a SEC_OTHER section whose pooled name is empty,
+ *   - every kind not listed in the switch (e.g. SEC_DEBUG).
+ * The caller drops a skipped section, matching the prior
+ * sec_directive()==NULL behavior.
+ *
+ * Canonical kinds emit their fixed directive unchanged. SEC_OTHER (a global in
+ * an explicitly-named section, e.g. __attribute__((section(...)))) emits the
+ * real name plus its flags/type/entsize in GNU-as syntax, so the label and
+ * bytes survive re-assembly. The type is @nobits when sec->sem is SSEM_NOBITS
+ * and @progbits otherwise. The entsize operand is written when SF_MERGE is set
+ * or sec->entsize is nonzero; a zero entsize is printed as 1. Previously such
+ * sections were dropped, leaving a dangling reference (see
+ * test/asm/roundtrip_toy.sh case 118). */
+static int emit_section_header(Writer* w, Compiler* c, const Section* sec) {
switch (sec->kind) {
case SEC_TEXT:
- return ".text";
+ w_str(w, " .text");
+ w_newline(w);
+ return 1;
case SEC_RODATA:
- if (sec->flags & SF_TLS) return NULL;
- return ".section\t.rodata";
+ if (sec->flags & SF_TLS) return 0;
+ w_str(w, " .section\t.rodata");
+ w_newline(w);
+ return 1;
case SEC_DATA:
- if (sec->flags & SF_TLS) return NULL;
- return ".section\t.data";
+ if (sec->flags & SF_TLS) return 0;
+ w_str(w, " .section\t.data");
+ w_newline(w);
+ return 1;
case SEC_BSS:
- if (sec->flags & SF_TLS) return NULL;
- return ".section\t.bss";
+ if (sec->flags & SF_TLS) return 0;
+ w_str(w, " .section\t.bss");
+ w_newline(w);
+ return 1;
+ case SEC_OTHER: {
+ Slice nm;
+ if (sec->flags & SF_TLS) return 0;
+ nm = pool_slice(c->global, sec->name);
+ if (nm.len == 0) return 0;
+ w_str(w, " .section\t");
+ cfree_writer_write(w, nm.s, nm.len);
+ w_str(w, ", \"");
+ w_secflags(w, sec->flags);
+ w_str(w, "\", ");
+ w_str(w, sec->sem == SSEM_NOBITS ? "@nobits" : "@progbits");
+ if ((sec->flags & SF_MERGE) || sec->entsize) {
+ w_str(w, ", ");
+ w_dec(w, (u64)(sec->entsize ? sec->entsize : 1));
+ }
+ w_newline(w);
+ return 1;
+ }
default:
- return NULL;
+ return 0;
}
}
@@ -775,7 +823,6 @@ CfreeStatus cfree_obj_builder_emit_asm(CfreeObjBuilder* builder,
for (i = 1; i < nsec; ++i) {
const Section* sec = obj_section_get(ob, (ObjSecId)i);
- const char* dir;
SymLabel* labels;
u32 nlabels, total, off, li;
ArchDisasm* dasm;
@@ -788,15 +835,10 @@ CfreeStatus cfree_obj_builder_emit_asm(CfreeObjBuilder* builder,
EmitCtx ctx;
if (!sec || sec->removed) continue;
- dir = sec_directive(sec);
- if (!dir) continue;
+ if (!emit_section_header(w, c, sec)) continue;
labels = collect_labels(c, ob, (ObjSecId)i, &nlabels);
- w_str(w, " ");
- w_str(w, dir);
- w_newline(w);
-
if (sec->align > 1) {
w_str(w, " .align ");
w_dec(w, (u64)sec->align);
diff --git a/src/asm/asm.c b/src/asm/asm.c
@@ -693,10 +693,64 @@ static void do_directive(AsmDriver* d, Sym name) {
}
SecKind kind = SEC_OTHER;
u16 flags = 0;
+ u32 entsize = 0;
+ int have_flags = 0;
+
+ /* Optional GNU-as operands: , "flags" [, @type [, entsize]]. The emitter
+ * (src/api/asm_emit.c) writes these for SEC_OTHER named sections; parse
+ * them back so a global's section flags/entsize round-trip faithfully. */
+ if (asm_driver_eat_comma(d)) {
+ AsmTok ft = d_peek(d);
+ if (ft.kind == ASM_TOK_STR) {
+ size_t fn = 0;
+ const char* fp = asm_str(d, ft.spelling, &fn);
+ size_t fi;
+ for (fi = 0; fp && fi < fn; ++fi) {
+ switch (fp[fi]) {
+ case 'a': flags |= SF_ALLOC; break;
+ case 'w': flags |= SF_WRITE; break;
+ case 'x': flags |= SF_EXEC; break;
+ case 'M': flags |= SF_MERGE; break;
+ case 'S': flags |= SF_STRINGS; break;
+ case 'T': flags |= SF_TLS; break;
+ case 'R': flags |= SF_RETAIN; break;
+ default: break; /* surrounding quotes / unknown letters */
+ }
+ }
+ have_flags = 1;
+ (void)d_next(d);
+ if (asm_driver_eat_comma(d)) {
+ AsmTok ty = d_peek(d);
+ if (tok_is_punct(ty, '@')) {
+ (void)d_next(d);
+ (void)d_next(d); /* the @type ident (progbits/nobits) */
+ } else if (ty.kind == ASM_TOK_IDENT) {
+ (void)d_next(d);
+ }
+ if (asm_driver_eat_comma(d)) {
+ AsmTok es = d_peek(d);
+ if (es.kind == ASM_TOK_NUM) {
+ entsize = (u32)lit_to_i64(d, es.spelling);
+ (void)d_next(d);
+ }
+ }
+ }
+ }
+ }
+
{
size_t nn = 0;
const char* p = asm_str(d, sname, &nn);
- if (p) {
+ if (have_flags) {
+ /* Explicit flags: a canonical name keeps its kind; any other name is a
+ * SEC_OTHER named section (matching codegen for section(...) globals). */
+ if (p && nn == 5 && memcmp(p, ".text", 5) == 0) kind = SEC_TEXT;
+ else if (p && nn == 7 && memcmp(p, ".rodata", 7) == 0) kind = SEC_RODATA;
+ else if (p && nn == 5 && memcmp(p, ".data", 5) == 0) kind = SEC_DATA;
+ else if (p && nn == 4 && memcmp(p, ".bss", 4) == 0) kind = SEC_BSS;
+ else kind = SEC_OTHER;
+ } else if (p) {
+ /* No flag string: infer kind+flags from a canonical name prefix. */
if (nn >= 5 && memcmp(p, ".text", 5) == 0) {
kind = SEC_TEXT;
flags = (u16)(SF_ALLOC | SF_EXEC);
@@ -712,9 +766,15 @@ static void do_directive(AsmDriver* d, Sym name) {
}
}
}
- /* Skip optional remainder: flags string, type tag, etc. */
+
+ /* Consume any remaining operands (e.g. ,unique,N or group fields). */
d_skip_to_eol(d);
- set_section(d, sname, kind, flags, 1);
+ {
+ ObjSecId sid = ensure_section(d, sname, kind, flags, 1);
+ if (entsize) obj_section_set_entsize(d->ob, sid, entsize);
+ d->cur_sec = sid;
+ d->mc->set_section(d->mc, sid);
+ }
return;
}
if (sym_eq(d, name, "globl") || sym_eq(d, name, "global")) {
diff --git a/src/obj/obj.c b/src/obj/obj.c
@@ -381,6 +381,11 @@ void obj_section_set_flags(ObjBuilder* ob, ObjSecId id, u16 flags) {
if (s && id != OBJ_SEC_NONE) s->flags = flags;
}
+/* Overwrite the entry size of section `id` with `entsize`. Silently does
+ * nothing when Sections_at() yields no section or `id` is OBJ_SEC_NONE, the
+ * same guard obj_section_set_flags uses. The value is stored as given; zero
+ * is not remapped. */
+void obj_section_set_entsize(ObjBuilder* ob, ObjSecId id, u32 entsize) {
+ Section* s = Sections_at(&ob->sections, id);
+ if (s && id != OBJ_SEC_NONE) s->entsize = entsize;
+}
+
void obj_section_set_align(ObjBuilder* ob, ObjSecId id, u32 align) {
Section* s = Sections_at(&ob->sections, id);
if (s && id != OBJ_SEC_NONE) s->align = align ? align : 1;
diff --git a/src/obj/obj.h b/src/obj/obj.h
@@ -401,6 +401,7 @@ ObjSecId obj_section(ObjBuilder*, Sym name, SecKind, u16 flags, u32 align);
ObjSecId obj_section_ex(ObjBuilder*, Sym name, SecKind, SecSem, u16 flags,
u32 align, u32 entsize, u32 link, u32 info);
void obj_section_set_flags(ObjBuilder*, ObjSecId, u16 flags);
+void obj_section_set_entsize(ObjBuilder*, ObjSecId, u32 entsize);
void obj_section_set_align(ObjBuilder*, ObjSecId, u32 align);
void obj_section_set_group(ObjBuilder*, ObjSecId, ObjGroupId);
void obj_section_set_link_info(ObjBuilder*, ObjSecId, ObjSecId link, u32 info);
diff --git a/test/asm/roundtrip_toy.sh b/test/asm/roundtrip_toy.sh
@@ -7,17 +7,18 @@
# test/asm/roundtrip/ set: for every case, compare the DIRECT compile to the
# round-tripped one and require the same exit code —
#
-# direct: cfree run case.toy
-# round-trip: cfree cc -S case.toy | cfree as | cfree run <obj>
+# Two exec lanes over the re-assembled object, both compared to the case's
+# exit-code oracle (test/toy/cases/<name>.expected, default 0):
+# /run: cfree cc -S | cfree as | cfree run <obj> (in-process JIT)
+# /ld: cfree cc -S | cfree as | cfree ld | ./a.out (native link + exec)
#
# Native target (aarch64 macOS here): cfree run propagates main()'s return as
# the process exit, so the oracle is the exit code. This found a real miscompile
# (a multiply-high the disassembler couldn't decode, dropped by `as` until the
# `.inst` fix) that the hand corpus never reached.
#
-# Cases that hit a known `cc -S` symbolizer gap (tentative/common defs, merged
-# strings, computed-goto label-address tables) are listed in SKIP below until
-# those land; the lane stays green and gates regressions. Opt-in.
+# Cases that hit a known `cc -S` symbolizer gap are quarantined in SKIP below so
+# the lane stays green and gates regressions (currently one TLS case). Opt-in.
set -u
@@ -28,12 +29,15 @@ WORK="$ROOT/build/test/asm/roundtrip_toy"
OPTS="${CFREE_TEST_OPTS:-O0 O1}"
FILTER="${1:-}"
-# Remaining known gap: a global with an explicit __attribute__((section(...)))
-# plus merge/strings attributes becomes a SEC_OTHER section, which cc -S does
-# not emit (so `as` sees an undefined reference). Round-tripping arbitrary named
-# sections with their flags is a section-attribute feature beyond this lane's
-# scope. See doc/ASM_ROUNDTRIP_TESTING.md.
-SKIP="118_decl_extra_attrs"
+# Named sections now round-trip (closed 118_decl_extra_attrs): cc -S emits the
+# section with its "flags"/@type/entsize in GNU-as syntax and `as` reconstructs
+# it. SKIP quarantines cases blocked on a *separate*, known cc -S symbolizer gap:
+# - 141_threadlocal_mutate: a thread-local access emits an unsymbolized
+# `adrp x, 0x0` (the TLS GOT/descriptor reloc is not yet symbolized in cc -S),
+# which `as` rejects ("adr/adrp: symbol required"). This is TLS symbolization,
+# tracked separately from named-section round-trip. See
+# doc/ASM_ROUNDTRIP_TESTING.md.
+SKIP="141_threadlocal_mutate"
color_red() { printf '\033[31m%s\033[0m' "$1"; }
color_grn() { printf '\033[32m%s\033[0m' "$1"; }
@@ -66,16 +70,36 @@ for src in "$CASES"/*.toy; do
fail=$((fail+1)); failnames+=("$name[-$opt] as: $(head -1 "$w/as.err"|sed 's|.*: ||')")
printf ' %s %s[-%s] as failed: %s\n' "$(color_red FAIL)" "$name" "$opt" "$(head -1 "$w/as.err"|sed 's|.*: ||')"; continue
fi
+ # Lane /run: cfree run JIT-links and executes the object in-process.
"$CFREE" run "$w/rt.o" >"$w/out" 2>"$w/run.err"; rc=$?
if [ -s "$w/run.err" ]; then
- fail=$((fail+1)); failnames+=("$name[-$opt] run: $(head -1 "$w/run.err"|sed 's|.*: ||')")
- printf ' %s %s[-%s] run failed: %s\n' "$(color_red FAIL)" "$name" "$opt" "$(head -1 "$w/run.err"|sed 's|.*: ||')"; continue
+ fail=$((fail+1)); failnames+=("$name[-$opt]/run: $(head -1 "$w/run.err"|sed 's|.*: ||')")
+ printf ' %s %s[-%s]/run failed: %s\n' "$(color_red FAIL)" "$name" "$opt" "$(head -1 "$w/run.err"|sed 's|.*: ||')"
+ elif [ "$rc" -eq "$exp" ]; then
+ pass=$((pass+1))
+ else
+ fail=$((fail+1)); failnames+=("$name[-$opt]/run exit $rc != $exp")
+ printf ' %s %s[-%s]/run exit %d != expected %d\n' "$(color_red FAIL)" "$name" "$opt" "$rc" "$exp"
+ fi
+
+ # Lane /ld: cfree ld links a real native executable; run it via the OS
+ # loader and compare exit. Exercises cfree ld (layout + relocation +
+ # image emit) and a real process exit — none of which the JIT 'run'
+ # path covers. Native host only (no -target is passed).
+ if ! "$CFREE" ld "$w/rt.o" -o "$w/a.out" 2>"$w/ld.err" || [ -s "$w/ld.err" ]; then
+ fail=$((fail+1)); failnames+=("$name[-$opt]/ld: $(head -1 "$w/ld.err"|sed 's|.*: ||')")
+ printf ' %s %s[-%s]/ld failed: %s\n' "$(color_red FAIL)" "$name" "$opt" "$(head -1 "$w/ld.err"|sed 's|.*: ||')"; continue
fi
- if [ "$rc" -eq "$exp" ]; then
+ chmod +x "$w/a.out" 2>/dev/null || true
+ "$w/a.out" >"$w/ldout" 2>"$w/ldrun.err"; ldrc=$?
+ if [ -s "$w/ldrun.err" ]; then
+ fail=$((fail+1)); failnames+=("$name[-$opt]/ld-run: $(head -1 "$w/ldrun.err"|sed 's|.*: ||')")
+ printf ' %s %s[-%s]/ld-run stderr: %s\n' "$(color_red FAIL)" "$name" "$opt" "$(head -1 "$w/ldrun.err"|sed 's|.*: ||')"
+ elif [ "$ldrc" -eq "$exp" ]; then
pass=$((pass+1))
else
- fail=$((fail+1)); failnames+=("$name[-$opt] exit $rc != $exp")
- printf ' %s %s[-%s] exit %d != expected %d\n' "$(color_red FAIL)" "$name" "$opt" "$rc" "$exp"
+ fail=$((fail+1)); failnames+=("$name[-$opt]/ld exit $ldrc != $exp")
+ printf ' %s %s[-%s]/ld exit %d != expected %d\n' "$(color_red FAIL)" "$name" "$opt" "$ldrc" "$exp"
fi
done
done