kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 68782ef83e884f5df4fdc516ad044a14cc6f7d10
parent 0980aa9bd5b779ecfa4c0e35d2f5c5b18a4306b6
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat, 30 May 2026 11:08:46 -0700

asm: round-trip SEC_OTHER named sections; toy /ld exec lane

cc -S dropped every SEC_OTHER section (sec_directive returned NULL), so a global
in an explicit named section -- e.g. .section(".rodata.toy.merge") with
merge/strings/retain attrs -- lost its label and bytes; the re-assembled object
then carried a dangling reference and failed to link.

Emit (src/api/asm_emit.c): emit_section_header writes SEC_OTHER as
`.section <name>, "<flags>", @<type>[, <entsize>]` in GNU-as syntax, mapping
SF_* -> a/w/x/M/S/T/R. Canonical kinds emit their fixed directive unchanged.

Assemble (src/asm/asm.c): the .section parser reads the optional
"flags"/@type/entsize operands and reconstructs flags/kind/entsize (was skipped
to EOL). Adds obj_section_set_entsize (src/obj/obj.{c,h}). The round-tripped
object's section table is byte-identical to a direct cc -c object.

test/asm/roundtrip_toy.sh: add a native /ld lane (cfree ld -> native exec,
exit-code oracle) alongside the existing /run JIT lane; closes the
118_decl_extra_attrs skip. 141_threadlocal_mutate (new TLS case from the interp
merge) is quarantined in SKIP -- a separate, pre-existing cc -S TLS-symbolization
gap (unsymbolized adrp x,0x0), not a named-section issue.

Verified from a clean build at this base:
  roundtrip-toy 624 pass / 0 fail / 1 skip (141)
  test-asm-roundtrip-exec 858/0/0, test-asm, test-isa,
  test-toy 1338/0/24, diff-llvm agrees (271 agree / 32 reloc-equiv / 0 differ).

Diffstat:
Mdoc/ASM_ROUNDTRIP_TESTING.md | 13++++++++-----
Msrc/api/asm_emit.c | 74++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------
Msrc/asm/asm.c | 66+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
Msrc/obj/obj.c | 5+++++
Msrc/obj/obj.h | 1+
Mtest/asm/roundtrip_toy.sh | 56++++++++++++++++++++++++++++++++++++++++----------------
6 files changed, 175 insertions(+), 40 deletions(-)

diff --git a/doc/ASM_ROUNDTRIP_TESTING.md b/doc/ASM_ROUNDTRIP_TESTING.md @@ -150,11 +150,14 @@ and four classes of gap the hand corpus never reached: `sym+0` and `br` dispatched into hyperspace. It now writes the addend into the data like codegen (harmless on ELF, where RELA overwrites it with S+A). -The lone remaining skip (`118_decl_extra_attrs`): a global with an explicit -`__attribute__((section(...)))` + merge attributes lands in a `SEC_OTHER` -section that `cc -S` doesn't emit. Round-tripping arbitrary named sections with -their flags (without also emitting `.eh_frame`/debug) is a section-attribute -feature beyond this lane's scope. Also still open: `smulh`/`umulh` (and the +Named sections now round-trip (closed `118_decl_extra_attrs`): a global in an +explicit `__attribute__((section(...)))` with merge/strings/retain attributes +lands in a `SEC_OTHER` section, which `cc -S` now emits as +`.section <name>, "<flags>", @<type>[, <entsize>]` (GNU-as) and `as` +reconstructs — the round-tripped section table matches a direct `cc -c` object. +The remaining skip is `141_threadlocal_mutate`, a new case blocked on TLS +symbolization (`cc -S` emits an unsymbolized `adrp x,0x0` for a thread-local +access; tracked separately). Also still open: `smulh`/`umulh` (and the `*L` long multiplies) DP3 *decode* — correctness is restored by the `.inst` fix, but `-S` still shows `.inst` for them. diff --git a/src/api/asm_emit.c b/src/api/asm_emit.c @@ -196,21 +196,69 @@ static CfreeStatus emit_size_directives(Writer* w, Compiler* c, ObjBuilder* ob, return st; } -static const char* sec_directive(const Section* sec) { +/* GNU-as flag letters for a named (SEC_OTHER) section's `, "flags"` operand. + * The assembler's .section parser (src/asm/asm.c) is the inverse mapping. 
*/ +static void w_secflags(Writer* w, u16 flags) { + if (flags & SF_ALLOC) w_str(w, "a"); + if (flags & SF_WRITE) w_str(w, "w"); + if (flags & SF_EXEC) w_str(w, "x"); + if (flags & SF_MERGE) w_str(w, "M"); + if (flags & SF_STRINGS) w_str(w, "S"); + if (flags & SF_TLS) w_str(w, "T"); + if (flags & SF_RETAIN) w_str(w, "R"); +} + +/* Emit the section directive line for `sec`, with indentation and trailing + * newline. Returns 0 to signal "skip this section" for kinds cc -S does not + * round-trip (TLS variants, SEC_DEBUG) — the caller drops the section, matching + * the prior sec_directive()==NULL behavior. + * + * Canonical kinds emit their fixed directive unchanged. SEC_OTHER (a global in + * an explicitly-named section, e.g. __attribute__((section(...)))) emits the + * real name plus its flags/type/entsize in GNU-as syntax, so the label and + * bytes survive re-assembly. Previously such sections were dropped, leaving a + * dangling reference (see test/asm/roundtrip_toy.sh case 118). */ +static int emit_section_header(Writer* w, Compiler* c, const Section* sec) { switch (sec->kind) { case SEC_TEXT: - return ".text"; + w_str(w, " .text"); + w_newline(w); + return 1; case SEC_RODATA: - if (sec->flags & SF_TLS) return NULL; - return ".section\t.rodata"; + if (sec->flags & SF_TLS) return 0; + w_str(w, " .section\t.rodata"); + w_newline(w); + return 1; case SEC_DATA: - if (sec->flags & SF_TLS) return NULL; - return ".section\t.data"; + if (sec->flags & SF_TLS) return 0; + w_str(w, " .section\t.data"); + w_newline(w); + return 1; case SEC_BSS: - if (sec->flags & SF_TLS) return NULL; - return ".section\t.bss"; + if (sec->flags & SF_TLS) return 0; + w_str(w, " .section\t.bss"); + w_newline(w); + return 1; + case SEC_OTHER: { + Slice nm; + if (sec->flags & SF_TLS) return 0; + nm = pool_slice(c->global, sec->name); + if (nm.len == 0) return 0; + w_str(w, " .section\t"); + cfree_writer_write(w, nm.s, nm.len); + w_str(w, ", \""); + w_secflags(w, sec->flags); + w_str(w, "\", "); 
+ w_str(w, sec->sem == SSEM_NOBITS ? "@nobits" : "@progbits"); + if ((sec->flags & SF_MERGE) || sec->entsize) { + w_str(w, ", "); + w_dec(w, (u64)(sec->entsize ? sec->entsize : 1)); + } + w_newline(w); + return 1; + } default: - return NULL; + return 0; } } @@ -775,7 +823,6 @@ CfreeStatus cfree_obj_builder_emit_asm(CfreeObjBuilder* builder, for (i = 1; i < nsec; ++i) { const Section* sec = obj_section_get(ob, (ObjSecId)i); - const char* dir; SymLabel* labels; u32 nlabels, total, off, li; ArchDisasm* dasm; @@ -788,15 +835,10 @@ CfreeStatus cfree_obj_builder_emit_asm(CfreeObjBuilder* builder, EmitCtx ctx; if (!sec || sec->removed) continue; - dir = sec_directive(sec); - if (!dir) continue; + if (!emit_section_header(w, c, sec)) continue; labels = collect_labels(c, ob, (ObjSecId)i, &nlabels); - w_str(w, " "); - w_str(w, dir); - w_newline(w); - if (sec->align > 1) { w_str(w, " .align "); w_dec(w, (u64)sec->align); diff --git a/src/asm/asm.c b/src/asm/asm.c @@ -693,10 +693,64 @@ static void do_directive(AsmDriver* d, Sym name) { } SecKind kind = SEC_OTHER; u16 flags = 0; + u32 entsize = 0; + int have_flags = 0; + + /* Optional GNU-as operands: , "flags" [, @type [, entsize]]. The emitter + * (src/api/asm_emit.c) writes these for SEC_OTHER named sections; parse + * them back so a global's section flags/entsize round-trip faithfully. 
*/ + if (asm_driver_eat_comma(d)) { + AsmTok ft = d_peek(d); + if (ft.kind == ASM_TOK_STR) { + size_t fn = 0; + const char* fp = asm_str(d, ft.spelling, &fn); + size_t fi; + for (fi = 0; fp && fi < fn; ++fi) { + switch (fp[fi]) { + case 'a': flags |= SF_ALLOC; break; + case 'w': flags |= SF_WRITE; break; + case 'x': flags |= SF_EXEC; break; + case 'M': flags |= SF_MERGE; break; + case 'S': flags |= SF_STRINGS; break; + case 'T': flags |= SF_TLS; break; + case 'R': flags |= SF_RETAIN; break; + default: break; /* surrounding quotes / unknown letters */ + } + } + have_flags = 1; + (void)d_next(d); + if (asm_driver_eat_comma(d)) { + AsmTok ty = d_peek(d); + if (tok_is_punct(ty, '@')) { + (void)d_next(d); + (void)d_next(d); /* the @type ident (progbits/nobits) */ + } else if (ty.kind == ASM_TOK_IDENT) { + (void)d_next(d); + } + if (asm_driver_eat_comma(d)) { + AsmTok es = d_peek(d); + if (es.kind == ASM_TOK_NUM) { + entsize = (u32)lit_to_i64(d, es.spelling); + (void)d_next(d); + } + } + } + } + } + { size_t nn = 0; const char* p = asm_str(d, sname, &nn); - if (p) { + if (have_flags) { + /* Explicit flags: a canonical name keeps its kind; any other name is a + * SEC_OTHER named section (matching codegen for section(...) globals). */ + if (p && nn == 5 && memcmp(p, ".text", 5) == 0) kind = SEC_TEXT; + else if (p && nn == 7 && memcmp(p, ".rodata", 7) == 0) kind = SEC_RODATA; + else if (p && nn == 5 && memcmp(p, ".data", 5) == 0) kind = SEC_DATA; + else if (p && nn == 4 && memcmp(p, ".bss", 4) == 0) kind = SEC_BSS; + else kind = SEC_OTHER; + } else if (p) { + /* No flag string: infer kind+flags from a canonical name prefix. */ if (nn >= 5 && memcmp(p, ".text", 5) == 0) { kind = SEC_TEXT; flags = (u16)(SF_ALLOC | SF_EXEC); @@ -712,9 +766,15 @@ static void do_directive(AsmDriver* d, Sym name) { } } } - /* Skip optional remainder: flags string, type tag, etc. */ + + /* Consume any remaining operands (e.g. ,unique,N or group fields). 
*/ d_skip_to_eol(d); - set_section(d, sname, kind, flags, 1); + { + ObjSecId sid = ensure_section(d, sname, kind, flags, 1); + if (entsize) obj_section_set_entsize(d->ob, sid, entsize); + d->cur_sec = sid; + d->mc->set_section(d->mc, sid); + } return; } if (sym_eq(d, name, "globl") || sym_eq(d, name, "global")) { diff --git a/src/obj/obj.c b/src/obj/obj.c @@ -381,6 +381,11 @@ void obj_section_set_flags(ObjBuilder* ob, ObjSecId id, u16 flags) { if (s && id != OBJ_SEC_NONE) s->flags = flags; } +void obj_section_set_entsize(ObjBuilder* ob, ObjSecId id, u32 entsize) { + Section* s = Sections_at(&ob->sections, id); + if (s && id != OBJ_SEC_NONE) s->entsize = entsize; +} + void obj_section_set_align(ObjBuilder* ob, ObjSecId id, u32 align) { Section* s = Sections_at(&ob->sections, id); if (s && id != OBJ_SEC_NONE) s->align = align ? align : 1; diff --git a/src/obj/obj.h b/src/obj/obj.h @@ -401,6 +401,7 @@ ObjSecId obj_section(ObjBuilder*, Sym name, SecKind, u16 flags, u32 align); ObjSecId obj_section_ex(ObjBuilder*, Sym name, SecKind, SecSem, u16 flags, u32 align, u32 entsize, u32 link, u32 info); void obj_section_set_flags(ObjBuilder*, ObjSecId, u16 flags); +void obj_section_set_entsize(ObjBuilder*, ObjSecId, u32 entsize); void obj_section_set_align(ObjBuilder*, ObjSecId, u32 align); void obj_section_set_group(ObjBuilder*, ObjSecId, ObjGroupId); void obj_section_set_link_info(ObjBuilder*, ObjSecId, ObjSecId link, u32 info); diff --git a/test/asm/roundtrip_toy.sh b/test/asm/roundtrip_toy.sh @@ -7,17 +7,18 @@ # test/asm/roundtrip/ set: for every case, compare the DIRECT compile to the # round-tripped one and require the same exit code — # -# direct: cfree run case.toy -# round-trip: cfree cc -S case.toy | cfree as | cfree run <obj> +# Two exec lanes over the re-assembled object, both compared to the case's +# exit-code oracle (test/toy/cases/<name>.expected, default 0): +# /run: cfree cc -S | cfree as | cfree run <obj> (in-process JIT) +# /ld: cfree cc -S | cfree as | 
cfree ld | ./a.out (native link + exec) # # Native target (aarch64 macOS here): cfree run propagates main()'s return as # the process exit, so the oracle is the exit code. This found a real miscompile # (a multiply-high the disassembler couldn't decode, dropped by `as` until the # `.inst` fix) that the hand corpus never reached. # -# Cases that hit a known `cc -S` symbolizer gap (tentative/common defs, merged -# strings, computed-goto label-address tables) are listed in SKIP below until -# those land; the lane stays green and gates regressions. Opt-in. +# Any case that hits a `cc -S` symbolizer gap can be quarantined in SKIP below +# so the lane stays green and gates regressions; SKIP currently lists only +# 141_threadlocal_mutate (see below). Opt-in. set -u @@ -28,12 +29,15 @@ WORK="$ROOT/build/test/asm/roundtrip_toy" OPTS="${CFREE_TEST_OPTS:-O0 O1}" FILTER="${1:-}" -# Remaining known gap: a global with an explicit __attribute__((section(...))) -# plus merge/strings attributes becomes a SEC_OTHER section, which cc -S does -# not emit (so `as` sees an undefined reference). Round-tripping arbitrary named -# sections with their flags is a section-attribute feature beyond this lane's -# scope. See doc/ASM_ROUNDTRIP_TESTING.md. -SKIP="118_decl_extra_attrs" +# Named sections now round-trip (closed 118_decl_extra_attrs): cc -S emits the +# section with its "flags"/@type/entsize in GNU-as syntax and `as` reconstructs +# it. SKIP quarantines cases blocked on a *separate*, known cc -S symbolizer gap: +# - 141_threadlocal_mutate: a thread-local access emits an unsymbolized +# `adrp x, 0x0` (the TLS GOT/descriptor reloc is not yet symbolized in cc -S), +# which `as` rejects ("adr/adrp: symbol required"). This is TLS symbolization, +# tracked separately from named-section round-trip. See +# doc/ASM_ROUNDTRIP_TESTING.md. 
+SKIP="141_threadlocal_mutate" color_red() { printf '\033[31m%s\033[0m' "$1"; } color_grn() { printf '\033[32m%s\033[0m' "$1"; } @@ -66,16 +70,36 @@ for src in "$CASES"/*.toy; do fail=$((fail+1)); failnames+=("$name[-$opt] as: $(head -1 "$w/as.err"|sed 's|.*: ||')") printf ' %s %s[-%s] as failed: %s\n' "$(color_red FAIL)" "$name" "$opt" "$(head -1 "$w/as.err"|sed 's|.*: ||')"; continue fi + # Lane /run: cfree run JIT-links and executes the object in-process. "$CFREE" run "$w/rt.o" >"$w/out" 2>"$w/run.err"; rc=$? if [ -s "$w/run.err" ]; then - fail=$((fail+1)); failnames+=("$name[-$opt] run: $(head -1 "$w/run.err"|sed 's|.*: ||')") - printf ' %s %s[-%s] run failed: %s\n' "$(color_red FAIL)" "$name" "$opt" "$(head -1 "$w/run.err"|sed 's|.*: ||')"; continue + fail=$((fail+1)); failnames+=("$name[-$opt]/run: $(head -1 "$w/run.err"|sed 's|.*: ||')") + printf ' %s %s[-%s]/run failed: %s\n' "$(color_red FAIL)" "$name" "$opt" "$(head -1 "$w/run.err"|sed 's|.*: ||')" + elif [ "$rc" -eq "$exp" ]; then + pass=$((pass+1)) + else + fail=$((fail+1)); failnames+=("$name[-$opt]/run exit $rc != $exp") + printf ' %s %s[-%s]/run exit %d != expected %d\n' "$(color_red FAIL)" "$name" "$opt" "$rc" "$exp" + fi + + # Lane /ld: cfree ld links a real native executable; run it via the OS + # loader and compare exit. Exercises cfree ld (layout + relocation + + # image emit) and a real process exit — none of which the JIT 'run' + # path covers. Native host only (no -target is passed). + if ! "$CFREE" ld "$w/rt.o" -o "$w/a.out" 2>"$w/ld.err" || [ -s "$w/ld.err" ]; then + fail=$((fail+1)); failnames+=("$name[-$opt]/ld: $(head -1 "$w/ld.err"|sed 's|.*: ||')") + printf ' %s %s[-%s]/ld failed: %s\n' "$(color_red FAIL)" "$name" "$opt" "$(head -1 "$w/ld.err"|sed 's|.*: ||')"; continue fi - if [ "$rc" -eq "$exp" ]; then + chmod +x "$w/a.out" 2>/dev/null || true + "$w/a.out" >"$w/ldout" 2>"$w/ldrun.err"; ldrc=$? 
+ if [ -s "$w/ldrun.err" ]; then + fail=$((fail+1)); failnames+=("$name[-$opt]/ld-run: $(head -1 "$w/ldrun.err"|sed 's|.*: ||')") + printf ' %s %s[-%s]/ld-run stderr: %s\n' "$(color_red FAIL)" "$name" "$opt" "$(head -1 "$w/ldrun.err"|sed 's|.*: ||')" + elif [ "$ldrc" -eq "$exp" ]; then pass=$((pass+1)) else - fail=$((fail+1)); failnames+=("$name[-$opt] exit $rc != $exp") - printf ' %s %s[-%s] exit %d != expected %d\n' "$(color_red FAIL)" "$name" "$opt" "$rc" "$exp" + fail=$((fail+1)); failnames+=("$name[-$opt]/ld exit $ldrc != $exp") + printf ' %s %s[-%s]/ld exit %d != expected %d\n' "$(color_red FAIL)" "$name" "$opt" "$ldrc" "$exp" fi done done