kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 8fa59182120ef726749e340bca33f554eee98588
parent 7dba70b7a1cba47915a1a2c9129f2ce7b1fbdcd1
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri, 29 May 2026 19:20:22 -0700

asm: relax same-section local branches at finalize (round-trip P2)

The standalone assembler emitted a relocation for every symbolic branch
target, including intra-section branches that codegen resolves locally.
That made cc -S | as diverge from cc -c on the .text relocation table for
any control-flow-bearing function, so the L1 round-trip lane auto-skipped
branchy code.

Match codegen/GNU-as: at asm_parse finalize, resolve branch relocations
(JUMP26/CONDBR19/TSTBR14, never CALL26) whose target is a defined, local,
non-function symbol in the same section — patch the displacement via
link_reloc_apply with section-relative S/P and drop the reloc. The local
guard mirrors GNU as (a global target may be interposed); the non-function
guard mirrors codegen (a same-file call/tail-call keeps its reloc).

Round-trip aa64 is now 36 pass / 0 skip (was 28 pass / 8 L1 skips). The
existing asm, inline-asm, and link corpora are unaffected.

Diffstat:
M src/asm/asm.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M test/asm/roundtrip.sh | 14 +++++---------
2 files changed, 65 insertions(+), 9 deletions(-)

diff --git a/src/asm/asm.c b/src/asm/asm.c @@ -32,6 +32,7 @@ #include "core/pool.h" #include "core/slice.h" #include "obj/obj.h" +#include "obj/reloc_apply.h" HASHMAP_DEFINE(SymSecMap, Sym, ObjSecId, hash_u32); HASHMAP_DEFINE(SymSymMap, Sym, ObjSymId, hash_u32); @@ -975,6 +976,64 @@ static void do_directive(AsmDriver* d, Sym name) { d_skip_to_eol(d); } +/* ---- same-section branch relaxation ---- + * + * The per-arch parser emits a relocation for every symbolic branch target, + * even one that resolves within the current section (a forward reference like + * `b .Lfoo` is only known to be local once `.Lfoo:` is seen). GNU as / llvm-mc + * — and cfree's own codegen — instead resolve such intra-section branches at + * assembly time: compute the displacement, patch the instruction, and emit no + * relocation. Matching that is what makes `cc -S | as` reproduce `cc -c`'s + * .text relocation table for control-flow-bearing code (the L1 round-trip + * lane; see doc/ASM_ROUNDTRIP_TESTING.md). + * + * We relax only PC-relative *branch* relocations (never CALL26 — a call keeps + * its relocation on both sides) whose target is a symbol defined in the same + * section, with local binding, and not a function entry. The "local" guard + * matches GNU as (a global symbol may be interposed, so its branch keeps the + * relocation); the "not a function" guard matches cfree codegen, which keeps + * the relocation for an intra-file call/tail-call to a function symbol while + * resolving branches to internal labels. + * + * Restricted to the AArch64 branch kinds for now — the round-trip vertical + * slice is aa64; rv64/x64 keep their current behavior. 
*/ +static int is_relaxable_branch_kind(u16 kind) { + switch (kind) { + case R_AARCH64_JUMP26: + case R_AARCH64_CONDBR19: + case R_AARCH64_TSTBR14: + return 1; + default: + return 0; + } +} + +static void relax_local_branches(AsmDriver* d) { + u32 total = obj_reloc_total(d->ob), i; + for (i = 0; i < total; ++i) { + const Reloc* r = obj_reloc_at(d->ob, i); + const ObjSym* tgt; + Section* sec; + u8 insn[4]; + if (!r || r->removed) continue; + if (!is_relaxable_branch_kind(r->kind)) continue; + tgt = obj_symbol_get(d->ob, r->sym); + if (!tgt) continue; + if (tgt->section_id != r->section_id) continue; /* cross-section / undef */ + if (tgt->bind != SB_LOCAL) continue; /* preemptible; keep */ + if (tgt->kind == SK_FUNC) continue; /* call/tail-call; keep */ + sec = (Section*)obj_section_get(d->ob, r->section_id); + if (!sec) continue; + if ((u64)r->offset + 4 > sec->bytes.total) continue; + buf_read(&sec->bytes, r->offset, insn, 4); + /* Section-relative S and P make the base cancel: disp = S + A - P. */ + link_reloc_apply(d->c, (RelocKind)r->kind, insn, tgt->value, r->addend, + r->offset); + buf_patch(&sec->bytes, r->offset, insn, 4); + ((Reloc*)r)->removed = 1; + } +} + /* ---- driver loop ---- */ static void process_label(AsmDriver* d, Sym name) { @@ -1122,6 +1181,7 @@ void asm_parse(Compiler* c, AsmLexer* l, MCEmitter* mc) { } promote_undef_externs(&d); + relax_local_branches(&d); if (d.arch_asm && d.arch_asm->destroy) d.arch_asm->destroy(d.arch_asm); SymSecMap_fini(&d.sec_map); diff --git a/test/asm/roundtrip.sh b/test/asm/roundtrip.sh @@ -180,15 +180,11 @@ for src in "$CORPUS_DIR"/*.c; do fi # ---- L1: byte + reloc round-trip --------------------------------- - # Intra-function branches are re-assemblable (L0/L2 cover them) but the - # assembler relocates same-section branch targets that codegen resolves - # locally, so the .text reloc tables differ. 
Skip L1 for such cases - # until the assembler grows same-section branch relaxation (P2, - # doc/ASM_ROUNDTRIP_TESTING.md). The synthesized `Lcf_` label in the - # listing is the marker. - if [ $RUN_L1 -eq 1 ] && grep -q 'Lcf_' "$asm"; then - note_skip "$tag/L1" "intra-function branch; needs assembler local-reloc relaxation (P2)" - elif [ $RUN_L1 -eq 1 ]; then + # Intra-function branches now round-trip: `as` relaxes same-section + # local-label branches at finalize (matching codegen), so the .text + # reloc tables agree. A case that still can't round-trip is gated with + # a per-case `<name>.skip` file rather than auto-skipped here. + if [ $RUN_L1 -eq 1 ]; then l1_ok=1; l1_why="" if ! "$CFREE" cc -c "-$opt" -target "$TRIPLE" "$src" -o "$direct" \ >"$work/cc_c.log" 2>&1; then