commit 8fa59182120ef726749e340bca33f554eee98588
parent 7dba70b7a1cba47915a1a2c9129f2ce7b1fbdcd1
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 29 May 2026 19:20:22 -0700
asm: relax same-section local branches at finalize (round-trip P2)
The standalone assembler emitted a relocation for every symbolic branch
target, including intra-section branches that codegen resolves locally.
That made cc -S | as diverge from cc -c on the .text relocation table for
any control-flow-bearing function, so the L1 round-trip lane auto-skipped
branchy code.
Match codegen/GNU-as: at asm_parse finalize, resolve branch relocations
(JUMP26/CONDBR19/TSTBR14, never CALL26) whose target is a defined, local,
non-function symbol in the same section — patch the displacement via
link_reloc_apply with section-relative S/P and drop the reloc. The local
guard mirrors GNU as (a global target may be interposed); the non-function
guard mirrors codegen (a same-file call/tail-call keeps its reloc).
Round-trip aa64 now 36 pass / 0 skip (was 28 / 8 L1-skips). Existing asm,
inline-asm, and link corpora unaffected.
Diffstat:
2 files changed, 65 insertions(+), 9 deletions(-)
diff --git a/src/asm/asm.c b/src/asm/asm.c
@@ -32,6 +32,7 @@
#include "core/pool.h"
#include "core/slice.h"
#include "obj/obj.h"
+#include "obj/reloc_apply.h"
HASHMAP_DEFINE(SymSecMap, Sym, ObjSecId, hash_u32);
HASHMAP_DEFINE(SymSymMap, Sym, ObjSymId, hash_u32);
@@ -975,6 +976,64 @@ static void do_directive(AsmDriver* d, Sym name) {
d_skip_to_eol(d);
}
+/* ---- same-section branch relaxation ----
+ *
+ * The per-arch parser emits a relocation for every symbolic branch target,
+ * even one that resolves within the current section (a forward reference like
+ * `b .Lfoo` is only known to be local once `.Lfoo:` is seen). GNU as / llvm-mc
+ * — and cfree's own codegen — instead resolve such intra-section branches at
+ * assembly time: compute the displacement, patch the instruction, and emit no
+ * relocation. Matching that is what makes `cc -S | as` reproduce `cc -c`'s
+ * .text relocation table for control-flow-bearing code (the L1 round-trip
+ * lane; see doc/ASM_ROUNDTRIP_TESTING.md).
+ *
+ * We relax only PC-relative *branch* relocations (never CALL26 — a call keeps
+ * its relocation on both sides) whose target is a symbol defined in the same
+ * section, with local binding, and not a function entry. The "local" guard
+ * matches GNU as (a global symbol may be interposed, so its branch keeps the
+ * relocation); the "not a function" guard matches cfree codegen, which keeps
+ * the relocation for an intra-file call/tail-call to a function symbol while
+ * resolving branches to internal labels.
+ *
+ * Restricted to the AArch64 branch kinds for now — the round-trip vertical
+ * slice is aa64; rv64/x64 keep their current behavior. */
+/* Whitelist of relocation kinds that relax_local_branches may resolve at
+ * assembly time. Returns 1 for R_AARCH64_JUMP26, R_AARCH64_CONDBR19 and
+ * R_AARCH64_TSTBR14, and 0 for every other kind (so anything not listed
+ * keeps its relocation). */
+static int is_relaxable_branch_kind(u16 kind) {
+  if (kind == R_AARCH64_JUMP26) return 1;
+  if (kind == R_AARCH64_CONDBR19) return 1;
+  if (kind == R_AARCH64_TSTBR14) return 1;
+  return 0;
+}
+
+/* Patch every relaxable same-section branch in d->ob and retire its reloc. */
+static void relax_local_branches(AsmDriver* d) {
+  const u32 nrelocs = obj_reloc_total(d->ob);
+  u32 idx;
+  for (idx = 0; idx < nrelocs; idx++) {
+    Reloc* rel = (Reloc*)obj_reloc_at(d->ob, idx);
+    const ObjSym* target;
+    Section* home;
+    u8 word[4];
+    if (!rel || rel->removed || !is_relaxable_branch_kind(rel->kind))
+      continue;
+    target = obj_symbol_get(d->ob, rel->sym);
+    /* Keep the reloc unless the target is a same-section local non-function. */
+    if (!target || target->section_id != rel->section_id ||
+        target->bind != SB_LOCAL || target->kind == SK_FUNC) continue;
+    home = (Section*)obj_section_get(d->ob, rel->section_id);
+    if (!home || (u64)rel->offset + 4 > home->bytes.total) continue;
+    /* S and P are both section-relative, so the base cancels: S + A - P. */
+    buf_read(&home->bytes, rel->offset, word, 4);
+    link_reloc_apply(d->c, (RelocKind)rel->kind, word, target->value,
+                     rel->addend, rel->offset);
+    buf_patch(&home->bytes, rel->offset, word, 4);
+    rel->removed = 1; /* instruction is patched; drop the relocation */
+  }
+}
+
/* ---- driver loop ---- */
static void process_label(AsmDriver* d, Sym name) {
@@ -1122,6 +1181,7 @@ void asm_parse(Compiler* c, AsmLexer* l, MCEmitter* mc) {
}
promote_undef_externs(&d);
+ relax_local_branches(&d);
if (d.arch_asm && d.arch_asm->destroy) d.arch_asm->destroy(d.arch_asm);
SymSecMap_fini(&d.sec_map);
diff --git a/test/asm/roundtrip.sh b/test/asm/roundtrip.sh
@@ -180,15 +180,11 @@ for src in "$CORPUS_DIR"/*.c; do
fi
# ---- L1: byte + reloc round-trip ---------------------------------
- # Intra-function branches are re-assemblable (L0/L2 cover them) but the
- # assembler relocates same-section branch targets that codegen resolves
- # locally, so the .text reloc tables differ. Skip L1 for such cases
- # until the assembler grows same-section branch relaxation (P2,
- # doc/ASM_ROUNDTRIP_TESTING.md). The synthesized `Lcf_` label in the
- # listing is the marker.
- if [ $RUN_L1 -eq 1 ] && grep -q 'Lcf_' "$asm"; then
- note_skip "$tag/L1" "intra-function branch; needs assembler local-reloc relaxation (P2)"
- elif [ $RUN_L1 -eq 1 ]; then
+ # Intra-function branches now round-trip: `as` relaxes same-section
+ # local-label branches at finalize (matching codegen), so the .text
+ # reloc tables agree. A case that still can't round-trip is gated with
+ # a per-case `<name>.skip` file rather than auto-skipped here.
+ if [ $RUN_L1 -eq 1 ]; then
l1_ok=1; l1_why=""
if ! "$CFREE" cc -c "-$opt" -target "$TRIPLE" "$src" -o "$direct" \
>"$work/cc_c.log" 2>&1; then