commit e41b2bb8ab846ea74c1492d6d5c391acbffc723e
parent aa49b4c0bc19392da1ef6159929122d5e7cdcb77
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 29 May 2026 19:33:01 -0700
asm: symbolize data-section relocations in cc -S (jump tables)
emit_data_range dumped every data section as raw .byte, so a relocated
data word (a switch jump-table slot, R_ABS64 against the function; a
global pointer-array entry) re-assembled with no relocation — the linked
table would hold a tiny absolute offset and dispatch into hyperspace.
Render a covered relocation as a symbolic integer directive instead:
R_ABS64 -> .quad sym+addend, R_ABS32 -> .word sym+addend (the inverse of
the assembler's emit_int_directive). Kinds with no integer-directive form,
or targets the assembler can't spell, keep raw bytes — the dropped reloc
then surfaces in the round-trip's reloc comparison instead of vanishing
silently.
The L1 lane now compares relocations across .text/.rodata/.data (was
.text only) so data relocs are validated, not just code relocs. New corpus
case roundtrip/jumptable.c (dense switch). Round-trip aa64 42/0/0; the
round-tripped jump table links and dispatches correctly (L2 exit 42).
Diffstat:
4 files changed, 118 insertions(+), 14 deletions(-)
diff --git a/src/api/asm_emit.c b/src/api/asm_emit.c
@@ -178,8 +178,9 @@ static const char* sec_directive(const Section* sec) {
}
}
-static CfreeStatus emit_data_range(Writer* w, const u8* data, u32 start,
- u32 end) {
+/* Emit a run of raw `.byte` lines for [start, end). */
+static CfreeStatus emit_raw_bytes(Writer* w, const u8* data, u32 start,
+ u32 end) {
u32 off;
for (off = start; off < end; off += ASM_BYTES_PER_LINE) {
u32 rem = end - off;
@@ -202,6 +203,26 @@ static CfreeStatus emit_data_range(Writer* w, const u8* data, u32 start,
return CFREE_OK;
}
+/* Map a relocation kind to the integer directive that reproduces it:
+ * R_ABS64 -> `.quad` (8 bytes), R_ABS32 -> `.word` (4 bytes). The assembler
+ * is expected to emit the matching R_ABS{32,64} for `.word`/`.quad SYM+addend`
+ * (emit_int_directive, not shown here). Returns 1 with `*dir` and `*width`
+ * set; returns 0 and writes neither output otherwise (caller keeps raw bytes). */
+static int data_reloc_directive(u16 kind, const char** dir, u32* width) {
+ switch (kind) {
+ case R_ABS64:
+ *dir = " .quad ";
+ *width = 8;
+ return 1;
+ case R_ABS32:
+ *dir = " .word ";
+ *width = 4;
+ return 1;
+ default:
+ return 0;
+ }
+}
+
static CfreeStatus emit_zero_range(Writer* w, u32 size) {
CfreeStatus st;
if (size == 0) return CFREE_OK;
@@ -603,6 +624,58 @@ static CfreeStatus emit_operands(Writer* w, const EmitCtx* x,
return cfree_writer_write(w, insn->operands.s, insn->operands.len);
}
+/* Emit data bytes [start, end), symbolizing relocated fields. A reloc that
+ * starts exactly at the cursor is written as its integer directive plus the
+ * build_symref text (the field's stored bytes are not printed) when its kind
+ * maps via data_reloc_directive, the field ends at or before `end`, and
+ * build_symref returns >= 0. Otherwise raw `.byte` output runs to the next
+ * reloc offset or `end`; the unsymbolized reloc is simply not emitted.
+ * `relocs` is the section's reloc list. Returns CFREE_OK or a write error. */
+static CfreeStatus emit_data_range(Writer* w, Compiler* c, const u8* data,
+ u32 start, u32 end, const SecReloc* relocs,
+ u32 nrelocs) {
+ u32 off = start;
+ while (off < end) {
+ const SecReloc* r = NULL;
+ u32 next = end;
+ u32 i;
+ /* One pass over all relocs: `r` = a reloc starting exactly at `off` (the
+ * last such entry wins); `next` = nearest offset in (off, end), else `end`. */
+ for (i = 0; i < nrelocs; ++i) {
+ if (relocs[i].offset == off) {
+ r = &relocs[i];
+ } else if (relocs[i].offset > off && relocs[i].offset < next) {
+ next = relocs[i].offset;
+ }
+ }
+ if (r) {
+ const char* dir;
+ u32 width;
+ char symref[256];
+ if (data_reloc_directive(r->kind, &dir, &width) && off + width <= end &&
+ build_symref(symref, sizeof symref, c, "", r->sym, r->addend) >= 0) {
+ CfreeStatus st = w_str(w, dir);
+ if (st != CFREE_OK) return st;
+ st = w_str(w, symref);
+ if (st != CFREE_OK) return st;
+ st = w_newline(w);
+ if (st != CFREE_OK) return st;
+ off += width;
+ continue;
+ }
+ /* Unsupported kind, field overrunning `end`, or build_symref failure:
+ * fall through and dump raw bytes up to `next`, which is past this reloc. */
+ }
+ if (next <= off) next = end; /* defensive: `next > off` already holds here */
+ {
+ CfreeStatus st = emit_raw_bytes(w, data, off, next);
+ if (st != CFREE_OK) return st;
+ }
+ off = next;
+ }
+ return CFREE_OK;
+}
+
static CfreeStatus emit_disasm_range(Writer* w, const EmitCtx* x,
ArchDisasm* dasm, const u8* data, u32 start,
u32 end) {
@@ -716,6 +789,7 @@ CfreeStatus cfree_obj_builder_emit_asm(CfreeObjBuilder* builder,
}
} else if (total > 0 && sec->kind != SEC_BSS) {
Heap* heap = c->ctx->heap;
+ relocs = collect_relocs(c, ob, (ObjSecId)i, &nrelocs);
heap_data = (u8*)heap->alloc(heap, total, 1);
if (heap_data) {
buf_flatten(&sec->bytes, heap_data);
@@ -764,7 +838,7 @@ CfreeStatus cfree_obj_builder_emit_asm(CfreeObjBuilder* builder,
} else if ((sec->flags & SF_EXEC) && dasm && flat_data) {
emit_disasm_range(w, &ctx, dasm, flat_data, off, next);
} else if (flat_data) {
- emit_data_range(w, flat_data, off, next);
+ emit_data_range(w, c, flat_data, off, next, relocs, nrelocs);
}
off = next;
}
diff --git a/test/asm/roundtrip.sh b/test/asm/roundtrip.sh
@@ -105,13 +105,17 @@ case_applies() {
# .text bytes as objdump hex-dump lines (filename header stripped).
text_bytes() { "$CFREE" objdump -s -j .text "$1" 2>/dev/null | awk '/^ *[0-9a-f]+ /'; }
-# .text relocation records only (kind/offset/target), excluding other sections
-# (e.g. .eh_frame) that `cc -S` does not reproduce.
-text_relocs() {
+# Relocation records (kind/offset/target) for the sections `cc -S` reproduces
+# — .text, .rodata, .data — so the comparison covers code relocs AND data
+# relocs (switch jump tables live in .rodata). Sections cc -S does not emit
+# (e.g. .eh_frame) are excluded so their absence in the round-tripped object
+# is not flagged. The section header is printed so a reloc at the same offset
+# in two sections stays distinct.
+reproduced_relocs() {
"$CFREE" objdump -r "$1" 2>/dev/null | awk '
- /RELOCATION RECORDS FOR \[\.text\]/ { f=1; next }
- /RELOCATION RECORDS FOR/ { f=0 }
- f && /^[0-9a-f]/ { print }'
+ /^RELOCATION RECORDS FOR \[\.(text|rodata|data)\]/ { f=1; print; next }
+ /^RELOCATION RECORDS FOR/ { f=0; next }
+ f && /^[0-9a-f]/ { print }'
}
# Emit the in-function decode-failure markers found in a `cc -S` listing.
@@ -193,14 +197,14 @@ for src in "$CORPUS_DIR"/*.c; do
>"$work/as.log" 2>&1; then
l1_ok=0; l1_why="as failed; see $work/as.log"
else
- text_bytes "$direct" >"$work/direct.text"
- text_bytes "$rt" >"$work/rt.text"
- text_relocs "$direct" >"$work/direct.rel"
- text_relocs "$rt" >"$work/rt.rel"
+ text_bytes "$direct" >"$work/direct.text"
+ text_bytes "$rt" >"$work/rt.text"
+ reproduced_relocs "$direct" >"$work/direct.rel"
+ reproduced_relocs "$rt" >"$work/rt.rel"
if ! diff -u "$work/direct.text" "$work/rt.text" >"$work/text.diff"; then
l1_ok=0; l1_why=".text bytes differ; see $work/text.diff"
elif ! diff -u "$work/direct.rel" "$work/rt.rel" >"$work/rel.diff"; then
- l1_ok=0; l1_why=".text relocs differ; see $work/rel.diff"
+ l1_ok=0; l1_why="relocs differ (.text/.rodata/.data); see $work/rel.diff"
fi
fi
if [ $l1_ok -eq 1 ]; then note_pass "$tag/L1"; else note_fail "$tag/L1 ($l1_why)"; fi
diff --git a/test/asm/roundtrip/jumptable.c b/test/asm/roundtrip/jumptable.c
@@ -0,0 +1,25 @@
+/* A dense switch lowered to a jump table — the .rodata table is a vector of
+ * absolute target addresses (R_ABS64 against the function). The object holds
+ * it as raw bytes plus relocations; cc -S must print `.quad sel+off` slots so
+ * cc -S | as reproduces those data relocations (L1) and the linked table
+ * dispatches (L2). The register-indirect `br` plus the adrp/add table base
+ * also exercise the .Lcfree_jt.0 local-symbol reference. Exit: 42 (case 7). */
+int sel(int x) {
+ switch (x) {
+ case 0: return 10;
+ case 1: return 11;
+ case 2: return 12;
+ case 3: return 13;
+ case 4: return 14;
+ case 5: return 15;
+ case 6: return 16;
+ case 7: return 42;
+ case 8: return 18;
+ case 9: return 19;
+ default: return 99;
+ }
+}
+int test_main(void) {
+ volatile int x = 7; /* presumably volatile so sel(7) is not folded away — TODO confirm */
+ return sel(x);
+}
diff --git a/test/asm/roundtrip/jumptable.expected b/test/asm/roundtrip/jumptable.expected
@@ -0,0 +1 @@
+42