commit caa879d161dc0a3bd199b5f4efdf630702c33f4f
parent d095314213d32055bf1f738b97bd8e36cbd8b536
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 29 May 2026 15:15:33 -0700
rv64 link: apply R_RISCV_SET_ULEB128/SUB_ULEB128 (DWARF diff relocs)
link_reloc_apply now handles the ULEB128 SET/SUB reloc pair used in
.debug_rnglists/.debug_line: reads the original ULEB128 field width at the
fixup site and re-encodes the relocated value into the same byte width
(redundant-ULEB128 padding) so layout never shifts. reloc_width returns a
documented non-zero sentinel for these kinds (it only feeds a width field that
nothing reads; the real width is taken at apply time from the field bytes).
New test-link-reloc-uleb128 unit drives link_reloc_apply
directly with values derived from a real clang -g riscv64 object.
Diffstat:
4 files changed, 281 insertions(+), 0 deletions(-)
diff --git a/src/link/link_reloc_layout.c b/src/link/link_reloc_layout.c
@@ -29,6 +29,12 @@ static SrcLoc no_loc(void) {
return l;
}
+/* Nominal (non-zero) width that reloc_width() reports for the
+ * variable-length ULEB128 RISC-V relocs (R_RV_SET_ULEB128 and
+ * R_RV_SUB_ULEB128). See the comment in reloc_width(): this value only
+ * has to be non-zero to pass the "supported kind" gate — the byte-exact
+ * width is determined at apply time from the field bytes themselves. */
+#define RELOC_RV_ULEB128_NOMINAL_WIDTH 1u
+
/* ---- pass 3: assign symbol vaddrs ---- */
void link_assign_symbol_vaddrs(Linker* l, LinkImage* img) {
@@ -339,6 +345,22 @@ static u8 reloc_width(RelocKind k) {
case R_RV_ADD64:
case R_RV_SUB64:
return 8;
+ case R_RV_SET_ULEB128:
+ case R_RV_SUB_ULEB128:
+ /* ULEB128 fields are variable-length: the true width is the number
+ * of bytes the assembler reserved at the reloc offset, which is
+ * data-dependent and only knowable from the section bytes at the
+ * site. reloc_width() is keyed solely on RelocKind and has no view
+ * of those bytes, and the width it returns is consumed ONLY as a
+ * non-zero "is this kind supported?" gate in link_emit_relocations
+ * (LinkRelocApply.width is never read by any apply or output path —
+ * link_reloc_apply is dispatched on RelocKind and re-reads the
+ * encoded ULEB128 length straight from P_bytes). So we return a
+ * fixed sentinel here purely to pass that gate; the byte-exact
+ * width is established at apply time in link_reloc_apply.
+ * RELOC_RV_ULEB128_NOMINAL_WIDTH is the common 1-byte case for the
+ * small DWARF symbol differences these encode. */
+ return RELOC_RV_ULEB128_NOMINAL_WIDTH;
case R_COFF_SECREL:
return 4;
case R_COFF_SECTION:
diff --git a/src/obj/reloc_apply.c b/src/obj/reloc_apply.c
@@ -25,6 +25,66 @@ static SrcLoc no_loc(void) {
return l;
}
+/* ---- ULEB128 codec for R_RISCV_{SET,SUB}_ULEB128 ----
+ *
+ * These RISC-V relocs patch a variable-length ULEB128 field in place
+ * (DWARF .debug_rnglists / .debug_loclists / .debug_line encode
+ * symbol differences this way). The crux: ULEB128 is variable-length,
+ * but rewriting it must NOT shift the section layout, so we re-encode
+ * the new value into the SAME number of bytes the assembler reserved
+ * at the site. ULEB128 permits "redundant" encodings: extra HIGH-order
+ * groups of zero appended after the value's significant groups. Every
+ * byte except the last has its continuation bit set; the final byte
+ * has it clear (RISC-V psABI / DWARF v5 §7.6). We exploit that to pad
+ * to a fixed width, e.g. the value 0x05 stored in a 2-byte field is
+ * {0x85, 0x00}.
+ *
+ * RELOC_ULEB128_MAX_BYTES bounds a 64-bit value: ceil(64/7) = 10. */
+#define RELOC_ULEB128_MAX_BYTES 10u
+#define RELOC_ULEB128_CONT 0x80u /* continuation bit */
+#define RELOC_ULEB128_MASK 0x7fu /* 7 payload bits per byte */
+
+/* Measure the ULEB128 field that starts at p.
+ *
+ * Returns the number of bytes up to and including the first byte whose
+ * continuation bit is clear. The scan never looks at more than
+ * RELOC_ULEB128_MAX_BYTES bytes: if none of them terminates the field,
+ * RELOC_ULEB128_MAX_BYTES is returned. */
+static u32 reloc_uleb128_len(const u8* p) {
+  u32 idx;
+  for (idx = 0; idx < RELOC_ULEB128_MAX_BYTES; ++idx) {
+    if ((p[idx] & RELOC_ULEB128_CONT) == 0) {
+      return idx + 1u; /* terminator found: length includes it */
+    }
+  }
+  return RELOC_ULEB128_MAX_BYTES;
+}
+
+/* Decode the ULEB128 value stored at p.
+ *
+ * Each byte contributes its low 7 bits, least-significant group first.
+ * Decoding stops after the first byte whose continuation bit is clear,
+ * or after RELOC_ULEB128_MAX_BYTES bytes, whichever comes first (the
+ * field is assumed to be well formed). The tenth byte is shifted by 63,
+ * so only its lowest payload bit can reach the 64-bit result. */
+static u64 reloc_uleb128_read(const u8* p) {
+  u64 value = 0;
+  u32 idx;
+  for (idx = 0; idx < RELOC_ULEB128_MAX_BYTES; ++idx) {
+    const u8 group = p[idx];
+    const u32 shift = idx * 7u;
+    if (shift < 64) {
+      value |= (u64)(group & RELOC_ULEB128_MASK) << shift;
+    }
+    if ((group & RELOC_ULEB128_CONT) == 0) {
+      break; /* final group of the field */
+    }
+  }
+  return value;
+}
+
+/* Store v at p as a ULEB128 that is exactly `width` bytes long.
+ *
+ * Bytes are emitted least-significant 7-bit group first. Every byte but
+ * the last gets the continuation bit; the last one has it clear. Once v
+ * runs out of set bits the remaining groups are zero, which yields the
+ * redundant padding that keeps the in-place field size unchanged. Value
+ * bits above 7 * width are not stored. A width of 0 writes nothing. */
+static void reloc_uleb128_write_fixed(u8* p, u64 v, u32 width) {
+  u32 remaining = width;
+  while (remaining != 0) {
+    u8 group = (u8)(v & RELOC_ULEB128_MASK);
+    v >>= 7;
+    --remaining;
+    if (remaining != 0) {
+      group |= RELOC_ULEB128_CONT; /* more bytes follow */
+    }
+    *p++ = group;
+  }
+}
+
void link_reloc_apply(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A,
u64 P) {
switch (k) {
@@ -508,6 +568,28 @@ void link_reloc_apply(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A,
case R_RV_SET32:
wr_u32_le(P_bytes, (u32)((S + (u64)A) & 0xffffffffu));
return;
+ case R_RV_SET_ULEB128: {
+ /* Variable-length ULEB128 field set to (S + A). These come as a
+ * PAIR at the same offset (RISC-V psABI): SET_ULEB128 sets the
+ * field, a following SUB_ULEB128 then subtracts the second
+ * symbol — net encoding (sym_hi - sym_lo) for DWARF symbol
+ * differences. Re-encode into the original field width so the
+ * section layout doesn't shift. */
+ u32 width = reloc_uleb128_len(P_bytes);
+ u64 v = S + (u64)A;
+ reloc_uleb128_write_fixed(P_bytes, v, width);
+ return;
+ }
+ case R_RV_SUB_ULEB128: {
+ /* field -= (S + A), preserving the original ULEB128 width. The
+ * paired SET_ULEB128 ran first (same offset); we read back the
+ * value it wrote and subtract this symbol's resolved address. */
+ u32 width = reloc_uleb128_len(P_bytes);
+ u64 cur = reloc_uleb128_read(P_bytes);
+ u64 v = cur - (S + (u64)A);
+ reloc_uleb128_write_fixed(P_bytes, v, width);
+ return;
+ }
default:
compiler_panic(c, no_loc(), "link: unsupported reloc kind %u",
(unsigned)k);
diff --git a/test/link/reloc_uleb128_unit.c b/test/link/reloc_uleb128_unit.c
@@ -0,0 +1,165 @@
+/* test/link/reloc_uleb128_unit.c — direct unit test for the RISC-V
+ * R_RISCV_SET_ULEB128 / R_RISCV_SUB_ULEB128 relocation application.
+ *
+ * Why a direct unit test (not a corpus/roundtrip case): the relocs are
+ * APPLIED by the static/JIT linker (link_reloc_apply), not by the object
+ * roundtrip path, so the decisive assertion is that the rewritten
+ * ULEB128 field equals the encoded symbol difference AND keeps its
+ * original byte width (so the section layout never shifts). We construct
+ * the section bytes + the SET/SUB pair in memory and call
+ * link_reloc_apply directly.
+ *
+ * The fixtures are taken from a real RISC-V object: compiling
+ * void f(void){other();} void g(void){other();other();}
+ * with `clang -c -g -ffunction-sections --target=riscv64-linux-gnu
+ * -march=rv64gc` emits, in .debug_rnglists, two SET_ULEB128/SUB_ULEB128
+ * pairs encoding (sym_hi - sym_lo):
+ * off 0x12: SET .L0(=0x18), SUB .L0(=0x00) -> field = 0x18 (1 byte)
+ * off 0x15: SET .L0(=0x20), SUB .L0(=0x18) -> field = 0x08 (1 byte)
+ * We reproduce those, plus multi-byte width cases that exercise the
+ * fixed-width "redundant ULEB128" padding the in-place rewrite relies on.
+ *
+ * link_reloc_apply's ULEB128 success path never touches its Compiler*
+ * argument (it only does on the unsupported-kind panic), so we pass NULL.
+ *
+ * Exit 0 = pass; non-zero = fail (one line per failure on stderr). */
+
+#include <cfree/core.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "obj/obj.h"
+#include "obj/reloc_apply.h"
+
+/* Number of CHECKs that have failed so far; main() exits non-zero when
+ * this is non-zero. */
+static int g_failures;
+/* CHECK(cond, fmt, ...): when cond is false, print "FAIL <file>:<line>: "
+ * followed by the printf-style message and a newline to stderr, and
+ * increment g_failures. Execution continues after a failure, so a single
+ * run reports every failing check. */
+#define CHECK(cond, ...) \
+ do { \
+ if (!(cond)) { \
+ fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \
+ fprintf(stderr, __VA_ARGS__); \
+ fputc('\n', stderr); \
+ g_failures++; \
+ } \
+ } while (0)
+
+/* Decode the ULEB128 at p, returning its value and (via *len_out) its
+ * byte length.
+ *
+ * The scan is capped at 10 bytes - ceil(64/7), the longest encoding a
+ * 64-bit value can need - so a field that the code under test left
+ * without a terminating byte shows up as a width of 10 (a CHECK
+ * failure) instead of an unbounded walk through memory. Payload groups
+ * are folded in only while the shift is below 64, because shifting a
+ * 64-bit value by 64 or more is undefined behaviour in C. Well-formed
+ * fields of up to 10 bytes decode exactly as before. */
+static uint64_t decode_uleb128(const uint8_t* p, uint32_t* len_out) {
+  const uint32_t max_bytes = 10u;
+  uint64_t v = 0;
+  uint32_t shift = 0;
+  uint32_t n = 0;
+  while (n < max_bytes) {
+    uint8_t byte = p[n++];
+    if (shift < 64) v |= (uint64_t)(byte & 0x7fu) << shift;
+    shift += 7;
+    if (!(byte & 0x80u)) break; /* terminating group */
+  }
+  *len_out = n;
+  return v;
+}
+
+/* Run the two-step relocation sequence on the field at `site`: first
+ * R_RV_SET_ULEB128 with (s_hi, a_hi), then R_RV_SUB_ULEB128 with
+ * (s_lo, a_lo). The field ends up holding
+ * (s_hi + a_hi) - (s_lo + a_lo), and its byte width must not change.
+ * The order matters: SUB reads back what SET just stored. */
+static void apply_pair(uint8_t* site, uint64_t s_hi, int64_t a_hi,
+                       uint64_t s_lo, int64_t a_lo) {
+  /* The ULEB128 relocs do not use the place address P, so pass zero. */
+  const uint64_t place = 0;
+
+  link_reloc_apply(NULL, R_RV_SET_ULEB128, site, s_hi, a_hi, place);
+  link_reloc_apply(NULL, R_RV_SUB_ULEB128, site, s_lo, a_lo, place);
+}
+
+/* Check the relocated field at `site` and record any mismatch via CHECK.
+ *
+ * Three things are verified, in this order:
+ *   1. the field decodes to want_val;
+ *   2. the field is exactly want_width bytes long;
+ *   3. actual_guard (the byte after the field, as read by the caller)
+ *      still equals guard, its value before the relocs were applied.
+ * `label` prefixes every failure message. */
+static void expect_field(const char* label, const uint8_t* site,
+                         uint64_t want_val, uint32_t want_width,
+                         uint8_t guard, uint8_t actual_guard) {
+  uint32_t field_len = 0;
+  const uint64_t field_val = decode_uleb128(site, &field_len);
+  const int value_ok = (field_val == want_val);
+  const int width_ok = (field_len == want_width);
+  const int guard_ok = (actual_guard == guard);
+
+  CHECK(value_ok, "%s: value got 0x%llx want 0x%llx", label,
+        (unsigned long long)field_val, (unsigned long long)want_val);
+  CHECK(width_ok, "%s: width got %u want %u (layout shift!)", label,
+        field_len, want_width);
+  CHECK(guard_ok,
+        "%s: trailing guard byte clobbered: got 0x%02x want 0x%02x", label,
+        actual_guard, guard);
+}
+
+/* Runs every fixture case below. Each case builds a small buffer holding
+ * the reserved ULEB128 field followed by one guard byte, applies the
+ * reloc(s), and checks value, width and guard. Returns 0 when every
+ * CHECK passed, 1 otherwise. */
+int main(void) {
+ /* ---- Case 1: real-object fixtures (1-byte fields) ---- */
+ {
+ /* off 0x12 of .debug_rnglists: pre-filled assembler value 0x18, a
+ * trailing guard byte follows (0x03 in the real section). SET 0x18,
+ * SUB 0x00 -> 0x18, still 1 byte. */
+ uint8_t buf[2] = {0x18, 0x03};
+ apply_pair(buf, /*hi*/ 0x18, 0, /*lo*/ 0x00, 0);
+ expect_field("rnglists@0x12", buf, 0x18u, 1u, 0x03, buf[1]);
+ }
+ {
+ /* off 0x15: pre-filled 0x20; SET 0x20, SUB 0x18 -> 0x08, 1 byte. */
+ uint8_t buf[2] = {0x20, 0x00};
+ apply_pair(buf, /*hi*/ 0x20, 0, /*lo*/ 0x18, 0);
+ expect_field("rnglists@0x15", buf, 0x08u, 1u, 0x00, buf[1]);
+ }
+
+ /* ---- Case 2: addends fold into S+A ---- */
+ {
+ /* (S_hi + A_hi) - (S_lo + A_lo) = (0x10+4) - (0x08-2) = 0x14 - 6 = 0x0e. */
+ uint8_t buf[2] = {0x00, 0xee};
+ apply_pair(buf, 0x10, 4, 0x08, -2);
+ expect_field("addend-fold", buf, 0x0eu, 1u, 0xee, buf[1]);
+ }
+
+ /* ---- Case 3: fixed-width padding — a value that NATURALLY needs 1
+ * byte must be re-encoded into a reserved 2-byte field via a redundant
+ * continuation group, so the layout never shifts. ---- */
+ {
+ /* Reserved field is 2 bytes (0x80,0x00 = redundant encoding of 0).
+ * SET 0x05, SUB 0x00 -> 0x05, but must STAY 2 bytes wide. */
+ uint8_t buf[3] = {0x80, 0x00, 0x77};
+ apply_pair(buf, 0x05, 0, 0x00, 0);
+ expect_field("pad-1-into-2", buf, 0x05u, 2u, 0x77, buf[2]);
+ /* The encoding must be {0x85, 0x00}: low group 0x05 + cont bit, then
+ * terminating 0x00. */
+ CHECK(buf[0] == 0x85 && buf[1] == 0x00,
+ "pad-1-into-2: bytes got {0x%02x,0x%02x} want {0x85,0x00}", buf[0],
+ buf[1]);
+ }
+
+ /* ---- Case 4: genuine multi-byte value round-trips ---- */
+ {
+ /* A 2-byte field reserved (0x80,0x00). Difference 0x100 = 256 needs
+ * two ULEB groups: 0x80 (low 7 = 0, cont) then 0x02. Width stays 2. */
+ uint8_t buf[3] = {0x80, 0x00, 0x5a};
+ apply_pair(buf, 0x100, 0, 0x00, 0);
+ expect_field("multibyte-0x100", buf, 0x100u, 2u, 0x5a, buf[2]);
+ CHECK(buf[0] == 0x80 && buf[1] == 0x02,
+ "multibyte-0x100: bytes got {0x%02x,0x%02x} want {0x80,0x02}",
+ buf[0], buf[1]);
+ }
+
+ /* ---- Case 5: 3-byte reserved field, value 0x3fff -> {0xff,0xff,0x00}.
+ * Naturally 0x3fff is 2 bytes; padding to 3 appends a redundant 0. ---- */
+ {
+ uint8_t buf[4] = {0x80, 0x80, 0x00, 0xc3};
+ apply_pair(buf, 0x3fff, 0, 0x00, 0);
+ expect_field("pad-2-into-3", buf, 0x3fffu, 3u, 0xc3, buf[3]);
+ CHECK(buf[0] == 0xff && buf[1] == 0xff && buf[2] == 0x00,
+ "pad-2-into-3: bytes got {0x%02x,0x%02x,0x%02x} want {0xff,0xff,0x00}",
+ buf[0], buf[1], buf[2]);
+ }
+
+ /* ---- Case 6: SET applied on its own (no paired SUB is issued here):
+ * the field must hold S + A and keep its 1-byte width. ---- */
+ {
+ uint8_t buf[2] = {0x00, 0x9d};
+ link_reloc_apply(NULL, R_RV_SET_ULEB128, buf, 0x2a, 0, 0);
+ expect_field("set-only", buf, 0x2au, 1u, 0x9d, buf[1]);
+ }
+
+ /* Summarise: any recorded failure turns into a non-zero exit status. */
+ if (g_failures) {
+ fprintf(stderr, "reloc_uleb128_unit: %d failure(s)\n", g_failures);
+ return 1;
+ }
+ fputs("reloc_uleb128_unit: OK\n", stderr);
+ return 0;
+}
diff --git a/test/test.mk b/test/test.mk
@@ -278,6 +278,18 @@ $(EMU_RV64_TEST_BIN): test/emu/rv64_smoke_test.c $(LIB_AR)
@mkdir -p $(dir $@)
$(CC) $(TEST_HOST_CFLAGS) -Isrc test/emu/rv64_smoke_test.c $(LIB_AR) -o $@
+# RISC-V ULEB128 diff-reloc application unit test. link_reloc_apply is an
+# internal (hidden) symbol, so link the raw lib objects like the other
+# internal-surface unit tests rather than libcfree.a.
+RELOC_ULEB128_TEST_BIN = build/test/reloc_uleb128_unit
+
+# Run the unit test; the binary exits non-zero when any check fails.
+test-link-reloc-uleb128: $(RELOC_ULEB128_TEST_BIN)
+ $(RELOC_ULEB128_TEST_BIN)
+
+# Build the test binary from its single source file plus $(LIB_OBJS).
+$(RELOC_ULEB128_TEST_BIN): test/link/reloc_uleb128_unit.c $(LIB_OBJS)
+ @mkdir -p $(dir $@)
+ $(CC) $(TEST_HOST_CFLAGS) -Isrc test/link/reloc_uleb128_unit.c $(LIB_OBJS) -o $@
+
CG_API_TEST_BIN = build/test/cg_api_test
CG_SWITCH_TEST_BIN = build/test/cg_switch_test
ABI_CLASSIFY_TEST_BIN = build/test/abi_classify_test