kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit caa879d161dc0a3bd199b5f4efdf630702c33f4f
parent d095314213d32055bf1f738b97bd8e36cbd8b536
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri, 29 May 2026 15:15:33 -0700

rv64 link: apply R_RISCV_SET_ULEB128/SUB_ULEB128 (DWARF diff relocs)

link_reloc_apply now handles the ULEB128 SET/SUB reloc pair used in
.debug_rnglists/.debug_loclists/.debug_line: it reads the original ULEB128
field width at the fixup site and re-encodes the relocated value into the same
byte width (redundant-ULEB128 padding) so the layout never shifts. reloc_width
returns a documented non-zero sentinel for these kinds: nothing reads that
width field, and the real width is taken from the field bytes at apply time.
The new test-link-reloc-uleb128 unit test drives link_reloc_apply directly with
values derived from a real clang -g riscv64 object.

Diffstat:
Msrc/link/link_reloc_layout.c | 22++++++++++++++++++++++
Msrc/obj/reloc_apply.c | 82+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/link/reloc_uleb128_unit.c | 165+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mtest/test.mk | 12++++++++++++
4 files changed, 281 insertions(+), 0 deletions(-)

diff --git a/src/link/link_reloc_layout.c b/src/link/link_reloc_layout.c @@ -29,6 +29,12 @@ static SrcLoc no_loc(void) { return l; } +/* Nominal (non-zero) width reported for the variable-length ULEB128 + * RISC-V relocs. See the comment in reloc_width(): this value only has + * to be non-zero to pass the "supported kind" gate — the byte-exact + * width is determined at apply time from the field bytes themselves. */ +#define RELOC_RV_ULEB128_NOMINAL_WIDTH 1u + /* ---- pass 3: assign symbol vaddrs ---- */ void link_assign_symbol_vaddrs(Linker* l, LinkImage* img) { @@ -339,6 +345,22 @@ static u8 reloc_width(RelocKind k) { case R_RV_ADD64: case R_RV_SUB64: return 8; + case R_RV_SET_ULEB128: + case R_RV_SUB_ULEB128: + /* ULEB128 fields are variable-length: the true width is the number + * of bytes the assembler reserved at the reloc offset, which is + * data-dependent and only knowable from the section bytes at the + * site. reloc_width() is keyed solely on RelocKind and has no view + * of those bytes, and the width it returns is consumed ONLY as a + * non-zero "is this kind supported?" gate in link_emit_relocations + * (LinkRelocApply.width is never read by any apply or output path — + * link_reloc_apply is dispatched on RelocKind and re-reads the + * encoded ULEB128 length straight from P_bytes). So we return a + * fixed sentinel here purely to pass that gate; the byte-exact + * width is established at apply time in link_reloc_apply. + * RELOC_RV_ULEB128_NOMINAL_WIDTH is the common 1-byte case for the + * small DWARF symbol differences these encode. 
*/ + return RELOC_RV_ULEB128_NOMINAL_WIDTH; case R_COFF_SECREL: return 4; case R_COFF_SECTION: diff --git a/src/obj/reloc_apply.c b/src/obj/reloc_apply.c @@ -25,6 +25,66 @@ static SrcLoc no_loc(void) { return l; } +/* ---- ULEB128 codec for R_RISCV_{SET,SUB}_ULEB128 ---- + * + * These RISC-V relocs patch a variable-length ULEB128 field in place + * (DWARF .debug_rnglists / .debug_loclists / .debug_line encode + * symbol differences this way). The crux: ULEB128 is variable-length, + * but rewriting it must NOT shift the section layout, so we re-encode + * the new value into the SAME number of bytes the assembler reserved + * at the site. ULEB128 permits "redundant" encodings: extra high-order + * groups of zero appended after the value bits, with only the final + * group's continuation bit clear (RISC-V psABI / DWARF v5 + * §7.6). We exploit that to pad to a fixed width. + * + * RELOC_ULEB128_MAX_BYTES bounds a 64-bit value: ceil(64/7) = 10. */ +#define RELOC_ULEB128_MAX_BYTES 10u +#define RELOC_ULEB128_CONT 0x80u /* continuation bit */ +#define RELOC_ULEB128_MASK 0x7fu /* 7 payload bits per byte */ + +/* Length of the ULEB128 field encoded at p: count bytes up to and + * including the first whose continuation bit is clear. */ +static u32 reloc_uleb128_len(const u8* p) { + u32 n = 0; + for (;;) { + u8 byte = p[n++]; + if (!(byte & RELOC_ULEB128_CONT)) break; + if (n >= RELOC_ULEB128_MAX_BYTES) break; + } + return n; +} + +/* Decode the ULEB128 value encoded at p (assumes a well-formed field + * of at most RELOC_ULEB128_MAX_BYTES). 
*/ +static u64 reloc_uleb128_read(const u8* p) { + u64 v = 0; + u32 shift = 0; + u32 n = 0; + for (;;) { + u8 byte = p[n++]; + if (shift < 64) v |= (u64)(byte & RELOC_ULEB128_MASK) << shift; + shift += 7; + if (!(byte & RELOC_ULEB128_CONT)) break; + if (n >= RELOC_ULEB128_MAX_BYTES) break; + } + return v; +} + +/* Re-encode v as a ULEB128 occupying exactly `width` bytes, padding + * with redundant continuation groups so the in-place field size is + * preserved. The final byte's continuation bit is clear; every prior + * byte's is set, carrying the next 7 value bits (or zero once v is + * exhausted). */ +static void reloc_uleb128_write_fixed(u8* p, u64 v, u32 width) { + u32 i; + for (i = 0; i < width; ++i) { + u8 byte = (u8)(v & RELOC_ULEB128_MASK); + v >>= 7; + if (i + 1u < width) byte |= RELOC_ULEB128_CONT; + p[i] = byte; + } +} + void link_reloc_apply(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A, u64 P) { switch (k) { @@ -508,6 +568,28 @@ void link_reloc_apply(Compiler* c, RelocKind k, u8* P_bytes, u64 S, i64 A, case R_RV_SET32: wr_u32_le(P_bytes, (u32)((S + (u64)A) & 0xffffffffu)); return; + case R_RV_SET_ULEB128: { + /* Variable-length ULEB128 field set to (S + A). These come as a + * PAIR at the same offset (RISC-V psABI): SET_ULEB128 sets the + * field, a following SUB_ULEB128 then subtracts the second + * symbol — net encoding (sym_hi - sym_lo) for DWARF symbol + * differences. Re-encode into the original field width so the + * section layout doesn't shift. */ + u32 width = reloc_uleb128_len(P_bytes); + u64 v = S + (u64)A; + reloc_uleb128_write_fixed(P_bytes, v, width); + return; + } + case R_RV_SUB_ULEB128: { + /* field -= (S + A), preserving the original ULEB128 width. The + * paired SET_ULEB128 ran first (same offset); we read back the + * value it wrote and subtract this symbol's resolved address. 
*/ + u32 width = reloc_uleb128_len(P_bytes); + u64 cur = reloc_uleb128_read(P_bytes); + u64 v = cur - (S + (u64)A); + reloc_uleb128_write_fixed(P_bytes, v, width); + return; + } default: compiler_panic(c, no_loc(), "link: unsupported reloc kind %u", (unsigned)k); diff --git a/test/link/reloc_uleb128_unit.c b/test/link/reloc_uleb128_unit.c @@ -0,0 +1,165 @@ +/* test/link/reloc_uleb128_unit.c — direct unit test for the RISC-V + * R_RISCV_SET_ULEB128 / R_RISCV_SUB_ULEB128 relocation application. + * + * Why a direct unit test (not a corpus/roundtrip case): the relocs are + * APPLIED by the static/JIT linker (link_reloc_apply), not by the object + * roundtrip path, so the decisive assertion is that the rewritten + * ULEB128 field equals the encoded symbol difference AND keeps its + * original byte width (so the section layout never shifts). We construct + * the section bytes + the SET/SUB pair in memory and call + * link_reloc_apply directly. + * + * The fixtures are taken from a real RISC-V object: compiling + * void f(void){other();} void g(void){other();other();} + * with `clang -c -g -ffunction-sections --target=riscv64-linux-gnu + * -march=rv64gc` emits, in .debug_rnglists, two SET_ULEB128/SUB_ULEB128 + * pairs encoding (sym_hi - sym_lo): + * off 0x12: SET .L0(=0x18), SUB .L0(=0x00) -> field = 0x18 (1 byte) + * off 0x15: SET .L0(=0x20), SUB .L0(=0x18) -> field = 0x08 (1 byte) + * We reproduce those, plus multi-byte width cases that exercise the + * fixed-width "redundant ULEB128" padding the in-place rewrite relies on. + * + * link_reloc_apply's ULEB128 success path never touches its Compiler* + * argument (it only does on the unsupported-kind panic), so we pass NULL. + * + * Exit 0 = pass; non-zero = fail (one line per failure on stderr). */ + +#include <cfree/core.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> + +#include "obj/obj.h" +#include "obj/reloc_apply.h" + +static int g_failures; +#define CHECK(cond, ...) 
\ + do { \ + if (!(cond)) { \ + fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \ + fprintf(stderr, __VA_ARGS__); \ + fputc('\n', stderr); \ + g_failures++; \ + } \ + } while (0) + +/* Decode a ULEB128 at p, returning value and (via *len_out) byte length. */ +static uint64_t decode_uleb128(const uint8_t* p, uint32_t* len_out) { + uint64_t v = 0; + uint32_t shift = 0; + uint32_t n = 0; + for (;;) { + uint8_t byte = p[n++]; + v |= (uint64_t)(byte & 0x7fu) << shift; + shift += 7; + if (!(byte & 0x80u)) break; + } + *len_out = n; + return v; +} + +/* Apply a SET_ULEB128 then SUB_ULEB128 pair at the same offset, exactly + * as the linker does: SET writes (S_hi + A_hi); SUB subtracts (S_lo + + * A_lo). Net field = (S_hi + A_hi) - (S_lo + A_lo). The original field + * width must be preserved. */ +static void apply_pair(uint8_t* site, uint64_t s_hi, int64_t a_hi, + uint64_t s_lo, int64_t a_lo) { + link_reloc_apply(NULL, R_RV_SET_ULEB128, site, s_hi, a_hi, 0); + link_reloc_apply(NULL, R_RV_SUB_ULEB128, site, s_lo, a_lo, 0); +} + +/* Verify the field at site decodes to want_val and occupies exactly + * want_width bytes, and that bytes beyond the field are untouched. 
*/ +static void expect_field(const char* label, const uint8_t* site, + uint64_t want_val, uint32_t want_width, + uint8_t guard, uint8_t actual_guard) { + uint32_t got_width = 0; + uint64_t got_val = decode_uleb128(site, &got_width); + CHECK(got_val == want_val, + "%s: value got 0x%llx want 0x%llx", label, + (unsigned long long)got_val, (unsigned long long)want_val); + CHECK(got_width == want_width, "%s: width got %u want %u (layout shift!)", + label, got_width, want_width); + CHECK(actual_guard == guard, + "%s: trailing guard byte clobbered: got 0x%02x want 0x%02x", label, + actual_guard, guard); +} + +int main(void) { + /* ---- Case 1: real-object fixtures (1-byte fields) ---- */ + { + /* off 0x12 of .debug_rnglists: pre-filled assembler value 0x18, a + * trailing guard byte follows (0x03 in the real section). SET 0x18, + * SUB 0x00 -> 0x18, still 1 byte. */ + uint8_t buf[2] = {0x18, 0x03}; + apply_pair(buf, /*hi*/ 0x18, 0, /*lo*/ 0x00, 0); + expect_field("rnglists@0x12", buf, 0x18u, 1u, 0x03, buf[1]); + } + { + /* off 0x15: pre-filled 0x20; SET 0x20, SUB 0x18 -> 0x08, 1 byte. */ + uint8_t buf[2] = {0x20, 0x00}; + apply_pair(buf, /*hi*/ 0x20, 0, /*lo*/ 0x18, 0); + expect_field("rnglists@0x15", buf, 0x08u, 1u, 0x00, buf[1]); + } + + /* ---- Case 2: addends fold into S+A ---- */ + { + /* (S_hi + A_hi) - (S_lo + A_lo) = (0x10+4) - (0x08-2) = 0x14 - 6 = 0x0e. */ + uint8_t buf[2] = {0x00, 0xee}; + apply_pair(buf, 0x10, 4, 0x08, -2); + expect_field("addend-fold", buf, 0x0eu, 1u, 0xee, buf[1]); + } + + /* ---- Case 3: fixed-width padding — a value that NATURALLY needs 1 + * byte must be re-encoded into a reserved 2-byte field via a redundant + * continuation group, so the layout never shifts. ---- */ + { + /* Reserved field is 2 bytes (0x80,0x00 = redundant encoding of 0). + * SET 0x05, SUB 0x00 -> 0x05, but must STAY 2 bytes wide. 
*/ + uint8_t buf[3] = {0x80, 0x00, 0x77}; + apply_pair(buf, 0x05, 0, 0x00, 0); + expect_field("pad-1-into-2", buf, 0x05u, 2u, 0x77, buf[2]); + /* The encoding must be {0x85, 0x00}: low group 0x05 + cont bit, then + * terminating 0x00. */ + CHECK(buf[0] == 0x85 && buf[1] == 0x00, + "pad-1-into-2: bytes got {0x%02x,0x%02x} want {0x85,0x00}", buf[0], + buf[1]); + } + + /* ---- Case 4: genuine multi-byte value round-trips ---- */ + { + /* A 2-byte field reserved (0x80,0x00). Difference 0x100 = 256 needs + * two ULEB groups: 0x80 (low 7 = 0, cont) then 0x02. Width stays 2. */ + uint8_t buf[3] = {0x80, 0x00, 0x5a}; + apply_pair(buf, 0x100, 0, 0x00, 0); + expect_field("multibyte-0x100", buf, 0x100u, 2u, 0x5a, buf[2]); + CHECK(buf[0] == 0x80 && buf[1] == 0x02, + "multibyte-0x100: bytes got {0x%02x,0x%02x} want {0x80,0x02}", + buf[0], buf[1]); + } + + /* ---- Case 5: 3-byte reserved field, value 0x3fff -> {0xff,0xff,0x00}. + * Naturally 0x3fff is 2 bytes; padding to 3 appends a redundant 0. ---- */ + { + uint8_t buf[4] = {0x80, 0x80, 0x00, 0xc3}; + apply_pair(buf, 0x3fff, 0, 0x00, 0); + expect_field("pad-2-into-3", buf, 0x3fffu, 3u, 0xc3, buf[3]); + CHECK(buf[0] == 0xff && buf[1] == 0xff && buf[2] == 0x00, + "pad-2-into-3: bytes got {0x%02x,0x%02x,0x%02x} want {0xff,0xff,0x00}", + buf[0], buf[1], buf[2]); + } + + /* ---- Case 6: standalone SET (no paired SUB) writes S + A in place. ---- */ + { + uint8_t buf[2] = {0x00, 0x9d}; + link_reloc_apply(NULL, R_RV_SET_ULEB128, buf, 0x2a, 0, 0); + expect_field("set-only", buf, 0x2au, 1u, 0x9d, buf[1]); + } + + if (g_failures) { + fprintf(stderr, "reloc_uleb128_unit: %d failure(s)\n", g_failures); + return 1; + } + fputs("reloc_uleb128_unit: OK\n", stderr); + return 0; +} diff --git a/test/test.mk b/test/test.mk @@ -278,6 +278,18 @@ $(EMU_RV64_TEST_BIN): test/emu/rv64_smoke_test.c $(LIB_AR) @mkdir -p $(dir $@) $(CC) $(TEST_HOST_CFLAGS) -Isrc test/emu/rv64_smoke_test.c $(LIB_AR) -o $@ +# RISC-V ULEB128 diff-reloc application unit test. 
link_reloc_apply is an +# internal (hidden) symbol, so link the raw lib objects like the other +# internal-surface unit tests rather than libcfree.a. +RELOC_ULEB128_TEST_BIN = build/test/reloc_uleb128_unit + +test-link-reloc-uleb128: $(RELOC_ULEB128_TEST_BIN) + $(RELOC_ULEB128_TEST_BIN) + +$(RELOC_ULEB128_TEST_BIN): test/link/reloc_uleb128_unit.c $(LIB_OBJS) + @mkdir -p $(dir $@) + $(CC) $(TEST_HOST_CFLAGS) -Isrc test/link/reloc_uleb128_unit.c $(LIB_OBJS) -o $@ + CG_API_TEST_BIN = build/test/cg_api_test CG_SWITCH_TEST_BIN = build/test/cg_switch_test ABI_CLASSIFY_TEST_BIN = build/test/abi_classify_test