kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit e618f928841783b78b52452854d70240ab4dbd3f
parent a5ca78289eba8bfea2fda044eb10128c786bf750
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue,  2 Jun 2026 12:20:56 -0700

rt: mark the remaining libc symbols weak

The mem/string/stdlib conversion functions were already weak so a user
libc or a tuned arch version wins; extend that to the holdouts: the
printf family (vsnprintf/snprintf/vsprintf/sprintf/fctprintf),
qsort/bsearch, div/ldiv/lldiv, and setjmp/longjmp across every coro
backend. This also closes a duplicate-symbol hazard: printf.c and
stdlib.c bundle several of these into one TU, so a program that
overrides one (e.g. snprintf) but calls a sibling (sprintf) would
otherwise hit a strong-vs-strong collision when the archive member is
pulled.

setjmp/longjmp are hand-written asm. The aarch64 Mach-O file uses the
canonical `.globl` + `.weak_definition` (clang rejects GNU `.weak` on
Mach-O); the ELF .s and the inline-asm .c backends use `.weak`, which
cfree's own assembler -- the one that builds rt -- emits correctly on
ELF, Mach-O, and COFF.

Supporting changes:
- asm: teach the assembler the `.weak_definition` directive (-> SB_WEAK),
  so cfree can ingest canonical Mach-O weak definitions.
- core: add a hidden weak __cfree_assert_fail fallback. Vendored lz4
  compiled into libcfree references it in non-NDEBUG builds; a hidden
  weak trap keeps libcfree.a self-contained for every consumer (the
  cfree binary and the test harnesses that link the archive directly)
  without exposing a non-public symbol or pulling in the runtime.
- test/driver: accept a weak (W/w) vsnprintf definition, not just T/t,
  now that the rt libc surface is weak.

Diffstat:
M rt/lib/coro/aarch64_elf.s | 4 ++--
M rt/lib/coro/aarch64_macho.s | 2 ++
M rt/lib/coro/arm32.c | 4 ++--
M rt/lib/coro/arm32_thumb1.c | 4 ++--
M rt/lib/coro/i386.c | 4 ++--
M rt/lib/coro/riscv32.c | 4 ++--
M rt/lib/coro/riscv64.c | 4 ++--
M rt/lib/coro/x86_64.c | 4 ++--
M rt/lib/coro/x86_64_win.c | 4 ++--
M rt/lib/stdio/printf.c | 13 +++++++------
M rt/lib/stdlib/qsort.c | 3 ++-
M rt/lib/stdlib/stdlib.c | 11 ++++++-----
M src/asm/asm.c | 6 +++++-
M src/core/core.c | 24 ++++++++++++++++++++++++
M test/driver/run.sh | 4 +++-
15 files changed, 65 insertions(+), 30 deletions(-)

diff --git a/rt/lib/coro/aarch64_elf.s b/rt/lib/coro/aarch64_elf.s @@ -1,7 +1,7 @@ .text .align 4 -.globl setjmp +.weak setjmp setjmp: stp x19, x20, [x0, #0] stp x21, x22, [x0, #16] @@ -18,7 +18,7 @@ setjmp: mov x0, #0 ret -.globl longjmp +.weak longjmp longjmp: ldp d8, d9, [x0, #104] ldp d10, d11, [x0, #120] diff --git a/rt/lib/coro/aarch64_macho.s b/rt/lib/coro/aarch64_macho.s @@ -2,6 +2,7 @@ .align 4 .globl _setjmp +.weak_definition _setjmp _setjmp: stp x19, x20, [x0, #0] stp x21, x22, [x0, #16] @@ -19,6 +20,7 @@ _setjmp: ret .globl _longjmp +.weak_definition _longjmp _longjmp: ldp d8, d9, [x0, #104] ldp d10, d11, [x0, #120] diff --git a/rt/lib/coro/arm32.c b/rt/lib/coro/arm32.c @@ -204,7 +204,7 @@ __asm__ ( /* setjmp(env) -- env in r0. lr at call time is the return address into the caller, exactly what longjmp must restore. */ - ".globl " SYM(setjmp) "\n" + ".weak " SYM(setjmp) "\n" ".thumb_func\n" ".type " SYM(setjmp) ", %function\n" SYM(setjmp) ":\n" @@ -216,7 +216,7 @@ __asm__ ( longjmp(_, 0) must deliver 1 (C11 7.13.2.1p4); the IT block gives r1 = (r1 == 0) ? 1 : r1, then we move it into r0 and branch to the saved lr. */ - ".globl " SYM(longjmp) "\n" + ".weak " SYM(longjmp) "\n" ".thumb_func\n" ".type " SYM(longjmp) ", %function\n" SYM(longjmp) ":\n" diff --git a/rt/lib/coro/arm32_thumb1.c b/rt/lib/coro/arm32_thumb1.c @@ -138,7 +138,7 @@ __asm__ ( /* setjmp(env) -- env in r0. lr at call time is the return address into the caller, exactly what longjmp must restore. */ - ".globl " SYM(setjmp) "\n" + ".weak " SYM(setjmp) "\n" ".thumb_func\n" ".type " SYM(setjmp) ", %function\n" SYM(setjmp) ":\n" @@ -152,7 +152,7 @@ __asm__ ( Both `r0 <- r1` and the immediate ops use the T2 (`movs`) form since plain `mov rd, rm` with both low operands is UNPREDICTABLE on ARMv6-M. 
*/ - ".globl " SYM(longjmp) "\n" + ".weak " SYM(longjmp) "\n" ".thumb_func\n" ".type " SYM(longjmp) ", %function\n" SYM(longjmp) ":\n" diff --git a/rt/lib/coro/i386.c b/rt/lib/coro/i386.c @@ -111,7 +111,7 @@ __asm__ ( ".p2align 4\n" /* setjmp(env) -- env at 4(%esp). */ - ".globl " SYM(setjmp) "\n" + ".weak " SYM(setjmp) "\n" SYM(setjmp) ":\n" " movl 4(%esp), %edx\n" SAVE_INTO("%edx") @@ -120,7 +120,7 @@ __asm__ ( /* longjmp(env, val) -- env at 4(%esp), val at 8(%esp). longjmp(_, 0) must deliver 1 (C11 7.13.2.1p4). */ - ".globl " SYM(longjmp) "\n" + ".weak " SYM(longjmp) "\n" SYM(longjmp) ":\n" " movl 4(%esp), %edx\n" /* env */ " movl 8(%esp), %eax\n" /* val */ diff --git a/rt/lib/coro/riscv32.c b/rt/lib/coro/riscv32.c @@ -242,7 +242,7 @@ __asm__ ( /* setjmp(env) -- env=a0. ra at function entry is the caller's return address, exactly what longjmp must restore. */ - ".globl " SYM(setjmp) "\n" + ".weak " SYM(setjmp) "\n" ".type " SYM(setjmp) ", @function\n" SYM(setjmp) ":\n" SAVE_INTO("a0") @@ -255,7 +255,7 @@ __asm__ ( seqz t0, a1 ; t0 = (a1 == 0) add a0, a1, t0 so a0 = a1 if a1 != 0, else 1. */ - ".globl " SYM(longjmp) "\n" + ".weak " SYM(longjmp) "\n" ".type " SYM(longjmp) ", @function\n" SYM(longjmp) ":\n" RESTORE_FROM("a0") diff --git a/rt/lib/coro/riscv64.c b/rt/lib/coro/riscv64.c @@ -195,7 +195,7 @@ __asm__ ( /* setjmp(env) -- env in a0. ra at call time is the caller's return address, which is exactly what longjmp must restore. */ - ".globl " SYM(setjmp) "\n" + ".weak " SYM(setjmp) "\n" ".type " SYM(setjmp) ", @function\n" SYM(setjmp) ":\n" SAVE_INTO("a0") @@ -209,7 +209,7 @@ __asm__ ( doesn't touch t0/a0/a1, so the seqz/add can run after it and write a0 directly -- one fewer instruction than munging a1 first and mv'ing later. 
*/ - ".globl " SYM(longjmp) "\n" + ".weak " SYM(longjmp) "\n" ".type " SYM(longjmp) ", @function\n" SYM(longjmp) ":\n" RESTORE_FROM("a0") diff --git a/rt/lib/coro/x86_64.c b/rt/lib/coro/x86_64.c @@ -110,7 +110,7 @@ __asm__ ( ".p2align 4\n" /* setjmp(env) -- env=%rdi */ - ".globl " SYM(setjmp) "\n" + ".weak " SYM(setjmp) "\n" SYM(setjmp) ":\n" SAVE_INTO("%rdi") " xorl %eax, %eax\n" @@ -118,7 +118,7 @@ __asm__ ( /* longjmp(env, val) -- env=%rdi, val=%esi. longjmp(_, 0) must deliver 1 (C11 7.13.2.1p4). */ - ".globl " SYM(longjmp) "\n" + ".weak " SYM(longjmp) "\n" SYM(longjmp) ":\n" " movslq %esi, %rax\n" /* sign-extend int → long */ " testq %rax, %rax\n" diff --git a/rt/lib/coro/x86_64_win.c b/rt/lib/coro/x86_64_win.c @@ -181,7 +181,7 @@ __asm__ ( ".p2align 4\n" /* setjmp(env) -- env=%rcx */ - ".globl " SYM(setjmp) "\n" + ".weak " SYM(setjmp) "\n" SYM(setjmp) ":\n" SAVE_INTO("%rcx") " xorl %eax, %eax\n" @@ -189,7 +189,7 @@ __asm__ ( /* longjmp(env, val) -- env=%rcx, val=%edx. longjmp(_, 0) must deliver 1 (C11 7.13.2.1p4). */ - ".globl " SYM(longjmp) "\n" + ".weak " SYM(longjmp) "\n" SYM(longjmp) ":\n" " movslq %edx, %rax\n" /* sign-extend int -> long */ " testq %rax, %rax\n" diff --git a/rt/lib/stdio/printf.c b/rt/lib/stdio/printf.c @@ -479,7 +479,8 @@ static int cfree_vsnprintf_impl(char* s, size_t n, const char* fmt, return rc; } -int vsnprintf(char* s, size_t n, const char* fmt, va_list ap) { +__attribute__((weak)) int vsnprintf(char* s, size_t n, const char* fmt, + va_list ap) { va_list copy; int rc; va_copy(copy, ap); @@ -488,7 +489,7 @@ int vsnprintf(char* s, size_t n, const char* fmt, va_list ap) { return rc; } -int snprintf(char* s, size_t n, const char* fmt, ...) { +__attribute__((weak)) int snprintf(char* s, size_t n, const char* fmt, ...) { va_list ap; int rc; va_start(ap, fmt); @@ -497,11 +498,11 @@ int snprintf(char* s, size_t n, const char* fmt, ...) 
{ return rc; } -int vsprintf(char* s, const char* fmt, va_list ap) { +__attribute__((weak)) int vsprintf(char* s, const char* fmt, va_list ap) { return vsnprintf(s, (size_t)-1, fmt, ap); } -int sprintf(char* s, const char* fmt, ...) { +__attribute__((weak)) int sprintf(char* s, const char* fmt, ...) { va_list ap; int rc; va_start(ap, fmt); @@ -510,8 +511,8 @@ int sprintf(char* s, const char* fmt, ...) { return rc; } -int fctprintf(void (*out_fct)(char ch, void* arg), void* arg, const char* fmt, - ...) { +__attribute__((weak)) int fctprintf(void (*out_fct)(char ch, void* arg), + void* arg, const char* fmt, ...) { CfreePrintfOut out; va_list ap; int rc; diff --git a/rt/lib/stdlib/qsort.c b/rt/lib/stdlib/qsort.c @@ -165,7 +165,8 @@ static void trinkle(unsigned char* head, size_t width, cmpfun cmp, size_t pp[2], } } -void qsort(void* base, size_t nel, size_t width, cmpfun cmp) { +__attribute__((weak)) void qsort(void* base, size_t nel, size_t width, + cmpfun cmp) { size_t lp[12 * sizeof(size_t)]; size_t i, size = width * nel; unsigned char *head, *high; diff --git a/rt/lib/stdlib/stdlib.c b/rt/lib/stdlib/stdlib.c @@ -184,29 +184,30 @@ __attribute__((weak)) int abs(int j) { return j < 0 ? -j : j; } __attribute__((weak)) long labs(long j) { return j < 0 ? -j : j; } __attribute__((weak)) long long llabs(long long j) { return j < 0 ? 
-j : j; } -div_t div(int numer, int denom) { +__attribute__((weak)) div_t div(int numer, int denom) { div_t r; r.quot = numer / denom; r.rem = numer % denom; return r; } -ldiv_t ldiv(long numer, long denom) { +__attribute__((weak)) ldiv_t ldiv(long numer, long denom) { ldiv_t r; r.quot = numer / denom; r.rem = numer % denom; return r; } -lldiv_t lldiv(long long numer, long long denom) { +__attribute__((weak)) lldiv_t lldiv(long long numer, long long denom) { lldiv_t r; r.quot = numer / denom; r.rem = numer % denom; return r; } -void* bsearch(const void* key, const void* base, size_t nmemb, size_t size, - int (*compar)(const void*, const void*)) { +__attribute__((weak)) void* bsearch(const void* key, const void* base, + size_t nmemb, size_t size, + int (*compar)(const void*, const void*)) { const unsigned char* b = (const unsigned char*)base; size_t low = 0; size_t high = nmemb; diff --git a/src/asm/asm.c b/src/asm/asm.c @@ -913,7 +913,11 @@ static void do_directive(AsmDriver* d, Sym name) { d_skip_to_eol(d); return; } - if (sym_eq(d, name, "weak")) { + /* `.weak_definition` is the Mach-O spelling for a weak *defined* symbol + * (clang rejects GNU `.weak` on Mach-O). It pairs with a `.globl`; cfree + * collapses both to SB_WEAK, which the Mach-O emitter turns into + * N_EXT|N_WEAK_DEF and ELF into STB_WEAK. */ + if (sym_eq(d, name, "weak") || sym_eq(d, name, "weak_definition")) { Sym n = expect_ident(d, ".weak"); sym_mut(d, intern_sym(d, n))->bind = (u16)SB_WEAK; d_skip_to_eol(d); diff --git a/src/core/core.c b/src/core/core.c @@ -31,6 +31,30 @@ const char* cfree_debug_getenv(const char* name) { return NULL; } +/* Weak fallback for the <assert.h> failure hook (rt/include/assert.h). cfree's + * own code uses compiler_panic, not C assert(), but vendored code compiled into + * libcfree (the lz4 codecs) references __cfree_assert_fail in non-NDEBUG builds. 
+ * A hidden weak trap lets every libcfree.a consumer -- the cfree binary, the + * test harnesses that link the archive directly, a standalone embedder -- + * resolve it without pulling in the runtime. Hidden (the -fvisibility=hidden + * default, NOT visibility-default) keeps it off libcfree's public export + * surface that scripts/lib_reloc_defined_prefixes.py guards. The freestanding + * runtime ships its own weak __cfree_assert_fail (rt/lib/assert) for programs + * that link it. The contract is _Noreturn, so trap rather than return. */ +#if defined(__GNUC__) || defined(__clang__) || defined(__cfree__) +__attribute__((weak)) +#endif +void __cfree_assert_fail(const char* expr, const char* file, int line, + const char* func) { + (void)expr; + (void)file; + (void)line; + (void)func; + __builtin_trap(); + for (;;) { + } +} + SourceManager* source_new(Compiler*); void source_free(SourceManager*); diff --git a/test/driver/run.sh b/test/driver/run.sh @@ -273,12 +273,14 @@ SRC # linked image proves the runtime's printf.c (a libc source, not just # compiler-rt) was auto-built and linked — what the old `ar t | grep printf.c` # member check verified, but location-independent of the rt cache dir. +# The rt's libc symbols are weak (a user libc may override them), so accept a +# weak (W/w) definition as well as a strong (T/t) one. if "$CFREE" cc --support-dir "$work/rt-support" -target x86_64-linux \ -e _start "$repo_root/test/rt/cases/freestanding_lib.c" \ "$work/rt-x64-start.c" \ -o "$work/rt-x64" > "$work/rt-x64.out" 2> "$work/rt-x64.err" && "$CFREE" nm "$work/rt-x64" 2> "$work/rt-x64-nm.err" \ - | grep -qE '[Tt] vsnprintf'; then + | grep -qE '[TtWw] vsnprintf'; then ok "cc-auto-builds-and-links-libcfree-rt-x64" else { sed 's/^/cc: /' "$work/rt-x64.err"