commit e618f928841783b78b52452854d70240ab4dbd3f
parent a5ca78289eba8bfea2fda044eb10128c786bf750
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 2 Jun 2026 12:20:56 -0700
rt: mark the remaining libc symbols weak
The mem/string/stdlib conversion functions were already weak so a user
libc or a tuned arch version wins; extend that to the holdouts: the
printf family (vsnprintf/snprintf/vsprintf/sprintf/fctprintf),
qsort/bsearch, div/ldiv/lldiv, and setjmp/longjmp across every coro
backend. This also closes a duplicate-symbol hazard: printf.c and
stdlib.c bundle several of these into one TU, so a program that
overrides one (e.g. snprintf) but calls a sibling (sprintf) would
otherwise hit a strong-vs-strong collision when the archive member is
pulled.
setjmp/longjmp are hand-written asm. The aarch64 Mach-O file uses the
canonical `.globl` + `.weak_definition` (clang rejects GNU `.weak` on
Mach-O); the ELF .s and the inline-asm .c backends use `.weak`, which
cfree's own assembler -- the one that builds rt -- emits correctly on
ELF, Mach-O, and COFF.
Supporting changes:
- asm: teach the assembler the `.weak_definition` directive (-> SB_WEAK),
so cfree can ingest canonical Mach-O weak definitions.
- core: add a hidden weak __cfree_assert_fail fallback. Vendored lz4
compiled into libcfree references it in non-NDEBUG builds; a hidden
weak trap keeps libcfree.a self-contained for every consumer (the
cfree binary and the test harnesses that link the archive directly)
without exposing a non-public symbol or pulling in the runtime.
- test/driver: accept a weak (W/w) vsnprintf definition, not just T/t,
now that the rt libc surface is weak.
Diffstat:
15 files changed, 65 insertions(+), 30 deletions(-)
diff --git a/rt/lib/coro/aarch64_elf.s b/rt/lib/coro/aarch64_elf.s
@@ -1,7 +1,7 @@
.text
.align 4
-.globl setjmp
+.weak setjmp
setjmp:
stp x19, x20, [x0, #0]
stp x21, x22, [x0, #16]
@@ -18,7 +18,7 @@ setjmp:
mov x0, #0
ret
-.globl longjmp
+.weak longjmp
longjmp:
ldp d8, d9, [x0, #104]
ldp d10, d11, [x0, #120]
diff --git a/rt/lib/coro/aarch64_macho.s b/rt/lib/coro/aarch64_macho.s
@@ -2,6 +2,7 @@
.align 4
.globl _setjmp
+.weak_definition _setjmp
_setjmp:
stp x19, x20, [x0, #0]
stp x21, x22, [x0, #16]
@@ -19,6 +20,7 @@ _setjmp:
ret
.globl _longjmp
+.weak_definition _longjmp
_longjmp:
ldp d8, d9, [x0, #104]
ldp d10, d11, [x0, #120]
diff --git a/rt/lib/coro/arm32.c b/rt/lib/coro/arm32.c
@@ -204,7 +204,7 @@ __asm__ (
/* setjmp(env) -- env in r0. lr at call time is the return address
into the caller, exactly what longjmp must restore. */
- ".globl " SYM(setjmp) "\n"
+ ".weak " SYM(setjmp) "\n"
".thumb_func\n"
".type " SYM(setjmp) ", %function\n"
SYM(setjmp) ":\n"
@@ -216,7 +216,7 @@ __asm__ (
longjmp(_, 0) must deliver 1 (C11 7.13.2.1p4); the IT block
gives r1 = (r1 == 0) ? 1 : r1, then we move it into r0 and
branch to the saved lr. */
- ".globl " SYM(longjmp) "\n"
+ ".weak " SYM(longjmp) "\n"
".thumb_func\n"
".type " SYM(longjmp) ", %function\n"
SYM(longjmp) ":\n"
diff --git a/rt/lib/coro/arm32_thumb1.c b/rt/lib/coro/arm32_thumb1.c
@@ -138,7 +138,7 @@ __asm__ (
/* setjmp(env) -- env in r0. lr at call time is the return address
into the caller, exactly what longjmp must restore. */
- ".globl " SYM(setjmp) "\n"
+ ".weak " SYM(setjmp) "\n"
".thumb_func\n"
".type " SYM(setjmp) ", %function\n"
SYM(setjmp) ":\n"
@@ -152,7 +152,7 @@ __asm__ (
Both `r0 <- r1` and the immediate ops use the T2 (`movs`) form
since plain `mov rd, rm` with both low operands is UNPREDICTABLE
on ARMv6-M. */
- ".globl " SYM(longjmp) "\n"
+ ".weak " SYM(longjmp) "\n"
".thumb_func\n"
".type " SYM(longjmp) ", %function\n"
SYM(longjmp) ":\n"
diff --git a/rt/lib/coro/i386.c b/rt/lib/coro/i386.c
@@ -111,7 +111,7 @@ __asm__ (
".p2align 4\n"
/* setjmp(env) -- env at 4(%esp). */
- ".globl " SYM(setjmp) "\n"
+ ".weak " SYM(setjmp) "\n"
SYM(setjmp) ":\n"
" movl 4(%esp), %edx\n"
SAVE_INTO("%edx")
@@ -120,7 +120,7 @@ __asm__ (
/* longjmp(env, val) -- env at 4(%esp), val at 8(%esp).
longjmp(_, 0) must deliver 1 (C11 7.13.2.1p4). */
- ".globl " SYM(longjmp) "\n"
+ ".weak " SYM(longjmp) "\n"
SYM(longjmp) ":\n"
" movl 4(%esp), %edx\n" /* env */
" movl 8(%esp), %eax\n" /* val */
diff --git a/rt/lib/coro/riscv32.c b/rt/lib/coro/riscv32.c
@@ -242,7 +242,7 @@ __asm__ (
/* setjmp(env) -- env=a0. ra at function entry is the caller's
return address, exactly what longjmp must restore. */
- ".globl " SYM(setjmp) "\n"
+ ".weak " SYM(setjmp) "\n"
".type " SYM(setjmp) ", @function\n"
SYM(setjmp) ":\n"
SAVE_INTO("a0")
@@ -255,7 +255,7 @@ __asm__ (
seqz t0, a1 ; t0 = (a1 == 0)
add a0, a1, t0
so a0 = a1 if a1 != 0, else 1. */
- ".globl " SYM(longjmp) "\n"
+ ".weak " SYM(longjmp) "\n"
".type " SYM(longjmp) ", @function\n"
SYM(longjmp) ":\n"
RESTORE_FROM("a0")
diff --git a/rt/lib/coro/riscv64.c b/rt/lib/coro/riscv64.c
@@ -195,7 +195,7 @@ __asm__ (
/* setjmp(env) -- env in a0. ra at call time is the caller's return
address, which is exactly what longjmp must restore. */
- ".globl " SYM(setjmp) "\n"
+ ".weak " SYM(setjmp) "\n"
".type " SYM(setjmp) ", @function\n"
SYM(setjmp) ":\n"
SAVE_INTO("a0")
@@ -209,7 +209,7 @@ __asm__ (
doesn't touch t0/a0/a1, so the seqz/add can run after it and
write a0 directly -- one fewer instruction than munging a1
first and mv'ing later. */
- ".globl " SYM(longjmp) "\n"
+ ".weak " SYM(longjmp) "\n"
".type " SYM(longjmp) ", @function\n"
SYM(longjmp) ":\n"
RESTORE_FROM("a0")
diff --git a/rt/lib/coro/x86_64.c b/rt/lib/coro/x86_64.c
@@ -110,7 +110,7 @@ __asm__ (
".p2align 4\n"
/* setjmp(env) -- env=%rdi */
- ".globl " SYM(setjmp) "\n"
+ ".weak " SYM(setjmp) "\n"
SYM(setjmp) ":\n"
SAVE_INTO("%rdi")
" xorl %eax, %eax\n"
@@ -118,7 +118,7 @@ __asm__ (
/* longjmp(env, val) -- env=%rdi, val=%esi.
longjmp(_, 0) must deliver 1 (C11 7.13.2.1p4). */
- ".globl " SYM(longjmp) "\n"
+ ".weak " SYM(longjmp) "\n"
SYM(longjmp) ":\n"
" movslq %esi, %rax\n" /* sign-extend int → long */
" testq %rax, %rax\n"
diff --git a/rt/lib/coro/x86_64_win.c b/rt/lib/coro/x86_64_win.c
@@ -181,7 +181,7 @@ __asm__ (
".p2align 4\n"
/* setjmp(env) -- env=%rcx */
- ".globl " SYM(setjmp) "\n"
+ ".weak " SYM(setjmp) "\n"
SYM(setjmp) ":\n"
SAVE_INTO("%rcx")
" xorl %eax, %eax\n"
@@ -189,7 +189,7 @@ __asm__ (
/* longjmp(env, val) -- env=%rcx, val=%edx.
longjmp(_, 0) must deliver 1 (C11 7.13.2.1p4). */
- ".globl " SYM(longjmp) "\n"
+ ".weak " SYM(longjmp) "\n"
SYM(longjmp) ":\n"
" movslq %edx, %rax\n" /* sign-extend int -> long */
" testq %rax, %rax\n"
diff --git a/rt/lib/stdio/printf.c b/rt/lib/stdio/printf.c
@@ -479,7 +479,8 @@ static int cfree_vsnprintf_impl(char* s, size_t n, const char* fmt,
return rc;
}
-int vsnprintf(char* s, size_t n, const char* fmt, va_list ap) {
+__attribute__((weak)) int vsnprintf(char* s, size_t n, const char* fmt,
+ va_list ap) {
va_list copy;
int rc;
va_copy(copy, ap);
@@ -488,7 +489,7 @@ int vsnprintf(char* s, size_t n, const char* fmt, va_list ap) {
return rc;
}
-int snprintf(char* s, size_t n, const char* fmt, ...) {
+__attribute__((weak)) int snprintf(char* s, size_t n, const char* fmt, ...) {
va_list ap;
int rc;
va_start(ap, fmt);
@@ -497,11 +498,11 @@ int snprintf(char* s, size_t n, const char* fmt, ...) {
return rc;
}
-int vsprintf(char* s, const char* fmt, va_list ap) {
+__attribute__((weak)) int vsprintf(char* s, const char* fmt, va_list ap) {
return vsnprintf(s, (size_t)-1, fmt, ap);
}
-int sprintf(char* s, const char* fmt, ...) {
+__attribute__((weak)) int sprintf(char* s, const char* fmt, ...) {
va_list ap;
int rc;
va_start(ap, fmt);
@@ -510,8 +511,8 @@ int sprintf(char* s, const char* fmt, ...) {
return rc;
}
-int fctprintf(void (*out_fct)(char ch, void* arg), void* arg, const char* fmt,
- ...) {
+__attribute__((weak)) int fctprintf(void (*out_fct)(char ch, void* arg),
+ void* arg, const char* fmt, ...) {
CfreePrintfOut out;
va_list ap;
int rc;
diff --git a/rt/lib/stdlib/qsort.c b/rt/lib/stdlib/qsort.c
@@ -165,7 +165,8 @@ static void trinkle(unsigned char* head, size_t width, cmpfun cmp, size_t pp[2],
}
}
-void qsort(void* base, size_t nel, size_t width, cmpfun cmp) {
+__attribute__((weak)) void qsort(void* base, size_t nel, size_t width,
+ cmpfun cmp) {
size_t lp[12 * sizeof(size_t)];
size_t i, size = width * nel;
unsigned char *head, *high;
diff --git a/rt/lib/stdlib/stdlib.c b/rt/lib/stdlib/stdlib.c
@@ -184,29 +184,30 @@ __attribute__((weak)) int abs(int j) { return j < 0 ? -j : j; }
__attribute__((weak)) long labs(long j) { return j < 0 ? -j : j; }
__attribute__((weak)) long long llabs(long long j) { return j < 0 ? -j : j; }
-div_t div(int numer, int denom) {
+__attribute__((weak)) div_t div(int numer, int denom) {
div_t r;
r.quot = numer / denom;
r.rem = numer % denom;
return r;
}
-ldiv_t ldiv(long numer, long denom) {
+__attribute__((weak)) ldiv_t ldiv(long numer, long denom) {
ldiv_t r;
r.quot = numer / denom;
r.rem = numer % denom;
return r;
}
-lldiv_t lldiv(long long numer, long long denom) {
+__attribute__((weak)) lldiv_t lldiv(long long numer, long long denom) {
lldiv_t r;
r.quot = numer / denom;
r.rem = numer % denom;
return r;
}
-void* bsearch(const void* key, const void* base, size_t nmemb, size_t size,
- int (*compar)(const void*, const void*)) {
+__attribute__((weak)) void* bsearch(const void* key, const void* base,
+ size_t nmemb, size_t size,
+ int (*compar)(const void*, const void*)) {
const unsigned char* b = (const unsigned char*)base;
size_t low = 0;
size_t high = nmemb;
diff --git a/src/asm/asm.c b/src/asm/asm.c
@@ -913,7 +913,11 @@ static void do_directive(AsmDriver* d, Sym name) {
d_skip_to_eol(d);
return;
}
- if (sym_eq(d, name, "weak")) {
+ /* `.weak_definition` is the Mach-O spelling for a weak *defined* symbol
+ * (clang rejects GNU `.weak` on Mach-O). It pairs with a `.globl`; cfree
+ * collapses the `.globl` + `.weak_definition` pair to SB_WEAK, which the
+ * Mach-O emitter turns into N_EXT|N_WEAK_DEF and ELF into STB_WEAK. */
+ if (sym_eq(d, name, "weak") || sym_eq(d, name, "weak_definition")) {
Sym n = expect_ident(d, ".weak");
sym_mut(d, intern_sym(d, n))->bind = (u16)SB_WEAK;
d_skip_to_eol(d);
diff --git a/src/core/core.c b/src/core/core.c
@@ -31,6 +31,30 @@ const char* cfree_debug_getenv(const char* name) {
return NULL;
}
+/* Weak fallback for the <assert.h> failure hook (rt/include/assert.h). cfree's
+ * own code uses compiler_panic, not C assert(), but vendored code compiled into
+ * libcfree (the lz4 codecs) references __cfree_assert_fail in non-NDEBUG builds.
+ * A hidden weak trap lets every libcfree.a consumer -- the cfree binary, the
+ * test harnesses that link the archive directly, a standalone embedder --
+ * resolve it without pulling in the runtime. Hidden (the -fvisibility=hidden
+ * default, NOT visibility-default) keeps it off libcfree's public export
+ * surface that scripts/lib_reloc_defined_prefixes.py guards. The freestanding
+ * runtime ships its own weak __cfree_assert_fail (rt/lib/assert) for programs
+ * that link it. The contract is _Noreturn, so trap rather than return. */
+#if defined(__GNUC__) || defined(__clang__) || defined(__cfree__)
+__attribute__((weak)) /* omitted elsewhere: the definition is then strong */
+#endif
+void __cfree_assert_fail(const char* expr, const char* file, int line,
+ const char* func) {
+ (void)expr; /* all four arguments are deliberately unused ... */
+ (void)file;
+ (void)line;
+ (void)func; /* ... nothing is formatted or reported here */
+ __builtin_trap(); /* stop immediately */
+ for (;;) { /* backstop so control never returns to the caller */
+ }
+}
+
SourceManager* source_new(Compiler*);
void source_free(SourceManager*);
diff --git a/test/driver/run.sh b/test/driver/run.sh
@@ -273,12 +273,14 @@ SRC
# linked image proves the runtime's printf.c (a libc source, not just
# compiler-rt) was auto-built and linked — what the old `ar t | grep printf.c`
# member check verified, but location-independent of the rt cache dir.
+# The rt's libc symbols are weak (a user libc may override them), so accept a
+# weak (W/w) definition as well as a strong (T/t) one.
if "$CFREE" cc --support-dir "$work/rt-support" -target x86_64-linux \
-e _start "$repo_root/test/rt/cases/freestanding_lib.c" \
"$work/rt-x64-start.c" \
-o "$work/rt-x64" > "$work/rt-x64.out" 2> "$work/rt-x64.err" &&
"$CFREE" nm "$work/rt-x64" 2> "$work/rt-x64-nm.err" \
- | grep -qE '[Tt] vsnprintf'; then
+ | grep -qE '[TtWw] vsnprintf'; then
ok "cc-auto-builds-and-links-libcfree-rt-x64"
else
{ sed 's/^/cc: /' "$work/rt-x64.err"