commit 38dcb984f7b8380b2dfda9bc137e8c06f1a138ac
parent 118e679911158143ee59dfa081e68731e362b1e9
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 10 May 2026 18:37:32 -0700
libc: platform-agnostic headers + macOS hosted shim + test-libc harness
Headers (rt/include/libc/): stdio, stdlib, string, ctype, errno, time,
math, signal, inttypes, locale — pure C11 declarations, ABI-neutral.
stdin/stdout/stderr and errno route through accessor macros so per-OS
divergence stays in the shim.
Predefines (src/pp/pp.c): target-aware __SIZE_TYPE__, __INT*_TYPE__,
__*_MAX__ etc. so rt/include/stddef.h and rt/include/stdint.h are
directly consumable by cfree-cc (not just clang-as-host during stage1).
Spurious-UNDEF prune (src/obj/, src/link/link_layout.c): the C frontend
mints an ObjSym for every parsed extern decl, so a single <stdio.h>
include would demand 50+ libSystem symbols at link time. New
ObjSym::referenced bit (set by obj_reloc_ex and the readers) lets
elf_emit / macho_emit / link_layout drop unreferenced UNDEF externals
from the output and from the in-memory cc->link path.
macOS shim (rt/lib/cfree_hosted/macos.c): bridges __cfree_stdin etc.
to libSystem's __stdinp / __error. Built via `make hosted-macos` into
build/libcfree_hosted_macos.a. clang-compiled for now: cfree's AArch64
codegen emits direct ADRP+LDR for extern globals, but Mach-O dylib
imports need GOT_LOAD_PAGE21 / GOT_LO12_NC -- a follow-up codegen gap
documented in the Makefile target.
test-libc (test/libc/run.sh + Makefile target): host-libc harness
parallel to test-musl / test-glibc. Six new shared cases under
test/libc/cases/. On aa64-darwin 5/7 pass + 2 SKIPs for musl/glibc-only
surface; 14_errno_fopen and 15_ctype fail with a cfree-codegen issue on
multi-arg printf that we'll chase separately.
No regressions: test-elf 37/37, test-link 119/119, test-link macho 95
pass + same 8 documented Path J failures.
Diffstat:
34 files changed, 1201 insertions(+), 7 deletions(-)
diff --git a/Makefile b/Makefile
@@ -27,7 +27,7 @@ DRIVER_DEPS = $(DRIVER_OBJS:.o=.d)
LIB_AR = build/libcfree.a
BIN = build/cfree
-.PHONY: all lib driver bin rt rt-aarch64-linux format clean self self-stage2
+.PHONY: all lib driver bin rt rt-aarch64-linux hosted-macos format clean self self-stage2
# Default: compile libcfree.a, the driver objects, and link the cfree
# binary. The link step currently fails because most libcfree functions
@@ -53,6 +53,31 @@ rt/build/aarch64-linux/libcfree_rt.a:
# `rt` alias builds whichever variants are typically wanted on the host.
rt: rt-aarch64-linux
+# libcfree_hosted -- small per-OS glue archive mapping the ABI-neutral names
+# declared under rt/include/libc/ onto the symbols the platform libc really
+# exports. Only the macOS flavour exists so far.
+#
+# The object is compiled by clang rather than cfree-cc for the time being.
+# The shim loads globals imported from libSystem (__stdinp, __stdoutp,
+# __stderrp); cfree's AArch64 backend always addresses extern globals with
+# a direct ADRP+LDR pair (R_AARCH64_ADR_PREL_PG_HI21 + LDST64_ABS_LO12_NC),
+# whereas a Mach-O dylib import has to go through GOT_LOAD_PAGE21 /
+# GOT_LO12_NC so the load can be routed via a chained fixup. clang always
+# emits GOT_LOAD on this target, so it builds the shim until cfree catches up.
+HOSTED_MACOS_SRC = rt/lib/cfree_hosted/macos.c
+HOSTED_MACOS_OBJ = build/cfree_hosted/macos.o
+HOSTED_MACOS_AR = build/libcfree_hosted_macos.a
+
+hosted-macos: $(HOSTED_MACOS_AR)
+
+$(HOSTED_MACOS_OBJ): $(HOSTED_MACOS_SRC)
+	@mkdir -p $(@D)
+	$(CC) $(HOST_SYSROOT_CFLAGS) -arch arm64 -ffreestanding -c $< -o $@
+
+# The archive is recreated from scratch by cfree's own `ar` subcommand.
+$(HOSTED_MACOS_AR): $(HOSTED_MACOS_OBJ) $(BIN)
+	@rm -f $@
+	$(BIN) ar rcs $@ $<
+
# Replace the archive (`ar rcs` only adds/updates), so removing a .c file
# also removes its .o from the archive on the next build.
$(LIB_AR): $(LIB_OBJS)
diff --git a/rt/include/libc/ctype.h b/rt/include/libc/ctype.h
@@ -0,0 +1,23 @@
+/* ctype.h -- C11 7.4 -- Character classification and conversion
+ *
+ * Inputs are int and must be EOF (-1) or representable as unsigned char.
+ * Behaviour outside that range is undefined per the standard. */
+#ifndef CFREE_LIBC_CTYPE_H
+#define CFREE_LIBC_CTYPE_H
+
+/* Character classification (C11 7.4.1) */
+int isalnum(int c);
+int isalpha(int c);
+int isblank(int c);
+int iscntrl(int c);
+int isdigit(int c);
+int isgraph(int c);
+int islower(int c);
+int isprint(int c);
+int ispunct(int c);
+int isspace(int c);
+int isupper(int c);
+int isxdigit(int c);
+
+/* Character case mapping (C11 7.4.2) */
+int tolower(int c);
+int toupper(int c);
+
+#endif
diff --git a/rt/include/libc/errno.h b/rt/include/libc/errno.h
@@ -0,0 +1,55 @@
+/* errno.h -- C11 7.5 -- Macros reporting error conditions
+ *
+ * C11 only mandates EDOM/EILSEQ/ERANGE. Other names are POSIX.
+ *
+ * `errno` is platform-divergent: macOS uses `int *__error(void)`, glibc /
+ * musl use `int *__errno_location(void)`. We route through an
+ * accessor the libcfree_hosted shim provides, keeping the header itself
+ * ABI-neutral. */
+#ifndef CFREE_LIBC_ERRNO_H
+#define CFREE_LIBC_ERRNO_H
+
+extern int *__cfree_errno_location(void);
+#define errno (*__cfree_errno_location())
+
+/* C11 mandatory. EDOM and ERANGE carry the same value on Linux and Darwin;
+ * EILSEQ does not (84 on Linux, 92 on Darwin).
+ *
+ * Platform selection below keys on __APPLE__. NOTE(review): this assumes
+ * the compiler predefines __APPLE__ for Darwin targets -- confirm for
+ * cfree-cc. When it is not defined the Linux numbering is used. */
+#define EDOM 33
+#define ERANGE 34
+#if defined(__APPLE__)
+#define EILSEQ 92
+#else
+#define EILSEQ 84
+#endif
+
+/* POSIX additions in common use. Linux and Darwin agree on every value
+ * listed here except EAGAIN: Linux uses 11, Darwin uses 35 (11 is EDEADLK
+ * there). Codes above 34 diverge widely and are deliberately not listed.
+ * Programs that inspect numeric values across platforms should use
+ * strerror() instead. */
+#define EPERM 1
+#define ENOENT 2
+#define ESRCH 3
+#define EINTR 4
+#define EIO 5
+#define ENXIO 6
+#define E2BIG 7
+#define ENOEXEC 8
+#define EBADF 9
+#define ECHILD 10
+#if defined(__APPLE__)
+#define EAGAIN 35
+#else
+#define EAGAIN 11
+#endif
+#define ENOMEM 12
+#define EACCES 13
+#define EFAULT 14
+#define EBUSY 16
+#define EEXIST 17
+#define EXDEV 18
+#define ENODEV 19
+#define ENOTDIR 20
+#define EISDIR 21
+#define EINVAL 22
+#define ENFILE 23
+#define EMFILE 24
+#define ENOTTY 25
+#define ETXTBSY 26
+#define EFBIG 27
+#define ENOSPC 28
+#define ESPIPE 29
+#define EROFS 30
+#define EMLINK 31
+#define EPIPE 32
+
+#endif
diff --git a/rt/include/libc/inttypes.h b/rt/include/libc/inttypes.h
@@ -0,0 +1,88 @@
+/* inttypes.h -- C11 7.8 -- Format conversion of integer types */
+#ifndef CFREE_LIBC_INTTYPES_H
+#define CFREE_LIBC_INTTYPES_H
+
+#include <stdint.h>
+
+typedef struct { intmax_t quot, rem; } imaxdiv_t;
+
+/* Width macros: cfree assumes int == 32-bit, long long == 64-bit, and
+ * intmax_t == long long. On LP64 targets long is also 64-bit; we pick the
+ * longer "ll" form for 64-bit conversions to keep the macros portable
+ * across LP64 and LLP64. */
+
+#define PRId8 "d"
+#define PRId16 "d"
+#define PRId32 "d"
+#define PRId64 "lld"
+
+#define PRIi8 "i"
+#define PRIi16 "i"
+#define PRIi32 "i"
+#define PRIi64 "lli"
+
+#define PRIo8 "o"
+#define PRIo16 "o"
+#define PRIo32 "o"
+#define PRIo64 "llo"
+
+#define PRIu8 "u"
+#define PRIu16 "u"
+#define PRIu32 "u"
+#define PRIu64 "llu"
+
+#define PRIx8 "x"
+#define PRIx16 "x"
+#define PRIx32 "x"
+#define PRIx64 "llx"
+
+#define PRIX8 "X"
+#define PRIX16 "X"
+#define PRIX32 "X"
+#define PRIX64 "llX"
+
+#define PRIdMAX "lld"
+#define PRIiMAX "lli"
+#define PRIoMAX "llo"
+#define PRIuMAX "llu"
+#define PRIxMAX "llx"
+#define PRIXMAX "llX"
+
+#define PRIdPTR "ld"
+#define PRIiPTR "li"
+#define PRIoPTR "lo"
+#define PRIuPTR "lu"
+#define PRIxPTR "lx"
+#define PRIXPTR "lX"
+
+/* fscanf-family conversion specifiers. Unlike the PRI* macros, the narrow
+ * widths need explicit "hh" / "h" length modifiers because scanf stores
+ * through a pointer to the exact-width object. */
+#define SCNd8 "hhd"
+#define SCNd16 "hd"
+#define SCNd32 "d"
+#define SCNd64 "lld"
+
+#define SCNi8 "hhi"
+#define SCNi16 "hi"
+#define SCNi32 "i"
+#define SCNi64 "lli"
+
+#define SCNo8 "hho"
+#define SCNo16 "ho"
+#define SCNo32 "o"
+#define SCNo64 "llo"
+
+#define SCNu8 "hhu"
+#define SCNu16 "hu"
+#define SCNu32 "u"
+#define SCNu64 "llu"
+
+#define SCNx8 "hhx"
+#define SCNx16 "hx"
+#define SCNx32 "x"
+#define SCNx64 "llx"
+
+/* C11 7.8.1 also requires scanf specifiers for intmax_t / uintmax_t and
+ * intptr_t / uintptr_t. They use the same length modifiers as the
+ * PRI*MAX ("ll") and PRI*PTR ("l") macros defined earlier in this header. */
+#define SCNdMAX "lld"
+#define SCNiMAX "lli"
+#define SCNoMAX "llo"
+#define SCNuMAX "llu"
+#define SCNxMAX "llx"
+
+#define SCNdPTR "ld"
+#define SCNiPTR "li"
+#define SCNoPTR "lo"
+#define SCNuPTR "lu"
+#define SCNxPTR "lx"
+
+intmax_t imaxabs(intmax_t j);
+imaxdiv_t imaxdiv(intmax_t numer, intmax_t denom);
+intmax_t strtoimax(const char *nptr, char **endptr, int base);
+uintmax_t strtoumax(const char *nptr, char **endptr, int base);
+
+#endif
diff --git a/rt/include/libc/locale.h b/rt/include/libc/locale.h
@@ -0,0 +1,42 @@
+/* locale.h -- C11 7.11 -- Localization */
+#ifndef CFREE_LIBC_LOCALE_H
+#define CFREE_LIBC_LOCALE_H
+
+/* Locale category numbers are part of the libc ABI and differ between
+ * Linux libcs (glibc / musl) and Darwin. NOTE(review): selection assumes
+ * the compiler predefines __linux__ on Linux targets -- confirm for
+ * cfree-cc. Without it the Darwin numbering is used. */
+#if defined(__linux__)
+#define LC_CTYPE 0
+#define LC_NUMERIC 1
+#define LC_TIME 2
+#define LC_COLLATE 3
+#define LC_MONETARY 4
+#define LC_ALL 6
+#else
+#define LC_ALL 0
+#define LC_COLLATE 1
+#define LC_CTYPE 2
+#define LC_MONETARY 3
+#define LC_NUMERIC 4
+#define LC_TIME 5
+#endif
+
+/* localeconv() returns a pointer into libc-owned storage, so the member
+ * order here must match the platform libc's layout, not the order in
+ * which C11 7.11 happens to list the members. The pointer members and the
+ * first eight char members are laid out identically by glibc, musl and
+ * Darwin; only the order of the int_* sign/spacing flags differs. */
+struct lconv {
+  char *decimal_point;
+  char *thousands_sep;
+  char *grouping;
+  char *int_curr_symbol;
+  char *currency_symbol;
+  char *mon_decimal_point;
+  char *mon_thousands_sep;
+  char *mon_grouping;
+  char *positive_sign;
+  char *negative_sign;
+  char int_frac_digits;
+  char frac_digits;
+  char p_cs_precedes;
+  char p_sep_by_space;
+  char n_cs_precedes;
+  char n_sep_by_space;
+  char p_sign_posn;
+  char n_sign_posn;
+#if defined(__linux__)
+  char int_p_cs_precedes;
+  char int_p_sep_by_space;
+  char int_n_cs_precedes;
+  char int_n_sep_by_space;
+#else
+  char int_p_cs_precedes;
+  char int_n_cs_precedes;
+  char int_p_sep_by_space;
+  char int_n_sep_by_space;
+#endif
+  char int_p_sign_posn;
+  char int_n_sign_posn;
+};
+
+char *setlocale(int category, const char *locale);
+struct lconv *localeconv(void);
+
+#endif
diff --git a/rt/include/libc/math.h b/rt/include/libc/math.h
@@ -0,0 +1,132 @@
+/* math.h -- C11 7.12 -- Mathematics
+ *
+ * INFINITY/NAN are defined as quotient expressions; the standard requires
+ * them to be representable as float and to evaluate to +inf / NaN under
+ * IEC 60559, which both forms do. They are not strict constant expressions
+ * in pedantic C11, but every conforming runtime accepts them. */
+#ifndef CFREE_LIBC_MATH_H
+#define CFREE_LIBC_MATH_H
+
+typedef float float_t;
+typedef double double_t;
+
+#define HUGE_VAL (1.0 / 0.0)
+#define HUGE_VALF (1.0f / 0.0f)
+#define HUGE_VALL (1.0L / 0.0L)
+#define INFINITY (1.0f / 0.0f)
+#define NAN (0.0f / 0.0f)
+
+/* fpclassify() result codes. They must equal what the platform's
+ * classification routine returns, so they are chosen per libc: Darwin
+ * numbers them 1..5, glibc and musl 0..4. NOTE(review): selection assumes
+ * the compiler predefines __APPLE__ for Darwin targets -- confirm for
+ * cfree-cc. */
+#if defined(__APPLE__)
+#define FP_NAN 1
+#define FP_INFINITE 2
+#define FP_ZERO 3
+#define FP_NORMAL 4
+#define FP_SUBNORMAL 5
+#else
+#define FP_NAN 0
+#define FP_INFINITE 1
+#define FP_ZERO 2
+#define FP_SUBNORMAL 3
+#define FP_NORMAL 4
+#endif
+
+#define MATH_ERRNO 1
+#define MATH_ERREXCEPT 2
+
+#define M_E 2.71828182845904523536
+#define M_LOG2E 1.44269504088896340736
+#define M_LOG10E 0.43429448190325182765
+#define M_LN2 0.69314718055994530942
+#define M_LN10 2.30258509299404568402
+#define M_PI 3.14159265358979323846
+#define M_PI_2 1.57079632679489661923
+#define M_PI_4 0.78539816339744830962
+#define M_SQRT2 1.41421356237309504880
+#define M_SQRT1_2 0.70710678118654752440
+
+/* Trigonometric */
+double acos(double x);
+double asin(double x);
+double atan(double x);
+double atan2(double y, double x);
+double cos(double x);
+double sin(double x);
+double tan(double x);
+
+float acosf(float x);
+float asinf(float x);
+float atanf(float x);
+float atan2f(float y, float x);
+float cosf(float x);
+float sinf(float x);
+float tanf(float x);
+
+/* Hyperbolic */
+double acosh(double x);
+double asinh(double x);
+double atanh(double x);
+double cosh(double x);
+double sinh(double x);
+double tanh(double x);
+
+/* Exponential and logarithmic */
+double exp(double x);
+double exp2(double x);
+double expm1(double x);
+double log(double x);
+double log10(double x);
+double log1p(double x);
+double log2(double x);
+double frexp(double x, int *exp);
+double ldexp(double x, int exp);
+double modf(double x, double *iptr);
+double scalbn(double x, int n);
+
+float expf(float x);
+float logf(float x);
+float log2f(float x);
+float log10f(float x);
+
+/* Power and absolute value */
+double cbrt(double x);
+double fabs(double x);
+double hypot(double x, double y);
+double pow(double x, double y);
+double sqrt(double x);
+
+float fabsf(float x);
+float powf(float x, float y);
+float sqrtf(float x);
+
+/* Error and gamma */
+double erf(double x);
+double erfc(double x);
+double lgamma(double x);
+double tgamma(double x);
+
+/* Nearest integer */
+double ceil(double x);
+double floor(double x);
+double nearbyint(double x);
+double rint(double x);
+double round(double x);
+double trunc(double x);
+long lrint(double x);
+long lround(double x);
+long long llrint(double x);
+long long llround(double x);
+
+float ceilf(float x);
+float floorf(float x);
+float roundf(float x);
+float truncf(float x);
+
+/* Remainder */
+double fmod(double x, double y);
+double remainder(double x, double y);
+
+/* Manipulation / classification */
+double copysign(double x, double y);
+double nan(const char *tagp);
+double nextafter(double x, double y);
+
+/* fpclassify is a macro in C11 (7.12.3.1); platform libcs do not export a
+ * function of that name, so a plain prototype can never link. Route it to
+ * the double-precision classification helper each libc does export. The
+ * argument is converted to double, matching the double-only contract this
+ * header previously declared. */
+#if defined(__APPLE__)
+int __fpclassifyd(double x);
+#define fpclassify(x) (__fpclassifyd((double)(x)))
+#else
+int __fpclassify(double x);
+#define fpclassify(x) (__fpclassify((double)(x)))
+#endif
+
+/* Maximum, minimum, positive difference, fused multiply-add */
+double fdim(double x, double y);
+double fmax(double x, double y);
+double fmin(double x, double y);
+double fma(double x, double y, double z);
+
+#endif
diff --git a/rt/include/libc/signal.h b/rt/include/libc/signal.h
@@ -0,0 +1,38 @@
+/* signal.h -- C11 7.14 -- Signal handling
+ *
+ * Signal numbers below follow the historical Unix layout (1..15 + SIGTERM)
+ * which both Darwin and Linux honour for the C11-mandated names. */
+#ifndef CFREE_LIBC_SIGNAL_H
+#define CFREE_LIBC_SIGNAL_H
+
+typedef int sig_atomic_t;
+
+typedef void (*__sighandler_t)(int);
+
+#define SIG_DFL ((__sighandler_t)0)
+#define SIG_ERR ((__sighandler_t)-1)
+#define SIG_IGN ((__sighandler_t)1)
+
+/* C11-mandated signal names */
+#define SIGABRT 6
+#define SIGFPE 8
+#define SIGILL 4
+#define SIGINT 2
+#define SIGSEGV 11
+#define SIGTERM 15
+
+/* Common POSIX additions. Unlike the C11-mandated names, SIGBUS, SIGUSR1
+ * and SIGUSR2 are numbered differently on Linux (7 / 10 / 12) and Darwin
+ * (10 / 30 / 31); the others agree. NOTE(review): selection assumes the
+ * compiler predefines __linux__ on Linux targets -- confirm for cfree-cc.
+ * Without it the Darwin numbering is used. */
+#define SIGHUP 1
+#define SIGQUIT 3
+#define SIGTRAP 5
+#define SIGKILL 9
+#define SIGPIPE 13
+#define SIGALRM 14
+#if defined(__linux__)
+#define SIGBUS 7
+#define SIGUSR1 10
+#define SIGUSR2 12
+#else
+#define SIGBUS 10
+#define SIGUSR1 30
+#define SIGUSR2 31
+#endif
+
+int raise(int sig);
+__sighandler_t signal(int sig, __sighandler_t func);
+
+#endif
diff --git a/rt/include/libc/stdio.h b/rt/include/libc/stdio.h
@@ -0,0 +1,107 @@
+/* stdio.h -- C11 7.21 -- Input/output
+ *
+ * Declarations only. Bodies are provided by the platform's libc at link
+ * time (e.g. libSystem.B.dylib on macOS, libc.so on Linux). FILE is opaque:
+ * programs may only hold FILE* and pass it back to the runtime. */
+#ifndef CFREE_LIBC_STDIO_H
+#define CFREE_LIBC_STDIO_H
+
+#include <stdarg.h>
+#include <stddef.h>
+
+typedef struct FILE FILE;
+
+/* fpos_t and BUFSIZ are contracts with the platform libc, not free
+ * choices: fgetpos() stores a whole libc-sized fpos_t through the pointer
+ * it is given, and setbuf() assumes the caller's buffer holds BUFSIZ
+ * bytes as the libc defines it. On Linux, glibc and musl use a 16-byte
+ * fpos_t and glibc's BUFSIZ is 8192, so the smaller Darwin-sized values
+ * would let the libc write past the caller's object. NOTE(review):
+ * selection assumes the compiler predefines __linux__ / __APPLE__ --
+ * confirm for cfree-cc. Without them the previous values are used. */
+#if defined(__linux__)
+typedef union {
+  char __opaque[16];
+  long long __lldata;
+  double __align;
+} fpos_t;
+#define BUFSIZ 8192
+#else
+typedef long fpos_t; /* opaque enough for ftell-equivalents */
+#define BUFSIZ 1024
+#endif
+
+#define EOF (-1)
+
+#define SEEK_SET 0
+#define SEEK_CUR 1
+#define SEEK_END 2
+
+#define FILENAME_MAX 4096
+#define FOPEN_MAX 16
+#define TMP_MAX 32
+/* tmpnam(s) may fill up to the libc's own L_tmpnam bytes of s; Darwin's
+ * value is 1024, far larger than the Linux libcs' 20. */
+#if defined(__APPLE__)
+#define L_tmpnam 1024
+#else
+#define L_tmpnam 32
+#endif
+
+#define _IOFBF 0
+#define _IOLBF 1
+#define _IONBF 2
+
+/* stdin / stdout / stderr are platform-divergent under the hood: macOS's
+ * libSystem exports `__stdinp` / `__stdoutp` / `__stderrp`, while glibc /
+ * musl export `stdin` / `stdout` / `stderr` as data globals (frequently
+ * TLS). The libcfree_hosted shim per platform provides these accessors;
+ * the platform-specific aliasing stays out of the header so this file
+ * remains ABI-neutral. */
+extern FILE *__cfree_stdin(void);
+extern FILE *__cfree_stdout(void);
+extern FILE *__cfree_stderr(void);
+#define stdin (__cfree_stdin())
+#define stdout (__cfree_stdout())
+#define stderr (__cfree_stderr())
+
+/* Operations on files */
+int remove(const char *filename);
+int rename(const char *old_, const char *new_);
+FILE *tmpfile(void);
+char *tmpnam(char *s);
+
+/* File access */
+int fclose(FILE *stream);
+int fflush(FILE *stream);
+FILE *fopen(const char *filename, const char *mode);
+FILE *freopen(const char *filename, const char *mode,
+ FILE *stream);
+void setbuf(FILE *stream, char *buf);
+int setvbuf(FILE *stream, char *buf, int mode, size_t size);
+
+/* Formatted input/output */
+int fprintf(FILE *stream, const char *fmt, ...);
+int fscanf(FILE *stream, const char *fmt, ...);
+int printf(const char *fmt, ...);
+int scanf(const char *fmt, ...);
+int snprintf(char *s, size_t n, const char *fmt, ...);
+int sprintf(char *s, const char *fmt, ...);
+int sscanf(const char *s, const char *fmt, ...);
+int vfprintf(FILE *stream, const char *fmt, va_list ap);
+int vfscanf(FILE *stream, const char *fmt, va_list ap);
+int vprintf(const char *fmt, va_list ap);
+int vscanf(const char *fmt, va_list ap);
+int vsnprintf(char *s, size_t n, const char *fmt,
+ va_list ap);
+int vsprintf(char *s, const char *fmt, va_list ap);
+int vsscanf(const char *s, const char *fmt, va_list ap);
+
+/* Character input/output */
+int fgetc(FILE *stream);
+char *fgets(char *s, int n, FILE *stream);
+int fputc(int c, FILE *stream);
+int fputs(const char *s, FILE *stream);
+int getc(FILE *stream);
+int getchar(void);
+int putc(int c, FILE *stream);
+int putchar(int c);
+int puts(const char *s);
+int ungetc(int c, FILE *stream);
+
+/* Direct input/output */
+size_t fread(void *ptr, size_t size, size_t nmemb,
+ FILE *stream);
+size_t fwrite(const void *ptr, size_t size, size_t nmemb,
+ FILE *stream);
+
+/* File positioning */
+int fgetpos(FILE *stream, fpos_t *pos);
+int fseek(FILE *stream, long offset, int whence);
+int fsetpos(FILE *stream, const fpos_t *pos);
+long ftell(FILE *stream);
+void rewind(FILE *stream);
+
+/* Error-handling */
+void clearerr(FILE *stream);
+int feof(FILE *stream);
+int ferror(FILE *stream);
+void perror(const char *s);
+
+#endif
diff --git a/rt/include/libc/stdlib.h b/rt/include/libc/stdlib.h
@@ -0,0 +1,73 @@
+/* stdlib.h -- C11 7.22 -- General utilities */
+#ifndef CFREE_LIBC_STDLIB_H
+#define CFREE_LIBC_STDLIB_H
+
+#include <stddef.h>
+
+#define EXIT_SUCCESS 0
+#define EXIT_FAILURE 1
+#define RAND_MAX 0x7fffffff
+#define MB_CUR_MAX 4
+
+typedef struct { int quot, rem; } div_t;
+typedef struct { long quot, rem; } ldiv_t;
+typedef struct { long long quot, rem; } lldiv_t;
+
+/* Numeric conversion */
+double atof(const char *nptr);
+int atoi(const char *nptr);
+long atol(const char *nptr);
+long long atoll(const char *nptr);
+double strtod(const char *nptr, char **endptr);
+float strtof(const char *nptr, char **endptr);
+long double strtold(const char *nptr, char **endptr);
+long strtol(const char *nptr, char **endptr, int base);
+long long strtoll(const char *nptr, char **endptr, int base);
+unsigned long strtoul(const char *nptr, char **endptr,
+ int base);
+unsigned long long strtoull(const char *nptr, char **endptr,
+ int base);
+
+/* Pseudo-random */
+int rand(void);
+void srand(unsigned seed);
+
+/* Memory management */
+void *aligned_alloc(size_t alignment, size_t size);
+void *calloc(size_t nmemb, size_t size);
+void free(void *ptr);
+void *malloc(size_t size);
+void *realloc(void *ptr, size_t size);
+
+/* Program environment
+ *
+ * NOTE(review): at_quick_exit / quick_exit are C11 additions that not
+ * every platform libc exports (macOS libSystem is believed to lack them)
+ * -- confirm before relying on them in shared test cases. */
+void abort(void);
+int atexit(void (*func)(void));
+int at_quick_exit(void (*func)(void));
+void exit(int status);
+void _Exit(int status);
+void quick_exit(int status);
+char *getenv(const char *name);
+int system(const char *command);
+
+/* Searching and sorting */
+void *bsearch(const void *key, const void *base, size_t nmemb, size_t size,
+ int (*compar)(const void *, const void *));
+void qsort(void *base, size_t nmemb, size_t size,
+ int (*compar)(const void *, const void *));
+
+/* Integer arithmetic */
+int abs(int j);
+long labs(long j);
+long long llabs(long long j);
+div_t div(int numer, int denom);
+ldiv_t ldiv(long numer, long denom);
+lldiv_t lldiv(long long numer, long long denom);
+
+/* Multibyte (declarations only; cfree treats wchar_t as platform width) */
+int mblen(const char *s, size_t n);
+int mbtowc(wchar_t *pwc, const char *s, size_t n);
+int wctomb(char *s, wchar_t wc);
+size_t mbstowcs(wchar_t *pwcs, const char *s, size_t n);
+size_t wcstombs(char *s, const wchar_t *pwcs, size_t n);
+
+#endif
diff --git a/rt/include/libc/string.h b/rt/include/libc/string.h
@@ -0,0 +1,39 @@
+/* string.h -- C11 7.24 -- String handling */
+#ifndef CFREE_LIBC_STRING_H
+#define CFREE_LIBC_STRING_H
+
+#include <stddef.h>
+
+/* Copying */
+void *memcpy(void *dest, const void *src, size_t n);
+void *memmove(void *dest, const void *src, size_t n);
+char *strcpy(char *dest, const char *src);
+char *strncpy(char *dest, const char *src, size_t n);
+
+/* Concatenation */
+char *strcat(char *dest, const char *src);
+char *strncat(char *dest, const char *src, size_t n);
+
+/* Comparison */
+int memcmp(const void *s1, const void *s2, size_t n);
+int strcmp(const char *s1, const char *s2);
+int strcoll(const char *s1, const char *s2);
+int strncmp(const char *s1, const char *s2, size_t n);
+size_t strxfrm(char *dest, const char *src, size_t n);
+
+/* Search */
+void *memchr(const void *s, int c, size_t n);
+char *strchr(const char *s, int c);
+size_t strcspn(const char *s, const char *reject);
+char *strpbrk(const char *s, const char *accept);
+char *strrchr(const char *s, int c);
+size_t strspn(const char *s, const char *accept);
+char *strstr(const char *haystack, const char *needle);
+char *strtok(char *s, const char *delim);
+
+/* Miscellaneous */
+void *memset(void *s, int c, size_t n);
+char *strerror(int errnum);
+size_t strlen(const char *s);
+
+#endif
diff --git a/rt/include/libc/time.h b/rt/include/libc/time.h
@@ -0,0 +1,46 @@
+/* time.h -- C11 7.27 -- Date and time
+ *
+ * time_t and clock_t are integer types of unspecified width. We choose
+ * long for both, matching every modern POSIX/Darwin platform. */
+#ifndef CFREE_LIBC_TIME_H
+#define CFREE_LIBC_TIME_H
+
+#include <stddef.h>
+
+typedef long time_t;
+typedef long clock_t;
+
+struct timespec {
+ time_t tv_sec;
+ long tv_nsec;
+};
+
+/* Broken-down time. C11 lists the nine int members below and permits
+ * additional ones; glibc, musl and Darwin all append a UTC offset and a
+ * zone-name pointer. They are declared here so that a caller-allocated
+ * struct tm is as large as the object the platform libc reads and writes
+ * (mktime() normalises the struct in place, strftime() reads tm_zone for
+ * %Z). Omitting them would let the libc touch memory past the end of the
+ * caller's object. */
+struct tm {
+  int tm_sec;
+  int tm_min;
+  int tm_hour;
+  int tm_mday;
+  int tm_mon;
+  int tm_year;
+  int tm_wday;
+  int tm_yday;
+  int tm_isdst;
+  long tm_gmtoff;      /* seconds east of UTC */
+  const char *tm_zone; /* time zone abbreviation, owned by the libc */
+};
+
+#define CLOCKS_PER_SEC 1000000L
+#define TIME_UTC 1
+
+clock_t clock(void);
+double difftime(time_t end, time_t beginning);
+time_t mktime(struct tm *tp);
+time_t time(time_t *arg);
+int timespec_get(struct timespec *ts, int base);
+
+char *asctime(const struct tm *tp);
+char *ctime(const time_t *timer);
+struct tm *gmtime(const time_t *timer);
+struct tm *localtime(const time_t *timer);
+size_t strftime(char *s, size_t maxsize, const char *format,
+ const struct tm *tp);
+
+#endif
diff --git a/rt/lib/cfree_hosted/macos.c b/rt/lib/cfree_hosted/macos.c
@@ -0,0 +1,34 @@
+/* libcfree_hosted -- macOS shim
+ *
+ * Bridges the ABI-neutral names declared in rt/include/libc/ to the
+ * actual symbols libSystem.B.dylib exports on Darwin. The Makefile's
+ * `hosted-macos` target currently builds this file with the host C
+ * compiler (clang) rather than cfree-cc, because the globals read below
+ * are dylib imports that need GOT-relative loads. The resulting object
+ * goes into libcfree_hosted_macos.a, which programs link in alongside
+ * -lSystem when they include any of the libc/ headers that name a
+ * platform-divergent symbol.
+ *
+ * The set is intentionally tiny -- only the symbols whose ABI names
+ * differ between Darwin and the Linux libcs. Functions like printf,
+ * malloc, memcpy share names everywhere and need no shim. */
+
+/* FILE is opaque in libc/stdio.h. Use the same incomplete-struct form
+ * here so the function signatures line up; we never dereference. */
+struct FILE;
+
+/* libSystem-exported globals on Darwin. Apple's <stdio.h> wraps these
+ * behind `#define stdin __stdinp` macros for the same reason this shim
+ * exists -- the on-disk symbol names diverged from the C-source names
+ * and never converged back. */
+extern struct FILE *__stdinp;
+extern struct FILE *__stdoutp;
+extern struct FILE *__stderrp;
+
+/* Accessors behind the stdin / stdout / stderr macros in libc/stdio.h.
+ * Each reads the libSystem global on every call rather than caching it. */
+struct FILE *__cfree_stdin(void) { return __stdinp; }
+struct FILE *__cfree_stdout(void) { return __stdoutp; }
+struct FILE *__cfree_stderr(void) { return __stderrp; }
+
+/* On Darwin, errno is reached via __error() returning a thread-local int*.
+ * Linux/glibc/musl use __errno_location() with the same shape; the shim
+ * keeps the accessor name uniform. */
+extern int *__error(void);
+
+/* Accessor behind the `errno` macro in libc/errno.h. */
+int *__cfree_errno_location(void) { return __error(); }
diff --git a/src/link/link_layout.c b/src/link/link_layout.c
@@ -171,6 +171,21 @@ static void resolve_symbols(Linker* l, LinkImage* img) {
const ObjSym* s = e.sym;
LinkSymbol rec;
LinkSymId existing;
+ /* Same prune as elf_emit / macho_emit: an extern declaration that
+ * the TU never relocated against is not a real linker input. The
+ * in-memory cc->link path skips the file emitter, so we apply the
+ * same filter here. The "logical undef" predicate (no section AND
+ * not SK_ABS/SK_COMMON) covers both `SK_UNDEF` (already-normalized
+ * by the readers) and the SK_FUNC/SK_OBJ-with-no-section shape the
+ * cgtarget mints for `extern` declarations. */
+ {
+ int is_logical_undef = (s->section_id == OBJ_SEC_NONE) &&
+ (s->kind != SK_ABS) && (s->kind != SK_COMMON);
+ if (is_logical_undef && !s->referenced &&
+ (s->bind == SB_GLOBAL || s->bind == SB_WEAK)) {
+ continue;
+ }
+ }
/* "Defined" means: not SK_UNDEF AND has a backing storage — a
* containing section, an absolute value, or COMMON reservation.
* cgtarget paths emit SK_FUNC / SK_OBJ for an `extern`
diff --git a/src/obj/elf_emit.c b/src/obj/elf_emit.c
@@ -389,6 +389,12 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) {
const ObjSym* s = e.sym;
int is_local = (s->bind == SB_LOCAL);
if ((pass == 0) != is_local) continue;
+ /* Prune unreferenced UNDEF externals — they came from header
+ * `extern` decls the TU never touched. See ObjSym::referenced. */
+ if (s->kind == SK_UNDEF && !s->referenced &&
+ (s->bind == SB_GLOBAL || s->bind == SB_WEAK)) {
+ continue;
+ }
u32 nlen;
const char* nm = sym_to_str(c, s->name, &nlen);
u32 nameoff = nlen ? strtab_add(&strtab, nm, nlen) : 0;
diff --git a/src/obj/elf_read.c b/src/obj/elf_read.c
@@ -413,6 +413,7 @@ ObjBuilder* read_elf(Compiler* c, const char* name, const u8* data,
ObjSymId id =
obj_symbol_ex(ob, sn, (SymBind)bind, (SymVis)vis, (SymKind)kind,
sec_id, value, st_size, cmnalign);
+ obj_sym_mark_referenced(ob, id);
sym_elf_to_obj[i] = id;
}
}
@@ -678,8 +679,11 @@ ObjBuilder* read_elf_dso(Compiler* c, const char* name, const u8* data,
/* DSO exports land as defined symbols in OBJ_SEC_NONE with
* value=0. The consumer treats them as imports — see
* resolve_undefs in src/link/link_layout.c. */
- obj_symbol_ex(ob, sn, (SymBind)bind, (SymVis)vis, (SymKind)kind,
- OBJ_SEC_NONE, 0, 0, 0);
+ {
+ ObjSymId did = obj_symbol_ex(ob, sn, (SymBind)bind, (SymVis)vis,
+ (SymKind)kind, OBJ_SEC_NONE, 0, 0, 0);
+ obj_sym_mark_referenced(ob, did);
+ }
}
obj_finalize(ob);
diff --git a/src/obj/macho_emit.c b/src/obj/macho_emit.c
@@ -338,6 +338,14 @@ void emit_macho(Compiler* c, ObjBuilder* ob, Writer* w) {
int want = (pass == 0 && local) || (pass == 1 && extdef) ||
(pass == 2 && undef);
if (!want) continue;
+ /* Prune unreferenced UNDEF externals: the C frontend mints an
+ * ObjSym for every header-supplied `extern` declaration whether
+ * or not the TU references it. obj_reloc_ex flags the ones we
+ * actually depend on; the rest never reach the output symtab. */
+ if (undef && !s->referenced &&
+ (s->bind == SB_GLOBAL || s->bind == SB_WEAK)) {
+ continue;
+ }
MSym* ms = &msyms[nmsyms];
ms->obj_id = e.id;
@@ -435,7 +443,9 @@ void emit_macho(Compiler* c, ObjBuilder* ob, Writer* w) {
}
/* Re-derive without the common fudge by counting partition pass: we
* already wrote them in (locals,extdefs,undefs) order, so the prefix
- * counts are just the per-pass counts. */
+ * counts are just the per-pass counts. Mirror the spurious-UNDEF
+ * prune from the emit loop above so the LC_DYSYMTAB index counts
+ * line up with the symbols we actually wrote. */
nlocals = 0;
nextdefs = 0;
nundefs = 0;
@@ -444,7 +454,12 @@ void emit_macho(Compiler* c, ObjBuilder* ob, Writer* w) {
ObjSymEntry e;
while (obj_symiter_next(it, &e)) {
const ObjSym* s = e.sym;
- if (sym_is_undef(s))
+ int undef = sym_is_undef(s);
+ if (undef && !s->referenced &&
+ (s->bind == SB_GLOBAL || s->bind == SB_WEAK)) {
+ continue;
+ }
+ if (undef)
++nundefs;
else if (sym_is_extdef(s))
++nextdefs;
diff --git a/src/obj/macho_read.c b/src/obj/macho_read.c
@@ -294,6 +294,7 @@ ObjBuilder* read_macho(Compiler* c, const char* name, const u8* data,
ObjSymId id = obj_symbol_ex(ob, sn, (SymBind)bind, (SymVis)vis,
(SymKind)kind, sec_id, value, size, cmnalign);
+ obj_sym_mark_referenced(ob, id);
/* n_desc carries Mach-O attribute bits beyond what bind/vis/kind
* model — N_NO_DEAD_STRIP, N_REF_TO_WEAK, N_ARM_THUMB_DEF, etc.
* Mask off the bits we already round-trip via bind (N_WEAK_DEF /
@@ -525,7 +526,11 @@ ObjBuilder* read_macho_dso(Compiler* c, const char* name, const u8* data,
* default to NOTYPE. The consuming linker uses dso_export_is_func
* to peek at this for ELF; for Mach-O the `imported` decision flows
* through synthetic __got / __stubs regardless of kind. */
- obj_symbol_ex(ob, sn, bind, SV_DEFAULT, kind, OBJ_SEC_NONE, 0, 0, 0);
+ {
+ ObjSymId did =
+ obj_symbol_ex(ob, sn, bind, SV_DEFAULT, kind, OBJ_SEC_NONE, 0, 0, 0);
+ obj_sym_mark_referenced(ob, did);
+ }
}
obj_finalize(ob);
diff --git a/src/obj/obj.c b/src/obj/obj.c
@@ -251,6 +251,16 @@ void obj_reloc_ex(ObjBuilder* ob, ObjSecId section_id, u32 offset,
r->pair = (u8)pair;
r->sym = sym;
r->addend = addend;
+ /* Any reloc against this symbol is enough to retain it through the
+ * emit-time UNDEF prune. See ObjSym::referenced. */
+ obj_sym_mark_referenced(ob, sym);
+}
+
+/* Set ObjSym::referenced on symbol `id`. OBJ_SYM_NONE is accepted and
+ * ignored, as is an id for which Symbols_at yields no entry. */
+void obj_sym_mark_referenced(ObjBuilder* ob, ObjSymId id) {
+  if (id != OBJ_SYM_NONE) {
+    ObjSym* target = Symbols_at(&ob->symbols, id);
+    if (target) target->referenced = 1;
+  }
}
ObjGroupId obj_group(ObjBuilder* ob, Sym name, ObjSymId signature, u32 flags) {
diff --git a/src/obj/obj.h b/src/obj/obj.h
@@ -252,6 +252,21 @@ typedef struct ObjSym {
u64 value; /* offset within section, or absolute */
u64 size;
u64 common_align; /* nonzero for SK_COMMON */
+ /* Lifecycle gate for spurious-UNDEF pruning at .o emit time.
+ *
+ * The C frontend mints an ObjSym for every `extern` declaration it
+ * parses (so a header like <stdio.h> creates 50+ ObjSyms in one TU).
+ * Most of those are never the target of any relocation. `referenced`
+ * tracks that distinction: obj_reloc_ex sets it on the target, and
+ * the file emitters (elf_emit / macho_emit) drop entries that are
+ * still SK_UNDEF + (SB_GLOBAL|SB_WEAK) + !referenced from the output
+ * symbol table.
+ *
+ * Definitions never need the gate — kind != SK_UNDEF for those, so
+ * the filter never considers them. Readers (elf_read, macho_read)
+ * mark every read-in symbol referenced=1 so a roundtrip preserves
+ * UNDEFs that came from another tool's output. */
+ u8 referenced;
} ObjSym;
typedef struct ObjGroup {
@@ -321,6 +336,12 @@ void obj_reloc(ObjBuilder*, ObjSecId section_id, u32 offset, RelocKind,
void obj_reloc_ex(ObjBuilder*, ObjSecId section_id, u32 offset, RelocKind,
ObjSymId sym, i64 addend, int explicit_addend, int pair);
+/* Force ObjSym::referenced = 1 on the named symbol. obj_reloc_ex calls this
+ * automatically; the readers (elf_read / macho_read) call it on every
+ * ingested symbol so a roundtrip preserves UNDEFs that another tool
+ * emitted into the input. */
+void obj_sym_mark_referenced(ObjBuilder*, ObjSymId);
+
ObjGroupId obj_group(ObjBuilder*, Sym name, ObjSymId signature, u32 flags);
void obj_group_add_section(ObjBuilder*, ObjGroupId group_id,
ObjSecId section_id);
diff --git a/src/pp/pp.c b/src/pp/pp.c
@@ -2682,6 +2682,123 @@ static void pp_register_static_predefined(Pp* pp) {
pp_define(pp, "__ATOMIC_SEQ_CST", "5");
}
+/* Registers the target-dependent predefined macros that rt/include/stddef.h
+ * and rt/include/stdint.h consume: the slice of GCC/Clang's __*_TYPE__ /
+ * __*_MAX__ namespace those headers reference. Pointer width is the only
+ * discriminator: target.ptr_size == 8 selects the LP64 spellings and any
+ * other value selects the ILP32 ones. LLP64 (Windows x86-64) is not yet a
+ * supported target, so `long` always tracks pointer width here. */
+static void pp_register_target_predefined(Pp* pp) {
+  const int is_lp64 = (pp->c->target.ptr_size == 8);
+
+  /* stddef.h base types */
+  pp_define(pp, "__SIZE_TYPE__", is_lp64 ? "unsigned long" : "unsigned int");
+  pp_define(pp, "__PTRDIFF_TYPE__", is_lp64 ? "long" : "int");
+  pp_define(pp, "__WCHAR_TYPE__", "int");
+
+  /* stdint.h exact-width types; only the 64-bit pair is model-dependent */
+  pp_define(pp, "__INT8_TYPE__", "signed char");
+  pp_define(pp, "__INT16_TYPE__", "short");
+  pp_define(pp, "__INT32_TYPE__", "int");
+  pp_define(pp, "__UINT8_TYPE__", "unsigned char");
+  pp_define(pp, "__UINT16_TYPE__", "unsigned short");
+  pp_define(pp, "__UINT32_TYPE__", "unsigned int");
+  pp_define(pp, "__INT64_TYPE__", is_lp64 ? "long" : "long long");
+  pp_define(pp, "__UINT64_TYPE__",
+            is_lp64 ? "unsigned long" : "unsigned long long");
+
+  /* Least-width types use the same spellings as the exact-width ones */
+  pp_define(pp, "__INT_LEAST8_TYPE__", "signed char");
+  pp_define(pp, "__INT_LEAST16_TYPE__", "short");
+  pp_define(pp, "__INT_LEAST32_TYPE__", "int");
+  pp_define(pp, "__UINT_LEAST8_TYPE__", "unsigned char");
+  pp_define(pp, "__UINT_LEAST16_TYPE__", "unsigned short");
+  pp_define(pp, "__UINT_LEAST32_TYPE__", "unsigned int");
+  pp_define(pp, "__INT_LEAST64_TYPE__", is_lp64 ? "long" : "long long");
+  pp_define(pp, "__UINT_LEAST64_TYPE__",
+            is_lp64 ? "unsigned long" : "unsigned long long");
+
+  /* Fast types: fast8 stays a char type; fast16/32 widen to the
+   * pointer-width integer and fast64 is the model's 64-bit type. */
+  pp_define(pp, "__INT_FAST8_TYPE__", "signed char");
+  pp_define(pp, "__UINT_FAST8_TYPE__", "unsigned char");
+  pp_define(pp, "__INT_FAST8_MAX__", "127");
+  pp_define(pp, "__UINT_FAST8_MAX__", "255");
+  if (!is_lp64) {
+    pp_define(pp, "__INT_FAST16_TYPE__", "int");
+    pp_define(pp, "__INT_FAST32_TYPE__", "int");
+    pp_define(pp, "__INT_FAST64_TYPE__", "long long");
+    pp_define(pp, "__UINT_FAST16_TYPE__", "unsigned int");
+    pp_define(pp, "__UINT_FAST32_TYPE__", "unsigned int");
+    pp_define(pp, "__UINT_FAST64_TYPE__", "unsigned long long");
+    pp_define(pp, "__INT_FAST16_MAX__", "2147483647");
+    pp_define(pp, "__INT_FAST32_MAX__", "2147483647");
+    pp_define(pp, "__INT_FAST64_MAX__", "9223372036854775807LL");
+    pp_define(pp, "__UINT_FAST16_MAX__", "4294967295U");
+    pp_define(pp, "__UINT_FAST32_MAX__", "4294967295U");
+    pp_define(pp, "__UINT_FAST64_MAX__", "18446744073709551615ULL");
+  } else {
+    pp_define(pp, "__INT_FAST16_TYPE__", "long");
+    pp_define(pp, "__INT_FAST32_TYPE__", "long");
+    pp_define(pp, "__INT_FAST64_TYPE__", "long");
+    pp_define(pp, "__UINT_FAST16_TYPE__", "unsigned long");
+    pp_define(pp, "__UINT_FAST32_TYPE__", "unsigned long");
+    pp_define(pp, "__UINT_FAST64_TYPE__", "unsigned long");
+    pp_define(pp, "__INT_FAST16_MAX__", "9223372036854775807L");
+    pp_define(pp, "__INT_FAST32_MAX__", "9223372036854775807L");
+    pp_define(pp, "__INT_FAST64_MAX__", "9223372036854775807L");
+    pp_define(pp, "__UINT_FAST16_MAX__", "18446744073709551615UL");
+    pp_define(pp, "__UINT_FAST32_MAX__", "18446744073709551615UL");
+    pp_define(pp, "__UINT_FAST64_MAX__", "18446744073709551615UL");
+  }
+
+  /* Pointer-holding integers, plus the ptrdiff_t / size_t maxima */
+  if (!is_lp64) {
+    pp_define(pp, "__INTPTR_TYPE__", "int");
+    pp_define(pp, "__UINTPTR_TYPE__", "unsigned int");
+    pp_define(pp, "__INTPTR_MAX__", "2147483647");
+    pp_define(pp, "__UINTPTR_MAX__", "4294967295U");
+    pp_define(pp, "__PTRDIFF_MAX__", "2147483647");
+    pp_define(pp, "__SIZE_MAX__", "4294967295U");
+  } else {
+    pp_define(pp, "__INTPTR_TYPE__", "long");
+    pp_define(pp, "__UINTPTR_TYPE__", "unsigned long");
+    pp_define(pp, "__INTPTR_MAX__", "9223372036854775807L");
+    pp_define(pp, "__UINTPTR_MAX__", "18446744073709551615UL");
+    pp_define(pp, "__PTRDIFF_MAX__", "9223372036854775807L");
+    pp_define(pp, "__SIZE_MAX__", "18446744073709551615UL");
+  }
+
+  /* Greatest-width integers and the matching _C() suffix-pasting macros */
+  if (!is_lp64) {
+    pp_define(pp, "__INTMAX_TYPE__", "long long");
+    pp_define(pp, "__UINTMAX_TYPE__", "unsigned long long");
+    pp_define(pp, "__INTMAX_MAX__", "9223372036854775807LL");
+    pp_define(pp, "__UINTMAX_MAX__", "18446744073709551615ULL");
+    pp_define(pp, "__INT64_C(c)", "c ## LL");
+    pp_define(pp, "__UINT64_C(c)", "c ## ULL");
+    pp_define(pp, "__INTMAX_C(c)", "c ## LL");
+    pp_define(pp, "__UINTMAX_C(c)", "c ## ULL");
+  } else {
+    pp_define(pp, "__INTMAX_TYPE__", "long");
+    pp_define(pp, "__UINTMAX_TYPE__", "unsigned long");
+    pp_define(pp, "__INTMAX_MAX__", "9223372036854775807L");
+    pp_define(pp, "__UINTMAX_MAX__", "18446744073709551615UL");
+    pp_define(pp, "__INT64_C(c)", "c ## L");
+    pp_define(pp, "__UINT64_C(c)", "c ## UL");
+    pp_define(pp, "__INTMAX_C(c)", "c ## L");
+    pp_define(pp, "__UINTMAX_C(c)", "c ## UL");
+  }
+
+  /* Limits for wchar_t / wint_t / sig_atomic_t, all `int` in cfree's model */
+  pp_define(pp, "__WCHAR_MAX__", "2147483647");
+  pp_define(pp, "__WCHAR_MIN__", "(-__WCHAR_MAX__ - 1)");
+  pp_define(pp, "__WINT_MAX__", "2147483647");
+  pp_define(pp, "__WINT_MIN__", "(-__WINT_MAX__ - 1)");
+  pp_define(pp, "__SIG_ATOMIC_MAX__", "2147483647");
+  pp_define(pp, "__SIG_ATOMIC_MIN__", "(-__SIG_ATOMIC_MAX__ - 1)");
+}
+
Pp* pp_new(Compiler* c) {
Heap* h = (Heap*)c->env->heap;
Pp* pp = (Pp*)h->alloc(h, sizeof(*pp), _Alignof(Pp));
@@ -2700,6 +2817,7 @@ Pp* pp_new(Compiler* c) {
pp_intern_keywords(pp);
compute_date_time(pp);
pp_register_static_predefined(pp);
+ pp_register_target_predefined(pp);
return pp;
}
diff --git a/test/libc/cases/10_fprintf_streams.c b/test/libc/cases/10_fprintf_streams.c
@@ -0,0 +1,14 @@
+/* fprintf to stdout / stderr. Exercises the libcfree_hosted accessor
+ * macros behind stdout and stderr (stdin is not touched here): each is a
+ * stock `#define stdout (...)`-style macro that the three host libcs
+ * (Darwin libSystem, glibc, musl) back with different underlying symbols. */
+
+#include <stdio.h>
+
+int main(void) {
+ fprintf(stderr, "from stderr\n");
+ fprintf(stdout, "from stdout\n");
+ fflush(stdout);
+ fflush(stderr);
+ return 0;
+}
diff --git a/test/libc/cases/10_fprintf_streams.stdout b/test/libc/cases/10_fprintf_streams.stdout
@@ -0,0 +1 @@
+from stdout
diff --git a/test/libc/cases/11_strings.c b/test/libc/cases/11_strings.c
@@ -0,0 +1,22 @@
+/* string.h basics: strlen, strcmp, strchr, strstr, memset, memcpy. */
+
+#include <stdio.h>
+#include <string.h>
+
+int main(void) {
+  const char *text = "hello, world";
+  if (strlen(text) != 12) return 1;
+  if (strcmp(text, "hello, world") != 0) return 2;
+  if (strcmp(text, "hello") <= 0) return 3;
+  if (strchr(text, ',') != text + 5) return 4;
+  if (strstr(text, "world") != text + 7) return 5;
+
+  char scratch[16];
+  memset(scratch, '#', sizeof(scratch));
+  memcpy(scratch, "hi", 2);
+  scratch[2] = 0;
+  if (strcmp(scratch, "hi") != 0) return 6;
+
+  printf("strings ok\n");
+  return 0;
+}
diff --git a/test/libc/cases/11_strings.stdout b/test/libc/cases/11_strings.stdout
@@ -0,0 +1 @@
+strings ok
diff --git a/test/libc/cases/12_stdlib_convert.c b/test/libc/cases/12_stdlib_convert.c
@@ -0,0 +1,16 @@
+/* stdlib.h numeric conversion (atoi / strtol) + abs / labs. */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(void) {
+ if (atoi("42") != 42) return 1;
+ if (atoi("-7") != -7) return 2;
+ if (strtol("0xcafe", NULL, 16) != 0xcafe) return 3;
+ if (strtol("777", NULL, 8) != 511) return 4;
+ if (abs(-13) != 13) return 5;
+ if (labs(-1234567890L) != 1234567890L) return 6;
+
+ printf("stdlib ok\n");
+ return 0;
+}
diff --git a/test/libc/cases/12_stdlib_convert.stdout b/test/libc/cases/12_stdlib_convert.stdout
@@ -0,0 +1 @@
+stdlib ok
diff --git a/test/libc/cases/13_malloc.c b/test/libc/cases/13_malloc.c
@@ -0,0 +1,16 @@
+/* Heap round-trip: malloc a buffer, snprintf into it, compare, free. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+int main(void) {
+  char *heap = malloc(64);
+  if (!heap) return 1;
+  int written = snprintf(heap, 64, "alloc %d bytes", 64);
+  if (written != 14) return 2;
+  if (strcmp(heap, "alloc 64 bytes") != 0) return 3;
+  free(heap);
+  printf("malloc ok\n");
+  return 0;
+}
diff --git a/test/libc/cases/13_malloc.stdout b/test/libc/cases/13_malloc.stdout
@@ -0,0 +1 @@
+malloc ok
diff --git a/test/libc/cases/14_errno_fopen.c b/test/libc/cases/14_errno_fopen.c
@@ -0,0 +1,17 @@
+/* errno + strerror via a deliberately failing fopen. errno values and
+ * strerror wording vary by libc, so stdout pins only the fixed prefix. */
+
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+
+int main(void) {
+  errno = 0; /* library calls never clear errno; start from a known state */
+  FILE *f = fopen("/cfree/no/such/path/at/all", "r");
+  if (f) return 1;
+  if (errno == 0) return 2; /* fopen failed without reporting a cause */
+  const char *msg = strerror(errno);
+  if (!msg || !*msg) return 3;
+  printf("fopen failed: %s\n", msg);
+  return 0;
+}
diff --git a/test/libc/cases/14_errno_fopen.stdout b/test/libc/cases/14_errno_fopen.stdout
@@ -0,0 +1 @@
+fopen failed:
diff --git a/test/libc/cases/15_ctype.c b/test/libc/cases/15_ctype.c
@@ -0,0 +1,16 @@
+/* ctype.h: isdigit, isalpha, isspace, isupper, islower, tolower, toupper. */
+
+#include <stdio.h>
+#include <ctype.h>
+
+int main(void) {
+ if (!isdigit('7') || isdigit('a')) return 1;
+ if (!isalpha('Q') || isalpha('5')) return 2;
+ if (!isspace(' ') || isspace('x')) return 3;
+ if (!isupper('Z') || isupper('z')) return 4;
+ if (!islower('z') || islower('Z')) return 5;
+ if (tolower('Z') != 'z') return 6;
+ if (toupper('z') != 'Z') return 7;
+ printf("ctype ok\n");
+ return 0;
+}
diff --git a/test/libc/cases/15_ctype.stdout b/test/libc/cases/15_ctype.stdout
@@ -0,0 +1 @@
+ctype ok
diff --git a/test/libc/run.sh b/test/libc/run.sh
@@ -0,0 +1,183 @@
+#!/usr/bin/env bash
+# test/libc/run.sh -- exercise cfree's rt/include/libc headers + the
+# per-OS libcfree_hosted shim on the host. Each case is compiled by
+# cfree-cc against the libc header set, linked against the platform's
+# C library plus the hosted shim, and executed on the host.
+#
+# This sits parallel to test/libc/{musl,glibc}/run.sh: those run inside
+# containers against extracted Linux sysroots; this one targets the
+# host directly. macOS is the only host wired today; other hosts skip.
+#
+# Each case file may carry:
+# <name>.expected -- numeric exit code, default 0
+# <name>.stdout -- exact-substring match against captured stdout
+#
+# Cases that aren't part of the host-libc surface (raw-syscall, unistd.h)
+# are filtered out by name. Run with CFREE_LIBC_KEEP=1 to leave
+# intermediates in build/host-libc/<case>/.
+
+set -u
+
+ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
+CFREE="$ROOT/build/cfree"
+CASES_DIR="$ROOT/test/libc/cases"
+BUILD_DIR="$ROOT/build/host-libc"
+
+# Reporters: color_* wrap $1 in an ANSI colour escape; note_* print one row.
+color_grn() { printf '\033[32m%s\033[0m' "$1"; }
+color_red() { printf '\033[31m%s\033[0m' "$1"; }
+color_yel() { printf '\033[33m%s\033[0m' "$1"; }
+note_pass() { printf ' %s %s\n' "$(color_grn PASS)" "$1"; }
+note_fail() { printf ' %s %s\n' "$(color_red FAIL)" "$1"; }
+note_skip() { printf ' %s %s -- %s\n' "$(color_yel SKIP)" "$1" "$2"; }
+
+# Per-host configuration. A wired host must fill in:
+#   HOSTED_OBJ  cfree-hosted shim object for stdin/stdout/stderr/errno
+#   LDFLAGS     link flags passed ahead of the source (e.g. -L $SDK/usr/lib)
+#   LDLIBS      link arguments passed after the source (e.g. -lSystem)
+#   TARGET      triple handed to cfree-cc via -target
+TARGET=""
+HOSTED_OBJ=""
+LDFLAGS=()
+LDLIBS=()
+
+host_os="$(uname -s 2>/dev/null || echo unknown)"
+case "$host_os" in
+  Linux)
+    note_skip "all" "host-libc shim for Linux not wired yet"
+    exit 0
+    ;;
+  Darwin)
+    host_arch="$(uname -m 2>/dev/null || true)"
+    case "$host_arch" in
+      x86_64) TARGET="x86_64-darwin" ;;
+      arm64|aarch64) TARGET="aarch64-darwin" ;;
+      *) printf 'unknown Darwin arch: %s\n' "$host_arch" >&2; exit 2 ;;
+    esac
+    command -v xcrun >/dev/null 2>&1 || {
+      printf 'xcrun missing -- Xcode CLT required\n' >&2
+      exit 2
+    }
+    SDK="$(xcrun --show-sdk-path 2>/dev/null || true)"
+    [ -n "$SDK" ] && [ -d "$SDK" ] || {
+      printf 'xcrun --show-sdk-path failed\n' >&2
+      exit 2
+    }
+    LDLIBS=(-lSystem)
+    LDFLAGS=(-L "$SDK/usr/lib")
+    HOSTED_OBJ="$ROOT/build/cfree_hosted/macos.o"
+    ;;
+  *)
+    note_skip "all" "host-libc not supported on $host_os"
+    exit 0
+    ;;
+esac
+
+# Preflight: the cfree driver and the hosted shim must already be built.
+[ -x "$CFREE" ] || {
+  printf 'cfree driver missing at %s -- run `make` first\n' "$CFREE" >&2
+  exit 2
+}
+[ -f "$HOSTED_OBJ" ] || {
+  printf 'hosted shim missing at %s -- run `make hosted-macos`\n' \
+    "$HOSTED_OBJ" >&2
+  exit 2
+}
+mkdir -p "$BUILD_DIR"
+
+# test/libc/cases/ is shared with the musl/glibc runners, whose cases
+# also cover surface (raw syscalls via inline asm, <unistd.h>) that
+# rt/include/libc does not ship. Only base-names on the allowlist below
+# are accepted (status 0); every other name is rejected (status 1).
+case_supported() {
+  case "$1" in
+    03_printf_hello | 1[0-5]_*) return 0 ;;
+  esac
+  return 1
+}
+
+PASS=0
+FAIL=0
+SKIP=0
+FAIL_NAMES=()
+
+# fail_case <headline> <summary-entry>: print the FAIL row and tally it.
+fail_case() {
+  note_fail "$1"
+  FAIL=$((FAIL + 1))
+  FAIL_NAMES+=("$2")
+}
+
+shopt -s nullglob
+printf 'Running host-libc cases (%s)...\n' "$TARGET"
+
+for case_src in "$CASES_DIR"/*.c; do
+  name="$(basename "$case_src" .c)"
+  case_supported "$name" || {
+    note_skip "$name" "not in host-libc surface (uses unistd/raw syscall)"
+    SKIP=$((SKIP + 1))
+    continue
+  }
+
+  dir="$BUILD_DIR/$name"
+  mkdir -p "$dir"
+  binary="$dir/${name}.exe"
+
+  # Optional sidecars: expected exit status (default 0) and stdout substring.
+  want_rc=0
+  if [ -f "$CASES_DIR/${name}.expected" ]; then
+    want_rc="$(tr -d '[:space:]' < "$CASES_DIR/${name}.expected")"
+  fi
+  want_out=""
+  if [ -f "$CASES_DIR/${name}.stdout" ]; then
+    want_out="$(cat "$CASES_DIR/${name}.stdout")"
+  fi
+
+  # Compile + link in a single cfree-cc invocation; output kept for triage.
+  "$CFREE" cc -target "$TARGET" \
+    -isystem "$ROOT/rt/include/libc" \
+    -isystem "$ROOT/rt/include" \
+    -e main \
+    "${LDFLAGS[@]}" -o "$binary" "$case_src" "$HOSTED_OBJ" "${LDLIBS[@]}" \
+    >"$dir/build.out" 2>"$dir/build.err" || {
+    fail_case "$name (build)" "$name (build)"
+    sed 's/^/ | /' "$dir/build.err" | head -10
+    continue
+  }
+  chmod +x "$binary" 2>/dev/null
+
+  "$binary" >"$dir/run.out" 2>"$dir/run.err"
+  rc=$?
+  if [ "$rc" -ne "$want_rc" ]; then
+    fail_case "$name (rc=$rc, want $want_rc)" "$name (rc)"
+    [ -s "$dir/run.err" ] && sed 's/^/ err| /' "$dir/run.err" | head -5
+    [ -s "$dir/run.out" ] && sed 's/^/ out| /' "$dir/run.out" | head -5
+    continue
+  fi
+
+  if [ -n "$want_out" ] && ! grep -qF -- "$want_out" "$dir/run.out"; then
+    fail_case "$name (stdout mismatch)" "$name (stdout)"
+    printf ' expected substring:\n'
+    printf '%s\n' "$want_out" | sed 's/^/ | /'
+    printf ' got:\n'
+    sed 's/^/ | /' "$dir/run.out" | head -10
+    continue
+  fi
+
+  note_pass "$name"
+  PASS=$((PASS + 1))
+done
+
+if [ "$FAIL" -gt 0 ]; then
+  printf '\nFailed:\n'
+  for n in "${FAIL_NAMES[@]}"; do printf ' %s\n' "$n"; done
+fi
+
+printf '\nResults: %s pass, %s fail, %s skip\n' "$PASS" "$FAIL" "$SKIP"
+
+# Remove per-case intermediates unless CFREE_LIBC_KEEP is set.
+[ -n "${CFREE_LIBC_KEEP:-}" ] || rm -rf "$BUILD_DIR"
+
+# Exit 0 only when nothing failed. Passing the raw count to `exit` would
+# wrap modulo 256, so 256 failures would be reported as success.
+exit "$((FAIL > 0))"
diff --git a/test/test.mk b/test/test.mk
@@ -24,7 +24,7 @@
# against the public cfree.h surface; reuses cfree-roundtrip,
# link-exe-runner, and jit-runner.
-.PHONY: test test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg test-dwarf test-debug test-parse test-parse-err test-musl test-glibc test-lib-deps test-smoke-x64 test-smoke-rv64
+.PHONY: test test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg test-dwarf test-debug test-parse test-parse-err test-libc test-musl test-glibc test-lib-deps test-smoke-x64 test-smoke-rv64
test: test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg test-dwarf test-debug test-parse test-parse-err test-lib-deps
@@ -187,6 +187,14 @@ test-musl: bin rt-aarch64-linux $(MUSL_SYSROOT_MARKER)
test-glibc: bin rt-aarch64-linux $(GLIBC_SYSROOT_MARKER)
@bash test/libc/glibc/run.sh
+# test-libc: end-to-end host-libc tests. test/libc/run.sh compiles the
+# allowlisted test/libc/cases/ cases with cfree-cc against rt/include/libc,
+# links each against the host's C library plus the libcfree_hosted shim,
+# and executes it on the host. macOS is the only host wired today; others
+# SKIP. Excluded from default `test` until non-Darwin hosts get a shim.
+test-libc: bin hosted-macos
+ @bash test/libc/run.sh
+
# Fail if libcfree.a depends on any external symbol not in the allowlist.
# Drift in either direction (new dep, or stale entry) is a failure.
LIB_DEPS_ACTUAL = build/libcfree.deps.txt