commit 7bb17ff74249cc529f25e34b5b31b0e5cb3f520e
parent 7c7a63e8d3974ae74e72eecf861f92915a74b352
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 4 May 2026 08:08:19 -0700
mes-libc: track `l` modifier in printf so plain %d/%c reads int
vfprintf and vsnprintf read every integer / char variadic via
va_arg(ap, long). On amd64 SysV an int arg is stored as a 32-bit
write into an 8-byte reg-save slot — upper bits unspecified, and tcc
doesn't sign-extend ints into the slot. Reading as long then leaks
the garbage upper bits, so printf("%d", -42) prints 4294967254
(= 0x00000000_FFFFFFD6). Track the `l` length modifier through the
spec parse and dispatch the va_arg type from there: int / unsigned int
for plain %d/%i/%o vs %u/%x/%X, long only when `l` was seen. %c also
moves to va_arg(ap, int) (char promotes to int in varargs). aarch64 /
riscv64 worked by accident — their ABIs / va_list lowerings extend
int slots — but the fix is correct everywhere. Locked in by
tests/cc-libc/18-printf-int-promo, which covers INT_MIN, %u with bit
31 set, %ld, and %c alongside the original 17-atoi case.
Diffstat:
7 files changed, 199 insertions(+), 0 deletions(-)
diff --git a/scripts/libc-flatten.sh b/scripts/libc-flatten.sh
@@ -143,6 +143,20 @@ apply_simple_patch \
"$STAGE/stdio/snprintf.c" \
"$PATCHES/snprintf-mes-varargs.before" \
"$PATCHES/snprintf-mes-varargs.after"
+# stdio/vfprintf.c and stdio/vsnprintf.c read every integer / char
+# variadic via `va_arg(ap, long)`. On amd64 SysV an `int` arg occupies
+# an 8-byte reg-save slot whose upper 32 bits are unspecified: tcc
+# (like most compilers) does not sign-extend ints into the slot, so
+# reading it as `long` leaks garbage upper bits. These patches track
+# the `l` length modifier and pick the va_arg type accordingly.
+apply_simple_patch \
+ "$STAGE/stdio/vfprintf.c" \
+ "$PATCHES/printf-int-promo.before" \
+ "$PATCHES/printf-int-promo.after"
+apply_simple_patch \
+ "$STAGE/stdio/vsnprintf.c" \
+ "$PATCHES/vsnprintf-int-promo.before" \
+ "$PATCHES/vsnprintf-int-promo.after"
# --- (3) flatten via host preprocessor --------------------------------
HOST_CC=${HOST_CC:-cc}
diff --git a/tests/cc-libc/18-printf-int-promo.c b/tests/cc-libc/18-printf-int-promo.c
@@ -0,0 +1,28 @@
+/* printf %d / %c / %u with plain `int` args. amd64 SysV stores int
+ * varargs as a 32-bit write into an 8-byte reg-save slot — upper bits
+ * are unspecified (tcc does not sign-extend ints into the slot).
+ * mes-libc's vfprintf used to read every integer spec via
+ * `va_arg(ap, long)`, so the upper bits leaked: -42 printed as
+ * 4294967254 (= 0x00000000_FFFFFFD6). The fix tracks the `l` length
+ * modifier and reads with the right width. Other arches passed by
+ * accident (their ABIs / va_list lowerings extend int slots), but
+ * the fix is correct everywhere. */
+extern int atoi (char const *s);
+extern int printf (char const *fmt, ...);
+
+int
+main (void)
+{
+ printf ("%d\n", atoi ("-42")); /* negative int from a call: -42 */
+ printf ("%d\n", -2147483647 - 1); /* INT_MIN: -2147483648 */
+ printf ("%d\n", 2147483647); /* INT_MAX: 2147483647 */
+ printf ("%u\n", (unsigned int)-1); /* all 32 bits set: 4294967295 */
+ printf ("%x\n", (unsigned int)-1); /* all 32 bits set, hex: ffffffff */
+ printf ("%c\n", 'Z'); /* char promoted to int: Z */
+
+ /* %ld must still read a full long. Pass a value that does not fit
+ in 32 bits so a buggy "always int" fix would fail this case. */
+ long big = 4294967296L + 7L; /* 2^32 + 7 = 0x100000007 */
+ printf ("%ld\n", big); /* 4294967303 */
+ return 0;
+}
diff --git a/tests/cc-libc/18-printf-int-promo.expected b/tests/cc-libc/18-printf-int-promo.expected
@@ -0,0 +1,7 @@
+-42
+-2147483648
+2147483647
+4294967295
+ffffffff
+Z
+4294967303
diff --git a/vendor/mes-libc/patches/printf-int-promo.after b/vendor/mes-libc/patches/printf-int-promo.after
@@ -0,0 +1,43 @@
+ /* boot2: track the `l` length modifier so %d/%c read the
+ correct width. amd64 SysV stores int varargs as a 32-bit
+ write into an 8-byte reg-save slot — upper bits unspecified,
+ so va_arg(ap, long) for plain %d / %c picks up garbage.
+ %o/%u/%x/%X take unsigned int per ISO C. long==int64 on all
+ our LP64 targets, so `ll` and `l` collapse to one case. */
+ int long_p = 0;
+ if (c == 'l')
+ {
+ long_p = 1;
+ c = *++p;
+ }
+ if (c == 'l')
+ c = *++p;
+ switch (c)
+ {
+ case '%':
+ {
+ fputc (*p, f);
+ count++;
+ break;
+ }
+ case 'c':
+ {
+ char _c = va_arg (ap, int);
+ fputc (_c, f);
+ count++;
+ break;
+ }
+ case 'd':
+ case 'i':
+ case 'o':
+ case 'u':
+ case 'x':
+ case 'X':
+ {
+ long d;
+ if (long_p)
+ d = va_arg (ap, long);
+ else if (c == 'd' || c == 'i')
+ d = va_arg (ap, int);
+ else
+ d = va_arg (ap, unsigned int);
diff --git a/vendor/mes-libc/patches/printf-int-promo.before b/vendor/mes-libc/patches/printf-int-promo.before
@@ -0,0 +1,30 @@
+ if (c == 'l')
+ c = *++p;
+ if (c == 'l')
+ {
+ eputs ("vfprintf: skipping second: l\n");
+ c = *++p;
+ }
+ switch (c)
+ {
+ case '%':
+ {
+ fputc (*p, f);
+ count++;
+ break;
+ }
+ case 'c':
+ {
+ char _c;
+ _c = va_arg (ap, long);
+ fputc (_c, f);
+ break;
+ }
+ case 'd':
+ case 'i':
+ case 'o':
+ case 'u':
+ case 'x':
+ case 'X':
+ {
+ long d = va_arg (ap, long);
diff --git a/vendor/mes-libc/patches/vsnprintf-int-promo.after b/vendor/mes-libc/patches/vsnprintf-int-promo.after
@@ -0,0 +1,43 @@
+ /* boot2: track `l` so %d / %c read the correct width; %o/%u/%x/%X
+ read unsigned int per ISO C. See printf-int-promo.after. */
+ int long_p = 0;
+ if (c == 'l')
+ {
+ long_p = 1;
+ c = *++p;
+ }
+ if (c == 'l')
+ c = *++p;
+ if (c == 'l')
+ c = *++p;
+ switch (c)
+ {
+ case '%':
+ {
+ if (count < size)
+ *str++ = *p;
+ count++;
+ break;
+ }
+ case 'c':
+ {
+ c = va_arg (ap, int);
+ if (count < size)
+ *str++ = c;
+ count++;
+ break;
+ }
+ case 'd':
+ case 'i':
+ case 'o':
+ case 'u':
+ case 'x':
+ case 'X':
+ {
+ long d;
+ if (long_p)
+ d = va_arg (ap, long);
+ else if (c == 'd' || c == 'i')
+ d = va_arg (ap, int);
+ else
+ d = va_arg (ap, unsigned int);
diff --git a/vendor/mes-libc/patches/vsnprintf-int-promo.before b/vendor/mes-libc/patches/vsnprintf-int-promo.before
@@ -0,0 +1,34 @@
+ if (c == 'l')
+ c = *++p;
+ if (c == 'l')
+ c = *++p;
+ if (c == 'l')
+ {
+ eputs ("vsnprintf: skipping second: l\n");
+ c = *++p;
+ }
+ switch (c)
+ {
+ case '%':
+ {
+ if (count < size)
+ *str++ = *p;
+ count++;
+ break;
+ }
+ case 'c':
+ {
+ c = va_arg (ap, long);
+ if (count < size)
+ *str++ = c;
+ count++;
+ break;
+ }
+ case 'd':
+ case 'i':
+ case 'o':
+ case 'u':
+ case 'x':
+ case 'X':
+ {
+ long d = va_arg (ap, long);