commit c612ee4b1ae6980f51ef14c0a4d990c6580c59aa
parent 97c83d00c094c070d8dae05e5a06ec7cd3968dcc
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Wed, 6 May 2026 12:39:55 -0700
AT.1 + AT.4: inline mes-libc abtol fix; arch.h primitive set + macros
AT.1: widen mes-libc abtol's accumulator to `long long` directly in
vendor/mes-libc/mes/abtol.c (the existing libc-flatten patch already
did this as `long`; inlining drops the patch and the libc-flatten
stanza). Revert the .long lo,hi pair workaround for boot_gdt64 in
seed-kernel/arch/amd64/kernel.S now that 64-bit `.quad` literals parse
correctly through tcc3.
AT.4: rewrite seed-kernel/arch/{amd64,riscv64}/arch.h to match
aarch64's pattern — small primitive set in kernel.S, arch_*() API
synthesized as macros. New primitives: cpu_pause(kind),
amd64_fence(kind), amd64_read_cr2() / riscv_fence(kind),
riscv_read_stval(). Drops 11 amd64 and 9 riscv64 dedicated extern
functions in favor of macro composition. Pure-C macros for
arch_read_user_sp / arch_write_user_sp / arch_mmio_ptr (the AT.3
side-wins) on both amd64 and riscv64.
Validated: aarch64 podman boot6 byte-identical after re-flatten,
confirming that inlining the accumulator as `long long` (previously patched to `long`) is a no-op on LP64 hosts.
amd64/riscv64 byte output changes intentionally (workarounds gone).
Diffstat:
8 files changed, 111 insertions(+), 127 deletions(-)
diff --git a/scripts/libc-flatten.sh b/scripts/libc-flatten.sh
@@ -158,17 +158,6 @@ apply_simple_patch \
"$STAGE/stdio/vsnprintf.c" \
"$PATCHES/vsnprintf-int-promo.before" \
"$PATCHES/vsnprintf-int-promo.after"
-# abtol uses an `int` accumulator, which overflows for values that don't
-# fit in 32-bit signed (e.g. 0x80200000 — riscv64 OpenSBI kernel base).
-# strtol/strtoul/strtoull all bottom out here, so the overflow propagates
-# through everywhere mes-libc's number parsers are called. Concretely,
-# tcc3 (linked against mes-libc) mishandles `-Wl,-Ttext=0x80200000` and
-# emits an ELF with sign-extended vaddr=0xffffffff80200000 that QEMU
-# rejects. Switching the accumulator to `long` fixes the parse.
-apply_simple_patch \
- "$STAGE/mes/abtol.c" \
- "$PATCHES/abtol-long-accumulator.before" \
- "$PATCHES/abtol-long-accumulator.after"
# --- (3) flatten via host preprocessor --------------------------------
HOST_CC=${HOST_CC:-cc}
diff --git a/seed-kernel/arch/amd64/arch.h b/seed-kernel/arch/amd64/arch.h
@@ -61,23 +61,31 @@ struct trapframe {
#define ARCH_TF_PC(tf) ((tf)->pc)
#define ARCH_IS_SYSCALL(cause) ((cause) == 0)
+enum { BAR_WMB, BAR_RMB }; /* amd64_fence() kinds; kernel.S mirrors these values with #defines */
+enum { PAUSE_PAUSE, PAUSE_HLT }; /* cpu_pause() kinds; kernel.S mirrors these values with #defines */
+
+extern u64 saved_user_sp; /* accessed directly by arch_read_user_sp()/arch_write_user_sp() below */
+extern void cpu_pause(int kind); /* kernel.S: hlt for PAUSE_HLT, pause for any other kind */
+extern void amd64_fence(int kind); /* kernel.S: lfence for BAR_RMB, sfence for any other kind */
+extern u64 amd64_read_cr2(void); /* kernel.S: returns the value of %cr2 */
extern void arch_setup_mmu(void);
extern void arch_swap_user_pool(int which);
-extern u64 arch_read_user_sp(void);
-extern void arch_write_user_sp(u64 v);
-extern u64 arch_fault_addr(void);
-extern void arch_pause(void);
-extern void arch_idle_forever(void);
-extern volatile u8 *arch_mmio_ptr(u64 pa);
-extern void arch_wmb(void);
-extern void arch_rmb(void);
-extern void arch_icache_sync(void);
-extern void arch_icache_context_sync(void);
extern void arch_system_off(void);
extern void eret_to_user(u64 entry, u64 sp);
extern void amd64_outb(u16 port, u8 val);
extern u8 amd64_inb(u16 port);
+#define arch_read_user_sp() (saved_user_sp) /* plain load of the global; no function call */
+#define arch_write_user_sp(v) (saved_user_sp = (v)) /* plain store; expands to an assignment expression */
+#define arch_fault_addr() amd64_read_cr2() /* fault address is taken from %cr2 */
+#define arch_pause() cpu_pause(PAUSE_PAUSE) /* one pause instruction, then return */
+#define arch_idle_forever() do { for (;;) cpu_pause(PAUSE_HLT); } while (0) /* never exits: re-issues hlt whenever cpu_pause() returns */
+#define arch_mmio_ptr(pa) ((volatile u8 *)(ARCH_DEVICE_ALIAS_BASE + (u64)(pa))) /* ARCH_DEVICE_ALIAS_BASE is defined outside this hunk */
+#define arch_wmb() amd64_fence(BAR_WMB) /* sfence */
+#define arch_rmb() amd64_fence(BAR_RMB) /* lfence */
+#define arch_icache_sync() do {} while (0) /* no-op on amd64 */
+#define arch_icache_context_sync() do {} while (0) /* no-op on amd64 */
+
static inline void arch_clear_to_user_entry(struct trapframe *tf, u64 entry) {
for (int i = 0; i < ARCH_TRAPFRAME_NREGS; i++) tf->x[i] = 0;
tf->pc = entry;
diff --git a/seed-kernel/arch/amd64/kernel.S b/seed-kernel/arch/amd64/kernel.S
@@ -275,54 +275,40 @@ eret_to_user:
xorq %r15, %r15
.byte 0x48,0xcf /* iretq */
-.globl arch_read_user_sp
-arch_read_user_sp:
- movq saved_user_sp(%rip), %rax
- ret
-
-.globl arch_write_user_sp
-arch_write_user_sp:
- movq %rdi, saved_user_sp(%rip)
- ret
+/* C-callable thunks. arch.h synthesizes the arch_*() API on top of these
+ * (mirrors aarch64's sysreg_read/arm64_barrier/cpu_pause primitive set). */
-.globl arch_fault_addr
-arch_fault_addr:
- movq %cr2, %rax
- ret
+/* PAUSE_* — must stay in sync with the PAUSE_* enum in arch.h. */
+#define PAUSE_PAUSE 0
+#define PAUSE_HLT 1
-.globl arch_pause
-arch_pause:
+.globl cpu_pause
+cpu_pause:
+ cmpl $PAUSE_HLT, %edi /* kind (declared `int kind` in arch.h) is compared in %edi */
+ je .Lp_hlt /* PAUSE_HLT: halt once; every other kind falls through to pause */
pause
ret
-
-.globl arch_idle_forever
-arch_idle_forever:
-1:
+.Lp_hlt: /* single hlt then return; arch.h's arch_idle_forever() supplies the loop */
hlt
- jmp 1b
-
-.globl arch_mmio_ptr
-arch_mmio_ptr:
- movq $0xffff800000000000, %rax
- addq %rdi, %rax
ret
-.globl arch_wmb
-arch_wmb:
+/* BAR_* — must stay in sync with the BAR_* enum in arch.h. */
+#define BAR_WMB 0
+#define BAR_RMB 1
+
+.globl amd64_fence
+amd64_fence:
+ cmpl $BAR_RMB, %edi /* kind (declared `int kind` in arch.h) is compared in %edi */
+ je .Lf_rmb /* BAR_RMB: lfence; every other kind falls through to sfence */
sfence
ret
-
-.globl arch_rmb
-arch_rmb:
+.Lf_rmb: /* read-barrier path, reached only for kind == BAR_RMB */
lfence
ret
-.globl arch_icache_sync
-arch_icache_sync:
- ret
-
-.globl arch_icache_context_sync
-arch_icache_context_sync:
+.globl amd64_read_cr2 /* declared in arch.h as `u64 amd64_read_cr2(void)` */
+amd64_read_cr2:
+ movq %cr2, %rax /* return the value of CR2; arch.h maps arch_fault_addr() onto this */
ret
.globl arch_system_off
@@ -405,12 +391,10 @@ amd64_serial_init:
.align 8
boot_gdt64:
/* null, 64-bit code (P=1,DPL=0,S=1,type=A; G=1,L=1; limit=0xfffff),
- * 64-bit data (P=1,DPL=0,S=1,type=2; G=1,limit=0xfffff). Encoded as
- * pairs of .long because tcc 0.9.26's assembler truncates a single
- * `.quad` literal to 32 bits when the high half is non-zero. */
- .long 0, 0
- .long 0x0000ffff, 0x00af9a00
- .long 0x0000ffff, 0x00af9200
+ * 64-bit data (P=1,DPL=0,S=1,type=2; G=1,limit=0xfffff). */
+ .quad 0 /* entry 0: null descriptor */
+ .quad 0x00af9a000000ffff /* entry 1: 64-bit code; same bytes as the old .long 0x0000ffff, 0x00af9a00 pair */
+ .quad 0x00af92000000ffff /* entry 2: 64-bit data; same bytes as the old .long 0x0000ffff, 0x00af9200 pair */
boot_gdt64_ptr:
.word boot_gdt64_ptr - boot_gdt64 - 1
.long boot_gdt64
diff --git a/seed-kernel/arch/riscv64/arch.h b/seed-kernel/arch/riscv64/arch.h
@@ -45,21 +45,31 @@ struct trapframe {
#define ARCH_TF_PC(tf) ((tf)->pc)
#define ARCH_IS_SYSCALL(cause) ((cause) == 8)
+enum { BAR_WMB, BAR_RMB, BAR_ICACHE, BAR_ICACHE_CTX }; /* riscv_fence() kinds; kernel.S hard-codes 1, 2, 3 for the last three */
+enum { PAUSE_NOP, PAUSE_WFI }; /* cpu_pause() kinds; kernel.S hard-codes 1 for PAUSE_WFI */
+
+extern u64 saved_user_sp; /* accessed directly by arch_read_user_sp()/arch_write_user_sp() below */
+extern void cpu_pause(int kind); /* kernel.S: wfi for PAUSE_WFI, NOP for any other kind */
+extern void riscv_fence(int kind); /* kernel.S: dispatches on BAR_*; unknown kinds take the BAR_WMB path */
+extern u64 riscv_read_stval(void); /* kernel.S: CSRR_A0_STVAL then return */
+extern void riscv_write_satp(u64 v); /* kernel.S: CSRW_SATP_A0 */
+extern void riscv_set_sum(void);
extern void arch_setup_mmu(void);
extern void arch_swap_user_pool(int which);
-extern u64 arch_read_user_sp(void);
-extern void arch_write_user_sp(u64 v);
-extern u64 arch_fault_addr(void);
-extern void arch_pause(void);
-extern void arch_idle_forever(void);
-extern volatile u8 *arch_mmio_ptr(u64 pa);
-extern void arch_wmb(void);
-extern void arch_rmb(void);
-extern void arch_icache_sync(void);
-extern void arch_icache_context_sync(void);
extern void arch_system_off(void);
extern void eret_to_user(u64 entry, u64 sp);
+#define arch_read_user_sp() (saved_user_sp) /* plain load of the global; no function call */
+#define arch_write_user_sp(v) (saved_user_sp = (v)) /* plain store; expands to an assignment expression */
+#define arch_fault_addr() riscv_read_stval() /* fault address is taken from stval */
+#define arch_pause() cpu_pause(PAUSE_NOP) /* one NOP, then return */
+#define arch_idle_forever() do { for (;;) cpu_pause(PAUSE_WFI); } while (0) /* never exits: re-issues wfi whenever cpu_pause() returns */
+#define arch_mmio_ptr(pa) ((volatile u8 *)(ARCH_DEVICE_ALIAS_BASE + (u64)(pa))) /* ARCH_DEVICE_ALIAS_BASE is defined outside this hunk */
+#define arch_wmb() riscv_fence(BAR_WMB) /* FENCE_W_W */
+#define arch_rmb() riscv_fence(BAR_RMB) /* FENCE_R_R */
+#define arch_icache_sync() riscv_fence(BAR_ICACHE) /* FENCE_I */
+#define arch_icache_context_sync() riscv_fence(BAR_ICACHE_CTX) /* SFENCE_VMA followed by FENCE_I */
+
static inline void arch_clear_to_user_entry(struct trapframe *tf, u64 entry) {
for (int i = 0; i < ARCH_TRAPFRAME_NREGS; i++) tf->x[i] = 0;
tf->pc = entry;
diff --git a/seed-kernel/arch/riscv64/kernel.S b/seed-kernel/arch/riscv64/kernel.S
@@ -238,64 +238,64 @@ eret_to_user:
li t6, 0
SRET
-.globl arch_read_user_sp
-arch_read_user_sp:
- LA(t0, saved_user_sp)
- LD(a0, t0, 0)
- RET
-
-.globl arch_write_user_sp
-arch_write_user_sp:
- LA(t0, saved_user_sp)
- SD(a0, t0, 0)
- RET
-
-.globl arch_fault_addr
-arch_fault_addr:
- CSRR_A0_STVAL
- RET
+/* C-callable thunks. arch.h synthesizes the arch_*() API on top of these
+ * (mirrors aarch64's sysreg_read/arm64_barrier/cpu_pause primitive set). */
-.globl arch_pause
-arch_pause:
+/* cpu_pause(kind): kind arrives in a0; PAUSE_* values match the arch.h enum (PAUSE_NOP = 0, PAUSE_WFI = 1). */
+.globl cpu_pause
+cpu_pause:
+ li t0, 1 /* PAUSE_WFI */
+#ifdef __TINYC__
+ beq a0, t0, 12 /* NOTE(review): hard-coded offset, presumably bytes to label 1: (beq + NOP + RET at 4 bytes each) — confirm */
+#else
+ beq a0, t0, 1f /* kind == PAUSE_WFI: go wait for interrupt */
+#endif
NOP
RET
-
-.globl arch_idle_forever
-arch_idle_forever:
1:
wfi
- J(1b)
-
-.globl arch_mmio_ptr
-arch_mmio_ptr:
- /* Device alias offset = ARCH_DEVICE_ALIAS_BASE = 1 << 33.
- * Must match the L2 slot picked in arch/riscv64/mmu.c. */
- li t0, 1
- slli t0, t0, 33
- add a0, a0, t0
RET
-.globl arch_wmb
-arch_wmb:
+/* riscv_fence(kind): kind arrives in a0 as a BAR_* value from the arch.h enum; anything other than 1..3 takes the BAR_WMB path. */
+.globl riscv_fence
+riscv_fence:
+ li t0, 1 /* BAR_RMB */
+#ifdef __TINYC__
+ beq a0, t0, 28 /* NOTE(review): hard-coded offset, presumably bytes to label 1: (7 slots of 4 bytes) — confirm */
+#else
+ beq a0, t0, 1f
+#endif
+ li t0, 2 /* BAR_ICACHE */
+#ifdef __TINYC__
+ beq a0, t0, 28 /* NOTE(review): hard-coded offset, presumably bytes to label 2: (7 slots of 4 bytes) — confirm */
+#else
+ beq a0, t0, 2f
+#endif
+ li t0, 3 /* BAR_ICACHE_CTX */
+#ifdef __TINYC__
+ beq a0, t0, 28 /* NOTE(review): hard-coded offset, presumably bytes to label 3: (7 slots of 4 bytes) — confirm */
+#else
+ beq a0, t0, 3f
+#endif
+ /* default: BAR_WMB (also reached for any kind outside 1..3) */
FENCE_W_W
RET
-
-.globl arch_rmb
-arch_rmb:
+1: /* BAR_RMB */
FENCE_R_R
RET
-
-.globl arch_icache_sync
-arch_icache_sync:
+2: /* BAR_ICACHE */
FENCE_I
RET
-
-.globl arch_icache_context_sync
-arch_icache_context_sync:
+3: /* BAR_ICACHE_CTX */
SFENCE_VMA
FENCE_I
RET
+.globl riscv_read_stval /* declared in arch.h as `u64 riscv_read_stval(void)` */
+riscv_read_stval:
+ CSRR_A0_STVAL /* macro defined outside this hunk; presumably `csrr a0, stval` — confirm */
+ RET /* arch.h maps arch_fault_addr() onto this function */
+
.globl riscv_write_satp
riscv_write_satp:
CSRW_SATP_A0
diff --git a/vendor/mes-libc/mes/abtol.c b/vendor/mes-libc/mes/abtol.c
@@ -26,7 +26,12 @@ long
abtol (char const **p, int base)
{
char const *s = p[0];
- int i = 0;
+ /* `long long` (≥64-bit) accumulator so values that don't fit in 32-bit
+ * signed don't sign-extend through the parse. Affects every mes-libc
+ * number parser (strtol/strtoul/strtoull, vfprintf field widths, …).
+ * Without it, tcc3 mishandles `-Wl,-Ttext=0x80200000` (riscv64 OpenSBI
+ * kernel base) and `.quad 0x00af9a000000ffff` in amd64 kernel.S. */
+ long long i = 0;
int sign_p = 0;
int m = '0';
if (base == 0)
diff --git a/vendor/mes-libc/patches/abtol-long-accumulator.after b/vendor/mes-libc/patches/abtol-long-accumulator.after
@@ -1,9 +0,0 @@
- char const *s = p[0];
- /* Use a `long` accumulator so values that don't fit in 32-bit signed
- * (e.g. 0x80200000 — riscv64's OpenSBI kernel base) don't overflow
- * to a sign-extended negative. Affects strtol/strtoul/strtoull,
- * which all bottom out in this routine. Without this, tcc3 mishandles
- * `-Wl,-Ttext=0x80200000` and emits an ELF with vaddr=0xffffffff80200000.
- */
- long i = 0;
- int sign_p = 0;
diff --git a/vendor/mes-libc/patches/abtol-long-accumulator.before b/vendor/mes-libc/patches/abtol-long-accumulator.before
@@ -1,3 +0,0 @@
- char const *s = p[0];
- int i = 0;
- int sign_p = 0;