commit 4a1e722026a5a30bd68d2975cb39e93833df4867
parent e9e24687d1c5cced306a34171c12c5965af24126
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 5 May 2026 23:02:57 -0700
Refactor seed-kernel arch layout
Diffstat:
10 files changed, 446 insertions(+), 279 deletions(-)
diff --git a/scripts/boot6-gen-runscm.sh b/scripts/boot6-gen-runscm.sh
@@ -64,9 +64,13 @@ cat > "$OUT" <<EOF
"kernel.S -> kernel-asm.o")
(write-string stdout "boot6: tcc3 -c kernel.c\n")
-(must (run "in/tcc3" $KCFLAGS "-c" "-o" "out/kernel.o" "in/kernel.c")
+(must (run "in/tcc3" $KCFLAGS "-Iin" "-c" "-o" "out/kernel.o" "in/kernel.c")
"kernel.c -> kernel.o")
+(write-string stdout "boot6: tcc3 -c mmu.c\n")
+(must (run "in/tcc3" $KCFLAGS "-Iin" "-c" "-o" "out/mmu.o" "in/mmu.c")
+ "mmu.c -> mmu.o")
+
(write-string stdout "boot6: tcc3 -c mem.c\n")
(must (run "in/tcc3" $KCFLAGS "-c" "-o" "out/mem.o" "in/mem.c")
"mem.c -> mem.o")
@@ -76,7 +80,7 @@ cat > "$OUT" <<EOF
"-Wl,-Ttext=0x40080000"
"-Wl,--oformat=binary"
"-o" "out/Image"
- "out/kernel-asm.o" "out/kernel.o" "out/mem.o")
+ "out/kernel-asm.o" "out/kernel.o" "out/mmu.o" "out/mem.o")
"link Image")
(write-string stdout "boot6: ALL-OK\n")
diff --git a/scripts/boot6.sh b/scripts/boot6.sh
@@ -13,12 +13,15 @@
## build/$ARCH/boot4/tcc3 — boot4's verified self-host tcc
## (compiler + linker)
## build/$ARCH/boot2/scheme1 — driver runtime
-## seed-kernel/kernel.S — boot stub, vector table, asm thunks,
+## seed-kernel/arch/aarch64/kernel.S
+## — boot stub, vector table, asm thunks,
## trailing 64 KB stack reserved as
## plain `.bss` (kstack_top is the end
## label of that reservation)
## seed-kernel/kernel.c — DTB parse, MMU bring-up, syscalls,
## virtio-blk, tmpfs, ELF loader
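+## seed-kernel/arch/aarch64/arch.h
+## — arm64 backend contract header; staged next
+## to kernel.c and found through -Iin
+## seed-kernel/arch/aarch64/mmu.c
+## — arm64 page-table setup and pool swap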
## tcc-cc/mem.c — memcpy/memset/memmove/memcmp
##
## ─── Tools ────────────────────────────────────────────────────────────
@@ -62,7 +65,7 @@ STAGE=build/$ARCH/.boot6-stage
# ── prerequisites ─────────────────────────────────────────────────────
[ -x "$BOOT4/tcc3" ] || { echo "[boot6 $ARCH] missing $BOOT4/tcc3 (run scripts/boot4.sh $ARCH)" >&2; exit 1; }
[ -x "$BOOT2/scheme1" ] || { echo "[boot6 $ARCH] missing $BOOT2/scheme1 (run scripts/boot2.sh $ARCH)" >&2; exit 1; }
-for f in seed-kernel/kernel.S seed-kernel/kernel.c tcc-cc/mem.c; do
+for f in seed-kernel/arch/aarch64/kernel.S seed-kernel/arch/aarch64/mmu.c seed-kernel/arch/aarch64/arch.h seed-kernel/kernel.c tcc-cc/mem.c; do
[ -f "$f" ] || { echo "[boot6 $ARCH] missing $f" >&2; exit 1; }
done
@@ -93,8 +96,10 @@ runscm_prelude scheme1/prelude.scm
runscm_runscm "$RUNSCM"
runscm_input tcc3 "$BOOT4/tcc3"
-runscm_input kernel.S seed-kernel/kernel.S
+runscm_input kernel.S seed-kernel/arch/aarch64/kernel.S
runscm_input kernel.c seed-kernel/kernel.c
+runscm_input arch.h seed-kernel/arch/aarch64/arch.h
+runscm_input mmu.c seed-kernel/arch/aarch64/mmu.c
runscm_input mem.c tcc-cc/mem.c
runscm_export Image
diff --git a/seed-kernel/Makefile b/seed-kernel/Makefile
@@ -1,12 +1,12 @@
-# seed-kernel — minimal arm64 OS that satisfies docs/OS.md Tier 1.
-#
-# Build runs inside boot2-alpine-gcc:aarch64 (already arm64-native), so
-# everything compiles with the host toolchain — no cross prefixes.
+# seed-kernel — minimal OS that satisfies docs/OS.md Tier 1/2.
-CC := gcc
-LD := ld
+ARCH ?= aarch64
+# GNU Make predefines CC (as `cc`) and LD (as `ld`), so a plain `?=` never
+# takes effect for them. Apply our defaults only while the variable still
+# holds Make's built-in value (or none at all, under `make -R`); values from
+# the environment or the command line are left untouched.
+ifneq ($(filter default undefined,$(origin CC)),)
+CC := gcc
+endif
+ifneq ($(filter default undefined,$(origin LD)),)
+LD := ld
+endif
+OBJCOPY ?= objcopy
OUT := build
-KOBJS := $(OUT)/kasm.o $(OUT)/kernel.o $(OUT)/mem.o
+ARCHDIR := arch/$(ARCH)
+KOBJS := $(OUT)/kasm.o $(OUT)/kernel.o $(OUT)/mmu.o $(OUT)/mem.o
KIMAGE := $(OUT)/kernel.elf
KBIN := $(OUT)/Image
USER := $(OUT)/init
@@ -24,7 +24,14 @@ OUT_IMG_SIZE := 268435456
CFLAGS_COMMON := -nostdlib -ffreestanding -fno-stack-protector \
-fno-pic -static -Wall -Wextra -O2 -mcmodel=large \
-fno-asynchronous-unwind-tables -fno-unwind-tables
-KCFLAGS := $(CFLAGS_COMMON) -mgeneral-regs-only
+# Kernel objects also get the backend directory on the include path so that
+# `#include "arch.h"` resolves to arch/$(ARCH)/arch.h.
+KCFLAGS := $(CFLAGS_COMMON) -I$(ARCHDIR)
+
+# Only the aarch64 backend is wired into this build: reject every other
+# ARCH at parse time, before any rule can run.
+ifneq ($(ARCH),aarch64)
+$(error seed-kernel backend '$(ARCH)' is staged but not boot-wired yet; use ARCH=aarch64)
+endif
+
+# aarch64: restrict generated code to the general-purpose registers, for
+# the kernel and for the user test programs alike.
+KCFLAGS += -mgeneral-regs-only
+USER_ARCH_CFLAGS := -mgeneral-regs-only
.PHONY: all clean kernel user initramfs
all: $(KBIN) $(INITRAMFS) $(INITRAMFS_FORK) $(IN_IMG) $(IN_IMG_FORK)
@@ -32,10 +39,13 @@ all: $(KBIN) $(INITRAMFS) $(INITRAMFS_FORK) $(IN_IMG) $(IN_IMG_FORK)
$(OUT):
mkdir -p $(OUT)
-$(OUT)/kasm.o: kernel.S | $(OUT)
+$(OUT)/kasm.o: $(ARCHDIR)/kernel.S | $(OUT)
+ $(CC) $(KCFLAGS) -c -o $@ $<
+
+$(OUT)/kernel.o: kernel.c $(ARCHDIR)/arch.h | $(OUT)
$(CC) $(KCFLAGS) -c -o $@ $<
-$(OUT)/kernel.o: kernel.c | $(OUT)
+$(OUT)/mmu.o: $(ARCHDIR)/mmu.c $(ARCHDIR)/arch.h | $(OUT)
$(CC) $(KCFLAGS) -c -o $@ $<
# Shared mem helpers (memcpy/memset/memmove/memcmp). Lives in
@@ -45,21 +55,21 @@ $(OUT)/kernel.o: kernel.c | $(OUT)
$(OUT)/mem.o: ../tcc-cc/mem.c | $(OUT)
$(CC) $(KCFLAGS) -c -o $@ $<
-$(KIMAGE): $(KOBJS) kernel.lds
- $(LD) -nostdlib -static -T kernel.lds -o $@ $(KOBJS)
+$(KIMAGE): $(KOBJS) $(ARCHDIR)/kernel.lds
+ $(LD) -nostdlib -static -T $(ARCHDIR)/kernel.lds -o $@ $(KOBJS)
# Strip ELF down to a flat binary that QEMU's -kernel can load.
$(KBIN): $(KIMAGE)
- objcopy -O binary $< $@
+ $(OBJCOPY) -O binary $< $@
$(USER): user/hello.c user/user.lds | $(OUT)
- $(CC) $(CFLAGS_COMMON) -mgeneral-regs-only -T user/user.lds -o $@ $<
+ $(CC) $(CFLAGS_COMMON) $(USER_ARCH_CFLAGS) -T user/user.lds -o $@ $<
$(USER_FORK): user/forktest.c user/user.lds | $(OUT)
- $(CC) $(CFLAGS_COMMON) -mgeneral-regs-only -T user/user.lds -o $@ $<
+ $(CC) $(CFLAGS_COMMON) $(USER_ARCH_CFLAGS) -T user/user.lds -o $@ $<
$(USER_CHILD): user/child.c user/user.lds | $(OUT)
- $(CC) $(CFLAGS_COMMON) -mgeneral-regs-only -T user/user.lds -o $@ $<
+ $(CC) $(CFLAGS_COMMON) $(USER_ARCH_CFLAGS) -T user/user.lds -o $@ $<
$(INITRAMFS): $(USER)
cd $(OUT) && printf 'init\n' | cpio -o -H newc > initramfs.cpio
diff --git a/seed-kernel/arch/aarch64/arch.h b/seed-kernel/arch/aarch64/arch.h
@@ -0,0 +1,86 @@
+/* aarch64 seed-kernel backend contract. */
+#ifndef SEED_ARCH_H
+#define SEED_ARCH_H
+
+#define ARCH_NAME "aarch64"
+#define ARCH_ELF_MACHINE 0xb7 /* e_machine value load_elf accepts (EM_AARCH64) */
+#define ARCH_ELF_MACHINE_NAME "aarch64" /* printed in load_elf's "ELF: not ..." diagnostic */
+
+#define ARCH_DEVICE_ALIAS_BASE 0x100000000UL /* VA 4 GB: mmu.c maps a Device block of PA 0..1 GB here (l1_pt[4]) */
+#define ARCH_UART0_PA 0x09000000UL /* PL011 on QEMU virt */
+#define ARCH_KERNEL_HEAP_END 0x4b000000UL
+
+#define ARCH_USER_POOL_A_PA 0x4c000000UL /* user RAM pool A */
+#define ARCH_USER_POOL_B_PA 0x7c000000UL /* user RAM pool B */
+#define ARCH_USER_POOL_SIZE 0x30000000UL /* 768 MB */
+#define ARCH_USER_VA_LO 0x00200000UL /* slot 1 — first mapped 2 MB block */
+#define ARCH_USER_VA_HI 0x30200000UL /* slot 385 — first device-only block */
+#define ARCH_USER_POOL_FIRST_SLOT 1
+#define ARCH_USER_POOL_LAST_SLOT 384 /* ARCH_USER_POOL_SIZE / 2 MB */
+
+#define ARCH_SYS_unlinkat 35
+#define ARCH_SYS_openat 56
+#define ARCH_SYS_close 57
+#define ARCH_SYS_lseek 62
+#define ARCH_SYS_read 63
+#define ARCH_SYS_write 64
+#define ARCH_SYS_exit_group 93
+#define ARCH_SYS_waitid 95
+#define ARCH_SYS_brk 214
+#define ARCH_SYS_spawn 1024 /* private syscall number, deliberately outside the normal Linux range */
+
+#define ARCH_TRAPFRAME_NREGS 31 /* number of x[] slots in struct trapframe */
+struct trapframe {
+ u64 x[ARCH_TRAPFRAME_NREGS]; /* general-purpose register slots x[0]..x[30] */
+ u64 elr; /* resume PC (accessed through ARCH_TF_PC / ARCH_SET_PC) */
+ u64 spsr; /* saved status word; NOTE(review): presumably SPSR_EL1 as stored by kernel.S — confirm */
+};
+
+#define ARCH_SYSCALL_ARG(tf, i) ((tf)->x[(i)]) /* i-th syscall argument is read from x[i] */
+#define ARCH_SYSCALL_NR(tf) ((tf)->x[8]) /* syscall number is read from x[8] */
+#define ARCH_SET_RET(tf, v) ((tf)->x[0] = (u64)(v)) /* syscall result is written to x[0] */
+#define ARCH_SET_PC(tf, v) ((tf)->elr = (u64)(v))
+#define ARCH_TF_PC(tf) ((tf)->elr)
+#define ARCH_IS_SYSCALL(cause) ((((u32)(((cause) >> 26) & 0x3f))) == 0x15) /* true when bits [31:26] of cause equal 0x15; NOTE(review): presumably ESR_EL1.EC for an SVC — confirm against the trap stub */
+
+enum { SR_MAIR_EL1, SR_TCR_EL1, SR_TTBR0_EL1, SR_SCTLR_EL1, /* SR_*, BAR_*, PAUSE_* ids must match the #defines at the bottom of kernel.S, in declaration order */
+ SR_CPACR_EL1, SR_SP_EL0, SR_FAR_EL1 };
+enum { BAR_DSB_SY, BAR_DSB_ISH, BAR_DMB_ISH, BAR_DMB_ISHST, BAR_ISB };
+enum { PAUSE_WFE, PAUSE_WFI, PAUSE_YIELD };
+
+extern u64 sysreg_read(int id); /* kernel.S thunk; id is one of SR_* */
+extern void sysreg_write(int id, u64 v); /* kernel.S thunk; id is one of SR_* */
+extern void arm64_barrier(int kind); /* kernel.S thunk; kind is one of BAR_* */
+extern void arm64_ic_iallu(void);
+extern void arm64_tlbi_vmalle1(void);
+extern void cpu_pause(int kind); /* kernel.S thunk; kind is one of PAUSE_* */
+extern u64 arm64_psci_call(int conduit, u64 fnid);
+extern void eret_to_user(u64 entry, u64 sp);
+extern void arch_setup_mmu(void); /* defined in mmu.c */
+extern void arch_swap_user_pool(int which); /* defined in mmu.c; which: 0 = pool A, non-zero = pool B */
+
+/* User stack pointer and fault address are read/written through the
+ * kernel.S sysreg thunks. */
+#define arch_read_user_sp() sysreg_read(SR_SP_EL0)
+#define arch_write_user_sp(v) sysreg_write(SR_SP_EL0, (v))
+#define arch_fault_addr() sysreg_read(SR_FAR_EL1)
+/* Busy-wait hint. Besides hang(), kernel.c calls this while polling the
+ * virtio used ring, and that loop issued YIELD before the arch split. WFE
+ * may sleep until an event arrives, which a purely polled device is not
+ * guaranteed to generate, so map to YIELD: it is safe at every call site. */
+#define arch_pause() cpu_pause(PAUSE_YIELD)
+/* Park the CPU for good: loop on WFI and never return. */
+#define arch_idle_forever() do { for (;;) cpu_pause(PAUSE_WFI); } while (0)
+/* Kernel pointer for a device physical address, via the device alias window. */
+#define arch_mmio_ptr(pa) ((volatile u8 *)(ARCH_DEVICE_ALIAS_BASE + (u64)(pa)))
+/* Memory barriers used by the virtio ring code: DMB ISHST / DMB ISH. */
+#define arch_wmb() arm64_barrier(BAR_DMB_ISHST)
+#define arch_rmb() arm64_barrier(BAR_DMB_ISH)
+/* Instruction-cache invalidation after new code was written (load_elf). */
+#define arch_icache_sync() do { arm64_barrier(BAR_DSB_SY); arm64_ic_iallu(); arm64_barrier(BAR_DSB_SY); arm64_barrier(BAR_ISB); } while (0)
+#define arch_icache_context_sync() do { arm64_ic_iallu(); arm64_barrier(BAR_DSB_ISH); arm64_barrier(BAR_ISB); } while (0)
+/* Power off via PSCI function id 0x84000008, tried through conduit 0 and
+ * then conduit 1. */
+#define arch_system_off() do { arm64_psci_call(0, 0x84000008); arm64_psci_call(1, 0x84000008); } while (0)
+
+/* Prepare tf for a fresh user program: zero every general-purpose register
+ * slot and point the resume PC (elr) at entry. spsr is left unchanged. */
+static inline void arch_clear_to_user_entry(struct trapframe *tf, u64 entry) {
+    u64 *reg = tf->x;
+    u64 *end = tf->x + ARCH_TRAPFRAME_NREGS;
+    while (reg != end) *reg++ = 0;
+    tf->elr = entry;
+}
+
+/* Blocking console write: busy-wait while the transmit FIFO is full, then
+ * store the byte to the data register. The UART is reached through the
+ * device alias window. */
+static inline void arch_console_putc(char c) {
+    volatile u32 *uart = (volatile u32 *)(ARCH_DEVICE_ALIAS_BASE + ARCH_UART0_PA);
+    const u32 txff = 1u << 5;              /* FR bit 5: TX FIFO full */
+    while ((uart[0x18 / 4] & txff) != 0) { /* FR lives at byte offset 0x18 */
+    }
+    uart[0x00 / 4] = (u32)(u8)c;           /* DR lives at byte offset 0x00 */
+}
+
+#endif
diff --git a/seed-kernel/kernel.S b/seed-kernel/arch/aarch64/kernel.S
diff --git a/seed-kernel/kernel.lds b/seed-kernel/arch/aarch64/kernel.lds
diff --git a/seed-kernel/arch/aarch64/mmu.c b/seed-kernel/arch/aarch64/mmu.c
@@ -0,0 +1,81 @@
+/* aarch64 stage-1 MMU setup for QEMU virt. */
+
+typedef unsigned long u64;
+typedef unsigned int u32;
+typedef unsigned char u8;
+
+#include "arch.h"
+
+__attribute__((aligned(4096))) static u64 l1_pt[512];
+__attribute__((aligned(4096))) static u64 l2_user[512];
+
+/* Physical base address of user pool `which`: 0 selects pool A, any
+ * non-zero value selects pool B. */
+static u64 pool_pa(int which) {
+    if (which == 0) return ARCH_USER_POOL_A_PA;
+    return ARCH_USER_POOL_B_PA;
+}
+
+void arch_setup_mmu(void) { /* build the L1/L2 tables, program MAIR/TCR/TTBR0, then turn on MMU + caches */
+ /* Block-descriptor attribute bits (block at L1 = bit[1]=0).
+ * V(0)=1, block(1)=0, AttrIdx[4:2]=Attr0(Normal)/Attr1(Device),
+ * NS(5)=0, AP[7:6]=00 (RW EL1 only), SH[9:8]=11 (ISH), AF(10)=1,
+ * nG(11)=0 -> 0x701 (Normal) / 0x705 (Device-nGnRnE).
+ * Block descriptors at L2 use the same bit layout. */
+ u64 normal = 0x701;
+ u64 device = 0x705;
+
+ for (int i = 0; i < 512; i++) l1_pt[i] = 0; /* start with every L1 entry invalid */
+
+ l2_user[0] = 0; /* slot 0 (VA 0..2 MB) invalid — NULL pointer traps */
+ for (int i = ARCH_USER_POOL_FIRST_SLOT; i <= ARCH_USER_POOL_LAST_SLOT; i++) { /* user slots: Normal RAM, backed by pool A initially */
+ u64 pa = ARCH_USER_POOL_A_PA + (u64)(i - ARCH_USER_POOL_FIRST_SLOT) * 0x200000UL;
+ l2_user[i] = pa | normal;
+ }
+ for (int i = ARCH_USER_POOL_LAST_SLOT + 1; i < 512; i++) { /* slots above the pool: Device-identity */
+ u64 pa = (u64)i * 0x200000UL;
+ l2_user[i] = pa | device;
+ }
+
+ l1_pt[0] = (u64)l2_user | 0x3UL; /* table descriptor: bits [1:0] = 0b11, bits [47:12] = next-level table PA */
+ l1_pt[1] = 0x40000000UL | normal; /* l1_pt[1..3]: Normal 1 GB blocks identity-mapping VA 1..4 GB */
+ l1_pt[2] = 0x80000000UL | normal;
+ l1_pt[3] = 0xc0000000UL | normal;
+ l1_pt[4] = 0x00000000UL | device; /* Device 1 GB block aliasing PA 0..1 GB into VA 4..5 GB (UART/GIC/virtio) */
+
+ sysreg_write(SR_MAIR_EL1, 0x00000000000000ffUL); /* Attr0 = 0xff (Normal WB-WA), Attr1 = 0x00 (Device-nGnRnE) */
+
+ u64 tcr = (u64)25 /* T0SZ: 39-bit VA */
+ | ((u64)1 << 8) /* IRGN0 = WBWA */
+ | ((u64)1 << 10) /* ORGN0 = WBWA */
+ | ((u64)3 << 12) /* SH0 = inner shareable */
+ | ((u64)0 << 14) /* TG0 = 4KB */
+ | ((u64)1 << 23) /* EPD1 = disable TTBR1 walks */
+ | ((u64)2 << 32); /* IPS = 40-bit phys */
+ sysreg_write(SR_TCR_EL1, tcr);
+ sysreg_write(SR_TTBR0_EL1, (u64)l1_pt);
+
+ arm64_ic_iallu(); /* invalidate I-cache and TLB before the MMU is switched on */
+ arm64_barrier(BAR_DSB_ISH);
+ arm64_tlbi_vmalle1();
+ arm64_barrier(BAR_DSB_ISH);
+ arm64_barrier(BAR_ISB);
+
+ u64 sctlr = sysreg_read(SR_SCTLR_EL1);
+ sctlr &= ~(u64)((1 << 1) | (1 << 19)); /* clear A (alignment), WXN */
+ sctlr |= (u64)((1 << 0) | (1 << 2) | (1 << 12)); /* M — MMU on, C — D-cache on, I — I-cache on */
+ sysreg_write(SR_SCTLR_EL1, sctlr);
+ arm64_barrier(BAR_ISB);
+
+ sysreg_write(SR_CPACR_EL1, (u64)3 << 20); /* CPACR_EL1.FPEN = 0b11: don't trap FP/ASIMD from EL0 or EL1 */
+ arm64_barrier(BAR_ISB);
+}
+
+/* Point every user slot of l2_user[] at consecutive 2 MB blocks of pool
+ * `which` (0 = A, non-zero = B), then invalidate the TLB so the new
+ * translations take effect. */
+void arch_swap_user_pool(int which) {
+    const u64 attrs = 0x701;            /* Normal-memory block descriptor bits */
+    u64 block_pa = pool_pa(which);
+    int slot = ARCH_USER_POOL_FIRST_SLOT;
+    while (slot <= ARCH_USER_POOL_LAST_SLOT) {
+        l2_user[slot] = block_pa | attrs;
+        block_pa += 0x200000UL;         /* advance to the next 2 MB block */
+        slot++;
+    }
+    /* The barrier / invalidate sequence below is order-sensitive. */
+    arm64_barrier(BAR_DSB_ISH);
+    arm64_tlbi_vmalle1();
+    arm64_barrier(BAR_DSB_ISH);
+    arm64_barrier(BAR_ISB);
+}
diff --git a/seed-kernel/arch/amd64/arch.h b/seed-kernel/arch/amd64/arch.h
@@ -0,0 +1,75 @@
+/* amd64 seed-kernel backend constants.
+ * Low-level boot/trap/MMU and block transport are kept out of common C. */
+#ifndef SEED_ARCH_H
+#define SEED_ARCH_H
+
+#define ARCH_NAME "amd64"
+#define ARCH_ELF_MACHINE 0x3e
+#define ARCH_ELF_MACHINE_NAME "amd64"
+
+#define ARCH_DEVICE_ALIAS_BASE 0xffff800000000000UL
+#define ARCH_UART0_PA 0x000003f8UL
+#define ARCH_KERNEL_HEAP_END 0x04b00000UL
+
+#define ARCH_USER_POOL_A_PA 0x04c00000UL
+#define ARCH_USER_POOL_B_PA 0x34c00000UL
+#define ARCH_USER_POOL_SIZE 0x30000000UL
+#define ARCH_USER_VA_LO 0x00200000UL
+#define ARCH_USER_VA_HI 0x30200000UL
+#define ARCH_USER_POOL_FIRST_SLOT 1
+#define ARCH_USER_POOL_LAST_SLOT 384
+
+#define ARCH_SYS_read 0
+#define ARCH_SYS_write 1
+#define ARCH_SYS_close 3
+#define ARCH_SYS_lseek 8
+#define ARCH_SYS_brk 12
+#define ARCH_SYS_exit_group 60
+#define ARCH_SYS_waitid 247
+#define ARCH_SYS_openat 257
+#define ARCH_SYS_unlinkat 263
+#define ARCH_SYS_spawn 1024
+
+/* Saved user context. x[] is sized by ARCH_TRAPFRAME_NREGS so the array
+ * and the arch_clear_to_user_entry() loop bound cannot drift apart. */
+#define ARCH_TRAPFRAME_NREGS 24
+struct trapframe {
+    u64 x[ARCH_TRAPFRAME_NREGS];
+    u64 pc;
+    u64 flags;
+};
+
+#define ARCH_SYSCALL_ARG(tf, i) ((i) == 0 ? (tf)->x[0] : \
+ (i) == 1 ? (tf)->x[1] : \
+ (i) == 2 ? (tf)->x[2] : \
+ (i) == 3 ? (tf)->x[3] : \
+ (i) == 4 ? (tf)->x[4] : (tf)->x[5]) /* indices 0..4 read x[0]..x[4]; any other index reads x[5]; not an lvalue */
+#define ARCH_SYSCALL_NR(tf) ((tf)->x[6]) /* syscall number is read from x[6] */
+#define ARCH_SET_RET(tf, v) ((tf)->x[6] = (u64)(v)) /* result overwrites the same slot, x[6] */
+#define ARCH_SET_PC(tf, v) ((tf)->pc = (u64)(v))
+#define ARCH_TF_PC(tf) ((tf)->pc)
+#define ARCH_IS_SYSCALL(cause) ((cause) == 0) /* NOTE(review): the cause encoding comes from the amd64 trap stub, not visible here — confirm */
+
+extern void arch_setup_mmu(void);
+extern void arch_swap_user_pool(int which);
+extern u64 arch_read_user_sp(void);
+extern void arch_write_user_sp(u64 v);
+extern u64 arch_fault_addr(void);
+extern void arch_pause(void);
+extern void arch_idle_forever(void);
+extern volatile u8 *arch_mmio_ptr(u64 pa);
+extern void arch_wmb(void);
+extern void arch_rmb(void);
+extern void arch_icache_sync(void);
+extern void arch_icache_context_sync(void);
+extern void arch_system_off(void);
+extern void eret_to_user(u64 entry, u64 sp);
+
+/* Prepare tf for a fresh user program: zero every x[] slot and point the
+ * resume PC at entry. flags is left unchanged. */
+static inline void arch_clear_to_user_entry(struct trapframe *tf, u64 entry) {
+    u64 *reg = tf->x;
+    u64 *end = tf->x + ARCH_TRAPFRAME_NREGS;
+    while (reg != end) *reg++ = 0;
+    tf->pc = entry;
+}
+
+static inline void arch_console_putc(char c) { /* placeholder for the staged amd64 backend */
+ (void)c; /* no console output yet: the character is discarded */
+}
+
+#endif
diff --git a/seed-kernel/arch/riscv64/arch.h b/seed-kernel/arch/riscv64/arch.h
@@ -0,0 +1,74 @@
+/* riscv64 seed-kernel backend constants.
+ * Low-level boot/trap/MMU files are intentionally isolated under this
+ * directory; common kernel.c only depends on the contract below. */
+#ifndef SEED_ARCH_H
+#define SEED_ARCH_H
+
+#define ARCH_NAME "riscv64"
+#define ARCH_ELF_MACHINE 0xf3
+#define ARCH_ELF_MACHINE_NAME "riscv64"
+
+#define ARCH_DEVICE_ALIAS_BASE 0x100000000UL
+#define ARCH_UART0_PA 0x10000000UL
+#define ARCH_KERNEL_HEAP_END 0x8b000000UL
+
+#define ARCH_USER_POOL_A_PA 0x8c000000UL
+#define ARCH_USER_POOL_B_PA 0xbc000000UL
+#define ARCH_USER_POOL_SIZE 0x30000000UL
+#define ARCH_USER_VA_LO 0x00200000UL
+#define ARCH_USER_VA_HI 0x30200000UL
+#define ARCH_USER_POOL_FIRST_SLOT 1
+#define ARCH_USER_POOL_LAST_SLOT 384
+
+#define ARCH_SYS_unlinkat 35
+#define ARCH_SYS_openat 56
+#define ARCH_SYS_close 57
+#define ARCH_SYS_lseek 62
+#define ARCH_SYS_read 63
+#define ARCH_SYS_write 64
+#define ARCH_SYS_exit_group 93
+#define ARCH_SYS_waitid 95
+#define ARCH_SYS_brk 214
+#define ARCH_SYS_spawn 1024
+
+/* Saved user context. x[] is sized by ARCH_TRAPFRAME_NREGS so the array
+ * and the arch_clear_to_user_entry() loop bound cannot drift apart. */
+#define ARCH_TRAPFRAME_NREGS 32
+struct trapframe {
+    u64 x[ARCH_TRAPFRAME_NREGS];
+    u64 pc;
+    u64 status;
+};
+
+#define ARCH_SYSCALL_ARG(tf, i) ((tf)->x[10 + (i)]) /* i-th syscall argument is read from x[10 + i] */
+#define ARCH_SYSCALL_NR(tf) ((tf)->x[17]) /* syscall number is read from x[17] */
+#define ARCH_SET_RET(tf, v) ((tf)->x[10] = (u64)(v)) /* syscall result is written to x[10] */
+#define ARCH_SET_PC(tf, v) ((tf)->pc = (u64)(v))
+#define ARCH_TF_PC(tf) ((tf)->pc)
+#define ARCH_IS_SYSCALL(cause) ((cause) == 8) /* NOTE(review): presumably scause 8, environment call from U-mode — confirm against the trap stub */
+
+extern void arch_setup_mmu(void);
+extern void arch_swap_user_pool(int which);
+extern u64 arch_read_user_sp(void);
+extern void arch_write_user_sp(u64 v);
+extern u64 arch_fault_addr(void);
+extern void arch_pause(void);
+extern void arch_idle_forever(void);
+extern volatile u8 *arch_mmio_ptr(u64 pa);
+extern void arch_wmb(void);
+extern void arch_rmb(void);
+extern void arch_icache_sync(void);
+extern void arch_icache_context_sync(void);
+extern void arch_system_off(void);
+extern void eret_to_user(u64 entry, u64 sp);
+
+/* Prepare tf for a fresh user program: zero every x[] slot and point the
+ * resume PC at entry. status is left unchanged. */
+static inline void arch_clear_to_user_entry(struct trapframe *tf, u64 entry) {
+    u64 *reg = tf->x;
+    u64 *end = tf->x + ARCH_TRAPFRAME_NREGS;
+    while (reg != end) *reg++ = 0;
+    tf->pc = entry;
+}
+
+/* Blocking console write: spin until bit 0x20 of the byte at UART offset 5
+ * is set, then store the character at offset 0.
+ * NOTE(review): presumably a 16550-style LSR.THRE / THR pair — confirm. */
+static inline void arch_console_putc(char c) {
+    volatile u8 *thr = (volatile u8 *)(ARCH_DEVICE_ALIAS_BASE + ARCH_UART0_PA);
+    volatile u8 *lsr = thr + 5;
+    while ((*lsr & 0x20) == 0) { }
+    *thr = (u8)c;
+}
+
+#endif
diff --git a/seed-kernel/kernel.c b/seed-kernel/kernel.c
@@ -1,11 +1,12 @@
/* seed kernel — minimal OS satisfying docs/OS.md Tier 1.
*
- * Boots via Linux arm64 boot protocol (-kernel + two virtio-blk-MMIO
- * disks), parses the DTB to find virtio_mmio nodes + memory, brings up
+ * Boots through an arch backend with two virtio-blk-MMIO disks, parses
+ * the DTB to find virtio_mmio nodes + memory, brings up
* a small polling virtio-blk driver, reads the cpio newc archive from
* blk0 (read-only) into the in-memory tmpfs, loads /init (a static
- * aarch64 ELF), and ERETs into it at EL1t. SVC traps land in trap_sync()
- * and dispatch the eight Tier-1 syscalls. On exit, the tmpfs is
+ * target ELF), and enters it through the arch trap-return path. Syscall
+ * traps land in trap_sync() and dispatch Tier-1/Tier-2 syscalls. On exit,
+ * the tmpfs is
* serialised to blk1 in a small SEEDFS table for the host extractor.
*/
@@ -16,22 +17,12 @@ typedef unsigned long u64;
typedef long i64;
typedef int i32;
-/* ─── PL011 console ─────────────────────────────────────────────────────── */
+#include "arch.h"
-/* The PL011 lives at PA 0x09000000 on QEMU virt. Once the MMU comes up the
- * kernel reaches it through the device alias mapped into VA 4 GB..5 GB
- * (L1[4]). That keeps the entire low 1 GB of VA available as user RAM —
- * device MMIO at user-space VAs would otherwise collide with the boot2
- * chain's BSS, which can run past 256 MB. */
-#define DEVICE_ALIAS_BASE 0x100000000UL
-#define UART0 (DEVICE_ALIAS_BASE + 0x09000000UL)
-#define UART_DR ((volatile u32 *)(UART0 + 0x00))
-#define UART_FR ((volatile u32 *)(UART0 + 0x18))
-#define UART_FR_TXFF (1u << 5)
+/* ─── Console ───────────────────────────────────────────────────────────── */
static void uart_putc(char c) {
- while (*UART_FR & UART_FR_TXFF) { }
- *UART_DR = (u32)(u8)c;
+ arch_console_putc(c);
}
static void uart_puts(const char *s) {
@@ -56,23 +47,7 @@ static void uart_putd(i64 v) {
while (i--) uart_putc(buf[i]);
}
-/* ─── kernel.S thunk API ────────────────────────────────────────────────
- * SR_*, BAR_*, PAUSE_* ids must match the #defines at the bottom of
- * kernel.S in declaration order. */
-enum { SR_MAIR_EL1, SR_TCR_EL1, SR_TTBR0_EL1, SR_SCTLR_EL1,
- SR_CPACR_EL1, SR_SP_EL0, SR_FAR_EL1 };
-enum { BAR_DSB_SY, BAR_DSB_ISH, BAR_DMB_ISH, BAR_DMB_ISHST, BAR_ISB };
-enum { PAUSE_WFE, PAUSE_WFI, PAUSE_YIELD };
-
-extern u64 sysreg_read(int id);
-extern void sysreg_write(int id, u64 v);
-extern void arm64_barrier(int kind);
-extern void arm64_ic_iallu(void);
-extern void arm64_tlbi_vmalle1(void);
-extern void cpu_pause(int kind);
-extern u64 arm64_psci_call(int conduit, u64 fnid);
-
-__attribute__((noreturn)) static void hang(void) { for (;;) cpu_pause(PAUSE_WFE); }
+__attribute__((noreturn)) static void hang(void) { for (;;) arch_pause(); }
/* ─── Tiny libc-ish helpers ─────────────────────────────────────────────── */
@@ -107,126 +82,17 @@ static void mem_set(void *d, int c, u64 n) {
for (u64 i = 0; i < n; i++) dd[i] = (u8)c;
}
-/* ─── MMU bring-up ──────────────────────────────────────────────────────── */
-/* Two-level page table:
- * L1[0] → l2_user table descriptor (VA 0..1 GB, 2 MB blocks)
- * L1[1..3] = Normal 1 GB blocks identity-mapping VA 1..4 GB (RAM + high MMIO)
- * L1[4] = Device 1 GB block at PA 0 (VA 4..5 GB mirrors PA 0..1 GB as
- * Device-nGnRnE — the kernel's only path to UART/GIC/virtio/PCI
- * once we hand the low 1 GB over to user code).
- *
- * The l2_user table carves the low 1 GB into:
- * slot 0 (VA 0..2 MB) invalid — NULL pointer traps
- * slots 1..N (VA 2 MB..USER_VA_HI) Normal user RAM, backed by one
- * of two 768 MB physical pools
- * (USER_POOL_A_PA / USER_POOL_B_PA);
- * sys_spawn swaps which is mapped
- * here without any mem_cpy.
- * N=384 (slots 1..384, 768 MB) gives
- * tcc-boot2's 512 MB BSS plus brk room.
- * slots N+1..511 (VA USER_VA_HI..1G) Device-identity, kept for safety —
- * nothing user-side touches them, and
- * the kernel uses the high alias.
- *
- * With MMU on + Normal memory, unaligned loads/stores work — gcc's auto-
- * vectorised 64-bit load in be64() stops trapping. */
-__attribute__((aligned(4096))) static u64 l1_pt[512];
-__attribute__((aligned(4096))) static u64 l2_user[512];
-
-/* Two physical pools (A, B) backing the user low-VA window. On spawn
- * we swap l2_user[] from one to the other, TLB-invalidate, and load the
- * new ELF into the other pool — no memory copy. The atomic spawn syscall
- * (no userspace gap between fork and exec) means the child never reads
- * the parent's pool, so no snapshot is needed. MAX_PROC_DEPTH=1 means
- * two pools is sufficient (the prelude only forks one level deep).
- *
- * With QEMU -m 2048M (RAM 0x40000000–0xc0000000), the layout is:
- * 0x40000000–0x4c000000 kernel image + kheap (192 MB)
- * 0x4c000000–0x7c000000 user RAM pool A (768 MB)
- * 0x7c000000–0xac000000 user RAM pool B (768 MB)
- * 0xac000000–0xc0000000 spare (320 MB)
- */
-#define USER_POOL_A_PA 0x4c000000UL
-#define USER_POOL_B_PA 0x7c000000UL
-#define USER_POOL_SIZE 0x30000000UL /* 768 MB */
-#define USER_VA_LO 0x00200000UL /* slot 1 — first mapped 2 MB block */
-#define USER_VA_HI 0x30200000UL /* slot 385 — first device-only block */
-#define USER_POOL_FIRST_SLOT 1
-#define USER_POOL_LAST_SLOT 384 /* USER_POOL_SIZE / 2 MB */
+/* User address-space constants are supplied by the arch backend. */
+#define USER_POOL_A_PA ARCH_USER_POOL_A_PA
+#define USER_POOL_B_PA ARCH_USER_POOL_B_PA
+#define USER_POOL_SIZE ARCH_USER_POOL_SIZE
+#define USER_VA_LO ARCH_USER_VA_LO
+#define USER_VA_HI ARCH_USER_VA_HI
+#define USER_POOL_FIRST_SLOT ARCH_USER_POOL_FIRST_SLOT
+#define USER_POOL_LAST_SLOT ARCH_USER_POOL_LAST_SLOT
/* 0 = pool A is currently mapped at user VAs; 1 = pool B. */
static int current_pool = 0;
-static u64 pool_pa(int which) { return which ? USER_POOL_B_PA : USER_POOL_A_PA; }
-
-static void setup_mmu(void) {
- /* Block-descriptor attribute bits (block at L1 = bit[1]=0).
- * V(0)=1, block(1)=0, AttrIdx[4:2]=Attr0(Normal)/Attr1(Device),
- * NS(5)=0, AP[7:6]=00 (RW EL1 only), SH[9:8]=11 (ISH), AF(10)=1,
- * nG(11)=0 → 0x701 (Normal) / 0x705 (Device-nGnRnE).
- * Block descriptors at L2 use the same bit layout. */
- u64 normal = 0x701;
- u64 device = 0x705;
-
- for (int i = 0; i < 512; i++) l1_pt[i] = 0;
-
- /* L2 user table: slot 0 invalid; slots 1..USER_POOL_LAST_SLOT Normal
- * RAM backed by pool A initially; slots above that Device-identity. */
- l2_user[0] = 0;
- for (int i = USER_POOL_FIRST_SLOT; i <= USER_POOL_LAST_SLOT; i++) {
- u64 pa = USER_POOL_A_PA + (u64)(i - USER_POOL_FIRST_SLOT) * 0x200000UL;
- l2_user[i] = pa | normal;
- }
- for (int i = USER_POOL_LAST_SLOT + 1; i < 512; i++) {
- u64 pa = (u64)i * 0x200000UL;
- l2_user[i] = pa | device;
- }
-
- /* L1[0] table descriptor → l2_user. Table-desc encoding at L1 is
- * bits [1:0] = 0b11, bits [47:12] = next-level table PA. */
- l1_pt[0] = (u64)l2_user | 0x3UL;
- l1_pt[1] = 0x40000000UL | normal;
- l1_pt[2] = 0x80000000UL | normal;
- l1_pt[3] = 0xc0000000UL | normal;
- /* L1[4]: Device 1 GB block aliasing PA 0..1 GB into VA 4 GB..5 GB so
- * the kernel can still reach UART/GIC/virtio after we hand the low 1
- * GB over to user mappings. */
- l1_pt[4] = 0x00000000UL | device;
-
- /* MAIR: Attr0 = 0xff (Normal WB-WA), Attr1 = 0x00 (Device-nGnRnE) */
- u64 mair = 0x00000000000000ffUL;
- sysreg_write(SR_MAIR_EL1, mair);
-
- u64 tcr = (u64)25 /* T0SZ: 39-bit VA */
- | ((u64)1 << 8) /* IRGN0 = WBWA */
- | ((u64)1 << 10) /* ORGN0 = WBWA */
- | ((u64)3 << 12) /* SH0 = inner shareable */
- | ((u64)0 << 14) /* TG0 = 4KB */
- | ((u64)1 << 23) /* EPD1 = disable TTBR1 walks */
- | ((u64)2 << 32); /* IPS = 40-bit phys */
- sysreg_write(SR_TCR_EL1, tcr);
- sysreg_write(SR_TTBR0_EL1, (u64)l1_pt);
-
- arm64_ic_iallu();
- arm64_barrier(BAR_DSB_ISH);
- arm64_tlbi_vmalle1();
- arm64_barrier(BAR_DSB_ISH);
- arm64_barrier(BAR_ISB);
-
- u64 sctlr = sysreg_read(SR_SCTLR_EL1);
- sctlr &= ~(u64)((1 << 1) | (1 << 19)); /* clear A (alignment), WXN */
- sctlr |= (u64)((1 << 0) /* M — MMU on */
- | (1 << 2) /* C — D-cache on */
- | (1 << 12)); /* I — I-cache on */
- sysreg_write(SR_SCTLR_EL1, sctlr);
- arm64_barrier(BAR_ISB);
-
- /* CPACR_EL1.FPEN = 0b11: don't trap FP/ASIMD from EL0 or EL1.
- * tcc-built user binaries (notably the self-rebuilt tcc1) emit FP
- * register saves in their start glue; default FPEN=00 traps those
- * to EL1 with EC=0x07. */
- sysreg_write(SR_CPACR_EL1, (u64)3 << 20);
- arm64_barrier(BAR_ISB);
-}
/* ─── Kernel heap (bump allocator) ──────────────────────────────────────── */
@@ -583,17 +449,17 @@ static int blk_request_one(int devi, u32 type, u64 sector, void *buf, u64 nsec)
u16 head = 0;
u16 ai = avail->idx;
avail->ring[ai % VQ_SIZE] = head;
- arm64_barrier(BAR_DMB_ISHST);
+ arch_wmb();
avail->idx = ai + 1;
- arm64_barrier(BAR_DMB_ISHST);
+ arch_wmb();
mmio_w32(d, VIRTIO_MMIO_QUEUE_NOTIFY, 0);
/* Poll used.idx — single in-flight, advances by exactly one. */
while (used->idx == ai) {
- cpu_pause(PAUSE_YIELD);
+ arch_pause();
}
- arm64_barrier(BAR_DMB_ISH);
+ arch_rmb();
/* Acknowledge any pending interrupt status (we don't service IRQs but
* the device sets these bits anyway). */
@@ -645,7 +511,7 @@ static void blk_init(struct dtb_info *dt) {
/* Stage into blk_devs[n_blocks] so blk_init_one's index-derived
* vq page assignment is correct from the start. */
struct blk_dev *d = &blk_devs[n_blocks];
- d->regs = (volatile u8 *)(DEVICE_ALIAS_BASE + pa);
+ d->regs = arch_mmio_ptr(pa);
d->capacity_sectors = 0;
d->present = 0;
int r = blk_init_one(d);
@@ -865,11 +731,11 @@ static u64 load_elf(const u8 *elf) {
eh->e_ident[2] == 'L' && eh->e_ident[3] == 'F')) {
uart_puts("ELF: bad magic\n"); return 0;
}
- if (eh->e_machine != 0xb7) { /* EM_AARCH64 */
- uart_puts("ELF: not aarch64\n"); return 0;
+ if (eh->e_machine != ARCH_ELF_MACHINE) {
+ uart_puts("ELF: not "); uart_puts(ARCH_ELF_MACHINE_NAME); uart_puts("\n"); return 0;
}
- /* p_flags (R/W/X) are deliberately ignored: the L2 user mapping is one
- * giant Normal-memory RWX-at-EL1 region (see setup_mmu). OS.md
+ /* p_flags (R/W/X) are deliberately ignored: the user mapping is one
+ * giant Normal-memory RWX region (see arch_setup_mmu). OS.md
* §"Memory model" permits this — there's no W^X enforcement in the
* contract, and tcc-boot2 never JITs.
*
@@ -901,10 +767,7 @@ static u64 load_elf(const u8 *elf) {
/* Round up to 16 bytes so callers can use it directly as brk_base. */
g_user_image_end = (hi + 15) & ~15UL;
/* I-cache sync (cheap insurance even with caches off). */
- arm64_barrier(BAR_DSB_SY);
- arm64_ic_iallu();
- arm64_barrier(BAR_DSB_SY);
- arm64_barrier(BAR_ISB);
+ arch_icache_sync();
return eh->e_entry;
}
@@ -935,23 +798,22 @@ static u64 brk_max;
#define AT_FDCWD (-100)
-#define SYS_unlinkat 35
-#define SYS_openat 56
-#define SYS_close 57
-#define SYS_lseek 62
-#define SYS_read 63
-#define SYS_write 64
-#define SYS_exit_group 93
-#define SYS_waitid 95
-#define SYS_brk 214
-/* Private syscall number, deliberately outside the Linux aarch64 range
- * (last allocated is 462 = futex_requeue, plus a small reserved tail).
+#define SYS_unlinkat ARCH_SYS_unlinkat
+#define SYS_openat ARCH_SYS_openat
+#define SYS_close ARCH_SYS_close
+#define SYS_lseek ARCH_SYS_lseek
+#define SYS_read ARCH_SYS_read
+#define SYS_write ARCH_SYS_write
+#define SYS_exit_group ARCH_SYS_exit_group
+#define SYS_waitid ARCH_SYS_waitid
+#define SYS_brk ARCH_SYS_brk
+/* Private syscall number, deliberately outside the normal Linux range.
* The scheme1 prelude probes (sys-spawn) once at init: on Linux this
* number is unmapped so the probe gets -ENOSYS and the prelude falls
* back to the classic clone+execve path; on the seed kernel the probe
* succeeds (or returns -ENOENT for a missing file) and the prelude uses
* sys-spawn for every (run …) thereafter. */
-#define SYS_spawn 1024
+#define SYS_spawn ARCH_SYS_spawn
#define ECHILD 10
#define EAGAIN 11
@@ -1065,26 +927,26 @@ static i64 sys_unlinkat(int dirfd, const char *path, int flags) {
*
* sys_spawn → capture path+argv into kernel buffers (still reading from
* the parent's pool); push parent state (regs, brk, fd
- * table, current pool) onto proc_stack; remap l2_user[] to
+ * table, current pool) onto proc_stack; remap user VAs to
* the alternate pool with NO COPY (the child won't read any
* byte of the parent's pool — load_elf overwrites just the
* PT_LOAD ranges and build_user_stack writes the top of the
* user VA window); load_elf into the alternate pool, reset
- * brk, build the user stack, rewrite tf so eret enters the
- * new program at its entry with the new sp_el0.
+ * brk, build the user stack, rewrite tf so trap return
+ * enters the new program at its entry with the new stack.
* sys_exit → if proc_stack non-empty: stash exit code in last_child,
- * swap l2_user[] back to the parent's pool (no copy — the
+ * swap user VAs back to the parent's pool (no copy — the
* parent's pool was never written by the child), restore
- * regs/brk/fds, ic iallu (the user VAs now resolve to
- * different physical pages), set tf so eret resumes the
- * parent's spawn() call with x0 = pid. If proc_stack empty:
- * real exit (dump tmpfs, PSCI off).
+ * regs/brk/fds, cache-sync (the user VAs now resolve to
+ * different physical pages), set tf so trap return resumes
+ * the parent's spawn() call with child pid. If proc_stack
+ * empty: real exit (dump tmpfs, arch shutdown).
* sys_waitid → return last_child's exit code via the siginfo struct.
*
* No actual concurrency. The "parent" is suspended at the moment of spawn
* and resumed only when the child calls exit_group.
*
- * Memory cost per spawn: zero copy. l2_user[] rewrite + TLBI + ic iallu
+ * Memory cost per spawn: zero copy. User-map rewrite + TLB/cache sync
* + load_elf (which copies just the new image's PT_LOAD bytes, typically
* ~1 MB for tcc). This replaces the previous clone/execve design which
* paid one 768 MB mem_cpy per fork to seed the child's pool with parent
@@ -1095,12 +957,6 @@ static i64 sys_unlinkat(int dirfd, const char *path, int flags) {
* syscall closes that window entirely.
*/
-struct trapframe {
- u64 x[31];
- u64 elr;
- u64 spsr;
-};
-
/* Forward decls for state defined further down. boot5's per-source
* compile passes ~25 argv entries / ~750 bytes, but the final
* `tcc -ar rcs libc.a obj1 … obj1263` call passes ~1300 entries totalling
@@ -1116,13 +972,11 @@ static int tokenise(char *src, char **argv, int cap);
struct proc_save {
int active;
u64 child_pid;
- /* Saved trap-frame state — enough to resume the parent at the SVC
- * instruction following its sys_spawn. x[0] is overwritten with
- * child_pid at restore time so the parent sees a non-zero return. */
- u64 regs[31];
- u64 elr;
- u64 spsr;
- u64 sp_el0;
+ /* Saved trap-frame state — enough to resume the parent at the point
+ * where its sys_spawn trap returns. The arch return register is
+ * overwritten with child_pid at restore time. */
+ struct trapframe tf;
+ u64 user_sp;
/* Per-process state at the moment of spawn. brk_base is saved alongside
* brk_cur because sys_spawn resets it above the new image's end-of-bss;
* the parent's value comes back with the parent's pool. */
@@ -1132,19 +986,9 @@ struct proc_save {
int pool_save; /* parent's user pool (0=A, 1=B) */
};
-/* Rewrite the user-VA L2 entries to point at pool `which`, then flush TLB.
- * The kernel runs from a high-VA alias (L1[1..3] for RAM, L1[4] for MMIO),
- * so the swap doesn't disturb the kernel's own translations. */
+/* Remap the user-VA window onto pool `which` through the arch backend (which also handles the TLB flush), then record it as the current pool. */
static void swap_user_pool(int which) {
- u64 normal = 0x701;
- u64 base = pool_pa(which);
- for (int i = USER_POOL_FIRST_SLOT; i <= USER_POOL_LAST_SLOT; i++) {
- l2_user[i] = (base + (u64)(i - USER_POOL_FIRST_SLOT) * 0x200000UL) | normal;
- }
- arm64_barrier(BAR_DSB_ISH);
- arm64_tlbi_vmalle1();
- arm64_barrier(BAR_DSB_ISH);
- arm64_barrier(BAR_ISB);
+ arch_swap_user_pool(which);
current_pool = which;
}
@@ -1216,14 +1060,12 @@ static i64 sys_spawn(struct trapframe *tf, const char *path, char **argv) {
/* Save parent state — regs, brk, fd table, which pool the parent ran
* in. After sys_exit_or_resume_parent restores from this frame, the
- * parent's spawn() call returns with x[0] = child_pid. */
+ * parent's spawn() call returns with child_pid. */
struct proc_save *p = &proc_stack[proc_depth];
p->active = 1;
p->child_pid = g_next_pid++;
- for (int i = 0; i < 31; i++) p->regs[i] = tf->x[i];
- p->elr = tf->elr;
- p->spsr = tf->spsr;
- p->sp_el0 = sysreg_read(SR_SP_EL0);
+ p->tf = *tf;
+ p->user_sp = arch_read_user_sp();
p->brk_base_save = brk_base;
p->brk_cur_save = brk_cur;
for (int i = 0; i < MAX_FD; i++) p->fdtab_save[i] = fdtab[i];
@@ -1260,13 +1102,13 @@ static i64 sys_spawn(struct trapframe *tf, const char *path, char **argv) {
* entry with a clean register state and the new stack. The parent's
* regs sit on proc_stack until sys_exit_or_resume_parent restores
* them on child exit. */
- for (int i = 0; i < 31; i++) tf->x[i] = 0;
- tf->elr = entry;
- /* sp_el0 isn't in the trap frame — set it directly; it survives
- * until eret since the kernel uses SP_ELx while in trap_sync. */
- sysreg_write(SR_SP_EL0, new_sp);
- /* Returning 0; dispatcher writes tf->x[0] = 0. The child's _start
- * reads argc/argv from the stack, so x[0] is don't-care. */
+ arch_clear_to_user_entry(tf, entry);
+ /* Some backends keep the user stack pointer outside the saved
+ * trapframe, so set it through the arch hook. */
+ arch_write_user_sp(new_sp);
+ /* Returning 0; dispatcher writes the arch return register. The child's
+ * _start reads argc/argv from the stack, so the return register is
+ * don't-care. */
return 0;
}
@@ -1404,10 +1246,8 @@ static void sys_exit_final(int code) {
g_exited = 1;
dump_tmpfs_blk();
uart_puts("\n[seed] user exit_group("); uart_putd(code); uart_puts(")\n");
- /* Try PSCI SYSTEM_OFF so QEMU exits cleanly; fall back to spin. */
- arm64_psci_call(0 /*HVC*/, 0x84000008);
- arm64_psci_call(1 /*SMC*/, 0x84000008);
- for (;;) cpu_pause(PAUSE_WFI);
+ arch_system_off();
+ arch_idle_forever();
}
/* Dispatcher-side exit_group: pops proc_stack and resumes the parent's
@@ -1429,23 +1269,17 @@ static int sys_exit_or_resume_parent(struct trapframe *tf, int code) {
brk_base = p->brk_base_save;
brk_cur = p->brk_cur_save;
for (int i = 0; i < MAX_FD; i++) fdtab[i] = p->fdtab_save[i];
- /* Restore registers (overwriting x[0] with child_pid, since the
- * dispatcher will write tf->x[0] = (u64)r before eret — we want
- * the parent's sys_spawn to see child_pid as the syscall return). */
- for (int i = 0; i < 31; i++) tf->x[i] = p->regs[i];
- tf->elr = p->elr;
- tf->spsr = p->spsr;
- sysreg_write(SR_SP_EL0, p->sp_el0);
+ /* Restore registers; the dispatcher writes the child pid into the
+ * arch return register after this function returns. */
+ *tf = p->tf;
+ arch_write_user_sp(p->user_sp);
/* I-cache invalidation. The parent's pool was never written, so
* its instruction bytes (in DRAM) are byte-identical to what was
* originally fetched. But the same user VAs were just used to
* fetch the child's instructions from the other physical pool;
- * aarch64 I-caches may hold lines tagged by VA whose translation
- * just changed. `ic iallu` invalidates by VA so subsequent fetches
- * miss and re-walk through the freshly-swapped L2. */
- arm64_ic_iallu();
- arm64_barrier(BAR_DSB_ISH);
- arm64_barrier(BAR_ISB);
+ * I-caches may hold lines tagged by VA whose translation just
+ * changed, so the arch backend invalidates whatever is needed. */
+ arch_icache_context_sync();
return (int)p->child_pid; /* >0: tells dispatcher to write this as r */
}
sys_exit_final(code);
@@ -1459,11 +1293,11 @@ void trap_kernel(u64 esr, struct trapframe *tf);
void trap_unhandled(u64 esr, struct trapframe *tf);
i64 trap_sync(u64 esr, struct trapframe *tf) {
- u32 ec = (u32)((esr >> 26) & 0x3f);
- if (ec == 0x15) { /* SVC, AArch64 */
- u64 nr = tf->x[8];
- u64 a0 = tf->x[0], a1 = tf->x[1], a2 = tf->x[2];
- u64 a3 = tf->x[3], a4 = tf->x[4], a5 = tf->x[5];
+ if (ARCH_IS_SYSCALL(esr)) {
+ u64 nr = ARCH_SYSCALL_NR(tf);
+ u64 a0 = ARCH_SYSCALL_ARG(tf, 0), a1 = ARCH_SYSCALL_ARG(tf, 1);
+ u64 a2 = ARCH_SYSCALL_ARG(tf, 2), a3 = ARCH_SYSCALL_ARG(tf, 3);
+ u64 a4 = ARCH_SYSCALL_ARG(tf, 4), a5 = ARCH_SYSCALL_ARG(tf, 5);
i64 r;
switch (nr) {
case SYS_read: r = sys_read((int)a0, (void *)a1, a2); break;
@@ -1478,10 +1312,9 @@ i64 trap_sync(u64 esr, struct trapframe *tf) {
case SYS_exit_group:
r = sys_exit_or_resume_parent(tf, (int)a0);
/* If we resumed the parent, sys_exit_or_resume_parent has
- * rewritten tf->x[0..30] and tf->elr — overriding tf->x[0]
- * below would corrupt the parent's register state. */
+ * rewritten the trapframe; set only the arch return register. */
if (proc_depth >= 0 && r != 0) {
- tf->x[0] = (u64)r;
+ ARCH_SET_RET(tf, r);
return 0;
}
break;
@@ -1489,22 +1322,22 @@ i64 trap_sync(u64 esr, struct trapframe *tf) {
uart_puts("[seed] ENOSYS "); uart_putd((i64)nr); uart_puts("\n");
r = -38; /* ENOSYS */
}
- tf->x[0] = (u64)r;
+ ARCH_SET_RET(tf, r);
(void)a4; (void)a5;
return 0;
}
uart_puts("[seed] PANIC: user sync, ESR="); uart_putx(esr);
- uart_puts(" ELR="); uart_putx(tf->elr);
+ uart_puts(" ELR="); uart_putx(ARCH_TF_PC(tf));
uart_puts(" FAR=");
- u64 far = sysreg_read(SR_FAR_EL1); uart_putx(far);
+ u64 far = arch_fault_addr(); uart_putx(far);
uart_puts("\n");
hang();
}
void trap_kernel(u64 esr, struct trapframe *tf) {
- u64 far = sysreg_read(SR_FAR_EL1);
+ u64 far = arch_fault_addr();
uart_puts("[seed] PANIC: kernel sync, ESR="); uart_putx(esr);
- uart_puts(" ELR="); uart_putx(tf->elr);
+ uart_puts(" ELR="); uart_putx(ARCH_TF_PC(tf));
uart_puts(" FAR="); uart_putx(far);
uart_puts("\n");
hang();
@@ -1512,15 +1345,13 @@ void trap_kernel(u64 esr, struct trapframe *tf) {
void trap_unhandled(u64 esr, struct trapframe *tf) {
uart_puts("[seed] PANIC: unhandled exception, ESR="); uart_putx(esr);
- uart_puts(" ELR="); uart_putx(tf->elr);
+ uart_puts(" ELR="); uart_putx(ARCH_TF_PC(tf));
uart_puts("\n");
hang();
}
/* ─── User stack setup + entry ──────────────────────────────────────────── */
-extern void eret_to_user(u64 entry, u64 sp);
-
/* Tokenise `src` in place (whitespace separators) into argv slots.
* Writes pointers into argv[0..argc-1] and returns argc. Stops at cap. */
static int tokenise(char *src, char **argv, int cap) {
@@ -1574,16 +1405,17 @@ static u64 build_user_stack(u64 stack_top, int argc, char **argv) {
/* ─── kmain ─────────────────────────────────────────────────────────────── */
void kmain(u64 dtb_phys) {
- setup_mmu();
+ arch_setup_mmu();
/* Bring up heap immediately — placed at a 16MB-aligned offset above
* our image, well clear of BSS/stack. Without -initrd reserving the
* 0x44000000–0x4b000000 region, the full 176 MB is ours from boot. */
u64 image_end = (u64)_end;
kheap_ptr = (u8 *)((image_end + 0xfffful) & ~0xfffful);
- kheap_end = (u8 *)0x4b000000UL; /* 176MB of heap */
+ kheap_end = (u8 *)ARCH_KERNEL_HEAP_END;
- uart_puts("\n[seed] arm64 boot, x0/dtb="); uart_putx(dtb_phys); uart_puts("\n");
+ uart_puts("\n[seed] "); uart_puts(ARCH_NAME); uart_puts(" boot, dtb=");
+ uart_putx(dtb_phys); uart_puts("\n");
struct dtb_info dt = {0};
parse_dtb((const void *)dtb_phys, &dt);