boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 4a1e722026a5a30bd68d2975cb39e93833df4867
parent e9e24687d1c5cced306a34171c12c5965af24126
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue,  5 May 2026 23:02:57 -0700

Refactor seed-kernel arch layout

Diffstat:
M scripts/boot6-gen-runscm.sh | 8 ++++++--
M scripts/boot6.sh | 11 ++++++++---
M seed-kernel/Makefile | 42 ++++++++++++++++++++++++++----------------
A seed-kernel/arch/aarch64/arch.h | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
R seed-kernel/kernel.S -> seed-kernel/arch/aarch64/kernel.S | 0
R seed-kernel/kernel.lds -> seed-kernel/arch/aarch64/kernel.lds | 0
A seed-kernel/arch/aarch64/mmu.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A seed-kernel/arch/amd64/arch.h | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A seed-kernel/arch/riscv64/arch.h | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M seed-kernel/kernel.c | 348 +++++++++++++++++++++----------------------------------------------------------
10 files changed, 446 insertions(+), 279 deletions(-)

diff --git a/scripts/boot6-gen-runscm.sh b/scripts/boot6-gen-runscm.sh @@ -64,9 +64,13 @@ cat > "$OUT" <<EOF "kernel.S -> kernel-asm.o") (write-string stdout "boot6: tcc3 -c kernel.c\n") -(must (run "in/tcc3" $KCFLAGS "-c" "-o" "out/kernel.o" "in/kernel.c") +(must (run "in/tcc3" $KCFLAGS "-Iin" "-c" "-o" "out/kernel.o" "in/kernel.c") "kernel.c -> kernel.o") +(write-string stdout "boot6: tcc3 -c mmu.c\n") +(must (run "in/tcc3" $KCFLAGS "-Iin" "-c" "-o" "out/mmu.o" "in/mmu.c") + "mmu.c -> mmu.o") + (write-string stdout "boot6: tcc3 -c mem.c\n") (must (run "in/tcc3" $KCFLAGS "-c" "-o" "out/mem.o" "in/mem.c") "mem.c -> mem.o") @@ -76,7 +80,7 @@ cat > "$OUT" <<EOF "-Wl,-Ttext=0x40080000" "-Wl,--oformat=binary" "-o" "out/Image" - "out/kernel-asm.o" "out/kernel.o" "out/mem.o") + "out/kernel-asm.o" "out/kernel.o" "out/mmu.o" "out/mem.o") "link Image") (write-string stdout "boot6: ALL-OK\n") diff --git a/scripts/boot6.sh b/scripts/boot6.sh @@ -13,12 +13,15 @@ ## build/$ARCH/boot4/tcc3 — boot4's verified self-host tcc ## (compiler + linker) ## build/$ARCH/boot2/scheme1 — driver runtime -## seed-kernel/kernel.S — boot stub, vector table, asm thunks, +## seed-kernel/arch/aarch64/kernel.S +## — boot stub, vector table, asm thunks, ## trailing 64 KB stack reserved as ## plain `.bss` (kstack_top is the end ## label of that reservation) ## seed-kernel/kernel.c — DTB parse, MMU bring-up, syscalls, ## virtio-blk, tmpfs, ELF loader +## seed-kernel/arch/aarch64/mmu.c +## — arm64 page-table setup and pool swap ## tcc-cc/mem.c — memcpy/memset/memmove/memcmp ## ## ─── Tools ──────────────────────────────────────────────────────────── @@ -62,7 +65,7 @@ STAGE=build/$ARCH/.boot6-stage # ── prerequisites ───────────────────────────────────────────────────── [ -x "$BOOT4/tcc3" ] || { echo "[boot6 $ARCH] missing $BOOT4/tcc3 (run scripts/boot4.sh $ARCH)" >&2; exit 1; } [ -x "$BOOT2/scheme1" ] || { echo "[boot6 $ARCH] missing $BOOT2/scheme1 (run scripts/boot2.sh $ARCH)" >&2; exit 1; } -for f in 
seed-kernel/kernel.S seed-kernel/kernel.c tcc-cc/mem.c; do +for f in seed-kernel/arch/aarch64/kernel.S seed-kernel/arch/aarch64/mmu.c seed-kernel/arch/aarch64/arch.h seed-kernel/kernel.c tcc-cc/mem.c; do [ -f "$f" ] || { echo "[boot6 $ARCH] missing $f" >&2; exit 1; } done @@ -93,8 +96,10 @@ runscm_prelude scheme1/prelude.scm runscm_runscm "$RUNSCM" runscm_input tcc3 "$BOOT4/tcc3" -runscm_input kernel.S seed-kernel/kernel.S +runscm_input kernel.S seed-kernel/arch/aarch64/kernel.S runscm_input kernel.c seed-kernel/kernel.c +runscm_input arch.h seed-kernel/arch/aarch64/arch.h +runscm_input mmu.c seed-kernel/arch/aarch64/mmu.c runscm_input mem.c tcc-cc/mem.c runscm_export Image diff --git a/seed-kernel/Makefile b/seed-kernel/Makefile @@ -1,12 +1,12 @@ -# seed-kernel — minimal arm64 OS that satisfies docs/OS.md Tier 1. -# -# Build runs inside boot2-alpine-gcc:aarch64 (already arm64-native), so -# everything compiles with the host toolchain — no cross prefixes. +# seed-kernel — minimal OS that satisfies docs/OS.md Tier 1/2. 
-CC := gcc -LD := ld +ARCH ?= aarch64 +CC ?= gcc +LD ?= ld +OBJCOPY ?= objcopy OUT := build -KOBJS := $(OUT)/kasm.o $(OUT)/kernel.o $(OUT)/mem.o +ARCHDIR := arch/$(ARCH) +KOBJS := $(OUT)/kasm.o $(OUT)/kernel.o $(OUT)/mmu.o $(OUT)/mem.o KIMAGE := $(OUT)/kernel.elf KBIN := $(OUT)/Image USER := $(OUT)/init @@ -24,7 +24,14 @@ OUT_IMG_SIZE := 268435456 CFLAGS_COMMON := -nostdlib -ffreestanding -fno-stack-protector \ -fno-pic -static -Wall -Wextra -O2 -mcmodel=large \ -fno-asynchronous-unwind-tables -fno-unwind-tables -KCFLAGS := $(CFLAGS_COMMON) -mgeneral-regs-only +KCFLAGS := $(CFLAGS_COMMON) -I$(ARCHDIR) + +ifeq ($(ARCH),aarch64) +KCFLAGS += -mgeneral-regs-only +USER_ARCH_CFLAGS := -mgeneral-regs-only +else +$(error seed-kernel backend '$(ARCH)' is staged but not boot-wired yet; use ARCH=aarch64) +endif .PHONY: all clean kernel user initramfs all: $(KBIN) $(INITRAMFS) $(INITRAMFS_FORK) $(IN_IMG) $(IN_IMG_FORK) @@ -32,10 +39,13 @@ all: $(KBIN) $(INITRAMFS) $(INITRAMFS_FORK) $(IN_IMG) $(IN_IMG_FORK) $(OUT): mkdir -p $(OUT) -$(OUT)/kasm.o: kernel.S | $(OUT) +$(OUT)/kasm.o: $(ARCHDIR)/kernel.S | $(OUT) + $(CC) $(KCFLAGS) -c -o $@ $< + +$(OUT)/kernel.o: kernel.c $(ARCHDIR)/arch.h | $(OUT) $(CC) $(KCFLAGS) -c -o $@ $< -$(OUT)/kernel.o: kernel.c | $(OUT) +$(OUT)/mmu.o: $(ARCHDIR)/mmu.c $(ARCHDIR)/arch.h | $(OUT) $(CC) $(KCFLAGS) -c -o $@ $< # Shared mem helpers (memcpy/memset/memmove/memcmp). Lives in @@ -45,21 +55,21 @@ $(OUT)/kernel.o: kernel.c | $(OUT) $(OUT)/mem.o: ../tcc-cc/mem.c | $(OUT) $(CC) $(KCFLAGS) -c -o $@ $< -$(KIMAGE): $(KOBJS) kernel.lds - $(LD) -nostdlib -static -T kernel.lds -o $@ $(KOBJS) +$(KIMAGE): $(KOBJS) $(ARCHDIR)/kernel.lds + $(LD) -nostdlib -static -T $(ARCHDIR)/kernel.lds -o $@ $(KOBJS) # Strip ELF down to a flat binary that QEMU's -kernel can load. 
$(KBIN): $(KIMAGE) - objcopy -O binary $< $@ + $(OBJCOPY) -O binary $< $@ $(USER): user/hello.c user/user.lds | $(OUT) - $(CC) $(CFLAGS_COMMON) -mgeneral-regs-only -T user/user.lds -o $@ $< + $(CC) $(CFLAGS_COMMON) $(USER_ARCH_CFLAGS) -T user/user.lds -o $@ $< $(USER_FORK): user/forktest.c user/user.lds | $(OUT) - $(CC) $(CFLAGS_COMMON) -mgeneral-regs-only -T user/user.lds -o $@ $< + $(CC) $(CFLAGS_COMMON) $(USER_ARCH_CFLAGS) -T user/user.lds -o $@ $< $(USER_CHILD): user/child.c user/user.lds | $(OUT) - $(CC) $(CFLAGS_COMMON) -mgeneral-regs-only -T user/user.lds -o $@ $< + $(CC) $(CFLAGS_COMMON) $(USER_ARCH_CFLAGS) -T user/user.lds -o $@ $< $(INITRAMFS): $(USER) cd $(OUT) && printf 'init\n' | cpio -o -H newc > initramfs.cpio diff --git a/seed-kernel/arch/aarch64/arch.h b/seed-kernel/arch/aarch64/arch.h @@ -0,0 +1,86 @@ +/* aarch64 seed-kernel backend contract. */ +#ifndef SEED_ARCH_H +#define SEED_ARCH_H + +#define ARCH_NAME "aarch64" +#define ARCH_ELF_MACHINE 0xb7 +#define ARCH_ELF_MACHINE_NAME "aarch64" + +#define ARCH_DEVICE_ALIAS_BASE 0x100000000UL +#define ARCH_UART0_PA 0x09000000UL +#define ARCH_KERNEL_HEAP_END 0x4b000000UL + +#define ARCH_USER_POOL_A_PA 0x4c000000UL +#define ARCH_USER_POOL_B_PA 0x7c000000UL +#define ARCH_USER_POOL_SIZE 0x30000000UL +#define ARCH_USER_VA_LO 0x00200000UL +#define ARCH_USER_VA_HI 0x30200000UL +#define ARCH_USER_POOL_FIRST_SLOT 1 +#define ARCH_USER_POOL_LAST_SLOT 384 + +#define ARCH_SYS_unlinkat 35 +#define ARCH_SYS_openat 56 +#define ARCH_SYS_close 57 +#define ARCH_SYS_lseek 62 +#define ARCH_SYS_read 63 +#define ARCH_SYS_write 64 +#define ARCH_SYS_exit_group 93 +#define ARCH_SYS_waitid 95 +#define ARCH_SYS_brk 214 +#define ARCH_SYS_spawn 1024 + +#define ARCH_TRAPFRAME_NREGS 31 +struct trapframe { + u64 x[ARCH_TRAPFRAME_NREGS]; + u64 elr; + u64 spsr; +}; + +#define ARCH_SYSCALL_ARG(tf, i) ((tf)->x[(i)]) +#define ARCH_SYSCALL_NR(tf) ((tf)->x[8]) +#define ARCH_SET_RET(tf, v) ((tf)->x[0] = (u64)(v)) +#define ARCH_SET_PC(tf, v) 
((tf)->elr = (u64)(v)) +#define ARCH_TF_PC(tf) ((tf)->elr) +#define ARCH_IS_SYSCALL(cause) ((((u32)(((cause) >> 26) & 0x3f))) == 0x15) + +enum { SR_MAIR_EL1, SR_TCR_EL1, SR_TTBR0_EL1, SR_SCTLR_EL1, + SR_CPACR_EL1, SR_SP_EL0, SR_FAR_EL1 }; +enum { BAR_DSB_SY, BAR_DSB_ISH, BAR_DMB_ISH, BAR_DMB_ISHST, BAR_ISB }; +enum { PAUSE_WFE, PAUSE_WFI, PAUSE_YIELD }; + +extern u64 sysreg_read(int id); +extern void sysreg_write(int id, u64 v); +extern void arm64_barrier(int kind); +extern void arm64_ic_iallu(void); +extern void arm64_tlbi_vmalle1(void); +extern void cpu_pause(int kind); +extern u64 arm64_psci_call(int conduit, u64 fnid); +extern void eret_to_user(u64 entry, u64 sp); +extern void arch_setup_mmu(void); +extern void arch_swap_user_pool(int which); + +#define arch_read_user_sp() sysreg_read(SR_SP_EL0) +#define arch_write_user_sp(v) sysreg_write(SR_SP_EL0, (v)) +#define arch_fault_addr() sysreg_read(SR_FAR_EL1) +#define arch_pause() cpu_pause(PAUSE_WFE) +#define arch_idle_forever() do { for (;;) cpu_pause(PAUSE_WFI); } while (0) +#define arch_mmio_ptr(pa) ((volatile u8 *)(ARCH_DEVICE_ALIAS_BASE + (u64)(pa))) +#define arch_wmb() arm64_barrier(BAR_DMB_ISHST) +#define arch_rmb() arm64_barrier(BAR_DMB_ISH) +#define arch_icache_sync() do { arm64_barrier(BAR_DSB_SY); arm64_ic_iallu(); arm64_barrier(BAR_DSB_SY); arm64_barrier(BAR_ISB); } while (0) +#define arch_icache_context_sync() do { arm64_ic_iallu(); arm64_barrier(BAR_DSB_ISH); arm64_barrier(BAR_ISB); } while (0) +#define arch_system_off() do { arm64_psci_call(0, 0x84000008); arm64_psci_call(1, 0x84000008); } while (0) + +static inline void arch_clear_to_user_entry(struct trapframe *tf, u64 entry) { + for (int i = 0; i < ARCH_TRAPFRAME_NREGS; i++) tf->x[i] = 0; + tf->elr = entry; +} + +static inline void arch_console_putc(char c) { + volatile u32 *dr = (volatile u32 *)(ARCH_DEVICE_ALIAS_BASE + ARCH_UART0_PA + 0x00); + volatile u32 *fr = (volatile u32 *)(ARCH_DEVICE_ALIAS_BASE + ARCH_UART0_PA + 0x18); + while (*fr & (1u 
<< 5)) { } + *dr = (u32)(u8)c; +} + +#endif diff --git a/seed-kernel/kernel.S b/seed-kernel/arch/aarch64/kernel.S diff --git a/seed-kernel/kernel.lds b/seed-kernel/arch/aarch64/kernel.lds diff --git a/seed-kernel/arch/aarch64/mmu.c b/seed-kernel/arch/aarch64/mmu.c @@ -0,0 +1,81 @@ +/* aarch64 stage-1 MMU setup for QEMU virt. */ + +typedef unsigned long u64; +typedef unsigned int u32; +typedef unsigned char u8; + +#include "arch.h" + +__attribute__((aligned(4096))) static u64 l1_pt[512]; +__attribute__((aligned(4096))) static u64 l2_user[512]; + +static u64 pool_pa(int which) { + return which ? ARCH_USER_POOL_B_PA : ARCH_USER_POOL_A_PA; +} + +void arch_setup_mmu(void) { + /* Block-descriptor attribute bits (block at L1 = bit[1]=0). + * V(0)=1, block(1)=0, AttrIdx[4:2]=Attr0(Normal)/Attr1(Device), + * NS(5)=0, AP[7:6]=00 (RW EL1 only), SH[9:8]=11 (ISH), AF(10)=1, + * nG(11)=0 -> 0x701 (Normal) / 0x705 (Device-nGnRnE). + * Block descriptors at L2 use the same bit layout. */ + u64 normal = 0x701; + u64 device = 0x705; + + for (int i = 0; i < 512; i++) l1_pt[i] = 0; + + l2_user[0] = 0; + for (int i = ARCH_USER_POOL_FIRST_SLOT; i <= ARCH_USER_POOL_LAST_SLOT; i++) { + u64 pa = ARCH_USER_POOL_A_PA + (u64)(i - ARCH_USER_POOL_FIRST_SLOT) * 0x200000UL; + l2_user[i] = pa | normal; + } + for (int i = ARCH_USER_POOL_LAST_SLOT + 1; i < 512; i++) { + u64 pa = (u64)i * 0x200000UL; + l2_user[i] = pa | device; + } + + l1_pt[0] = (u64)l2_user | 0x3UL; + l1_pt[1] = 0x40000000UL | normal; + l1_pt[2] = 0x80000000UL | normal; + l1_pt[3] = 0xc0000000UL | normal; + l1_pt[4] = 0x00000000UL | device; + + sysreg_write(SR_MAIR_EL1, 0x00000000000000ffUL); + + u64 tcr = (u64)25 /* T0SZ: 39-bit VA */ + | ((u64)1 << 8) /* IRGN0 = WBWA */ + | ((u64)1 << 10) /* ORGN0 = WBWA */ + | ((u64)3 << 12) /* SH0 = inner shareable */ + | ((u64)0 << 14) /* TG0 = 4KB */ + | ((u64)1 << 23) /* EPD1 = disable TTBR1 walks */ + | ((u64)2 << 32); /* IPS = 40-bit phys */ + sysreg_write(SR_TCR_EL1, tcr); + 
sysreg_write(SR_TTBR0_EL1, (u64)l1_pt); + + arm64_ic_iallu(); + arm64_barrier(BAR_DSB_ISH); + arm64_tlbi_vmalle1(); + arm64_barrier(BAR_DSB_ISH); + arm64_barrier(BAR_ISB); + + u64 sctlr = sysreg_read(SR_SCTLR_EL1); + sctlr &= ~(u64)((1 << 1) | (1 << 19)); + sctlr |= (u64)((1 << 0) | (1 << 2) | (1 << 12)); + sysreg_write(SR_SCTLR_EL1, sctlr); + arm64_barrier(BAR_ISB); + + sysreg_write(SR_CPACR_EL1, (u64)3 << 20); + arm64_barrier(BAR_ISB); +} + +void arch_swap_user_pool(int which) { + u64 normal = 0x701; + u64 base = pool_pa(which); + for (int i = ARCH_USER_POOL_FIRST_SLOT; i <= ARCH_USER_POOL_LAST_SLOT; i++) { + l2_user[i] = (base + (u64)(i - ARCH_USER_POOL_FIRST_SLOT) * 0x200000UL) | normal; + } + arm64_barrier(BAR_DSB_ISH); + arm64_tlbi_vmalle1(); + arm64_barrier(BAR_DSB_ISH); + arm64_barrier(BAR_ISB); +} diff --git a/seed-kernel/arch/amd64/arch.h b/seed-kernel/arch/amd64/arch.h @@ -0,0 +1,75 @@ +/* amd64 seed-kernel backend constants. + * Low-level boot/trap/MMU and block transport are kept out of common C. 
*/ +#ifndef SEED_ARCH_H +#define SEED_ARCH_H + +#define ARCH_NAME "amd64" +#define ARCH_ELF_MACHINE 0x3e +#define ARCH_ELF_MACHINE_NAME "amd64" + +#define ARCH_DEVICE_ALIAS_BASE 0xffff800000000000UL +#define ARCH_UART0_PA 0x000003f8UL +#define ARCH_KERNEL_HEAP_END 0x04b00000UL + +#define ARCH_USER_POOL_A_PA 0x04c00000UL +#define ARCH_USER_POOL_B_PA 0x34c00000UL +#define ARCH_USER_POOL_SIZE 0x30000000UL +#define ARCH_USER_VA_LO 0x00200000UL +#define ARCH_USER_VA_HI 0x30200000UL +#define ARCH_USER_POOL_FIRST_SLOT 1 +#define ARCH_USER_POOL_LAST_SLOT 384 + +#define ARCH_SYS_read 0 +#define ARCH_SYS_write 1 +#define ARCH_SYS_close 3 +#define ARCH_SYS_lseek 8 +#define ARCH_SYS_brk 12 +#define ARCH_SYS_exit_group 60 +#define ARCH_SYS_waitid 247 +#define ARCH_SYS_openat 257 +#define ARCH_SYS_unlinkat 263 +#define ARCH_SYS_spawn 1024 + +struct trapframe { + u64 x[24]; + u64 pc; + u64 flags; +}; + +#define ARCH_TRAPFRAME_NREGS 24 +#define ARCH_SYSCALL_ARG(tf, i) ((i) == 0 ? (tf)->x[0] : \ + (i) == 1 ? (tf)->x[1] : \ + (i) == 2 ? (tf)->x[2] : \ + (i) == 3 ? (tf)->x[3] : \ + (i) == 4 ? 
(tf)->x[4] : (tf)->x[5]) +#define ARCH_SYSCALL_NR(tf) ((tf)->x[6]) +#define ARCH_SET_RET(tf, v) ((tf)->x[6] = (u64)(v)) +#define ARCH_SET_PC(tf, v) ((tf)->pc = (u64)(v)) +#define ARCH_TF_PC(tf) ((tf)->pc) +#define ARCH_IS_SYSCALL(cause) ((cause) == 0) + +extern void arch_setup_mmu(void); +extern void arch_swap_user_pool(int which); +extern u64 arch_read_user_sp(void); +extern void arch_write_user_sp(u64 v); +extern u64 arch_fault_addr(void); +extern void arch_pause(void); +extern void arch_idle_forever(void); +extern volatile u8 *arch_mmio_ptr(u64 pa); +extern void arch_wmb(void); +extern void arch_rmb(void); +extern void arch_icache_sync(void); +extern void arch_icache_context_sync(void); +extern void arch_system_off(void); +extern void eret_to_user(u64 entry, u64 sp); + +static inline void arch_clear_to_user_entry(struct trapframe *tf, u64 entry) { + for (int i = 0; i < ARCH_TRAPFRAME_NREGS; i++) tf->x[i] = 0; + tf->pc = entry; +} + +static inline void arch_console_putc(char c) { + (void)c; +} + +#endif diff --git a/seed-kernel/arch/riscv64/arch.h b/seed-kernel/arch/riscv64/arch.h @@ -0,0 +1,74 @@ +/* riscv64 seed-kernel backend constants. + * Low-level boot/trap/MMU files are intentionally isolated under this + * directory; common kernel.c only depends on the contract below. 
*/ +#ifndef SEED_ARCH_H +#define SEED_ARCH_H + +#define ARCH_NAME "riscv64" +#define ARCH_ELF_MACHINE 0xf3 +#define ARCH_ELF_MACHINE_NAME "riscv64" + +#define ARCH_DEVICE_ALIAS_BASE 0x100000000UL +#define ARCH_UART0_PA 0x10000000UL +#define ARCH_KERNEL_HEAP_END 0x8b000000UL + +#define ARCH_USER_POOL_A_PA 0x8c000000UL +#define ARCH_USER_POOL_B_PA 0xbc000000UL +#define ARCH_USER_POOL_SIZE 0x30000000UL +#define ARCH_USER_VA_LO 0x00200000UL +#define ARCH_USER_VA_HI 0x30200000UL +#define ARCH_USER_POOL_FIRST_SLOT 1 +#define ARCH_USER_POOL_LAST_SLOT 384 + +#define ARCH_SYS_unlinkat 35 +#define ARCH_SYS_openat 56 +#define ARCH_SYS_close 57 +#define ARCH_SYS_lseek 62 +#define ARCH_SYS_read 63 +#define ARCH_SYS_write 64 +#define ARCH_SYS_exit_group 93 +#define ARCH_SYS_waitid 95 +#define ARCH_SYS_brk 214 +#define ARCH_SYS_spawn 1024 + +struct trapframe { + u64 x[32]; + u64 pc; + u64 status; +}; + +#define ARCH_TRAPFRAME_NREGS 32 +#define ARCH_SYSCALL_ARG(tf, i) ((tf)->x[10 + (i)]) +#define ARCH_SYSCALL_NR(tf) ((tf)->x[17]) +#define ARCH_SET_RET(tf, v) ((tf)->x[10] = (u64)(v)) +#define ARCH_SET_PC(tf, v) ((tf)->pc = (u64)(v)) +#define ARCH_TF_PC(tf) ((tf)->pc) +#define ARCH_IS_SYSCALL(cause) ((cause) == 8) + +extern void arch_setup_mmu(void); +extern void arch_swap_user_pool(int which); +extern u64 arch_read_user_sp(void); +extern void arch_write_user_sp(u64 v); +extern u64 arch_fault_addr(void); +extern void arch_pause(void); +extern void arch_idle_forever(void); +extern volatile u8 *arch_mmio_ptr(u64 pa); +extern void arch_wmb(void); +extern void arch_rmb(void); +extern void arch_icache_sync(void); +extern void arch_icache_context_sync(void); +extern void arch_system_off(void); +extern void eret_to_user(u64 entry, u64 sp); + +static inline void arch_clear_to_user_entry(struct trapframe *tf, u64 entry) { + for (int i = 0; i < ARCH_TRAPFRAME_NREGS; i++) tf->x[i] = 0; + tf->pc = entry; +} + +static inline void arch_console_putc(char c) { + volatile u8 *uart = (volatile u8 
*)(ARCH_DEVICE_ALIAS_BASE + ARCH_UART0_PA); + while (!(uart[5] & 0x20)) { } + uart[0] = (u8)c; +} + +#endif diff --git a/seed-kernel/kernel.c b/seed-kernel/kernel.c @@ -1,11 +1,12 @@ /* seed kernel — minimal OS satisfying docs/OS.md Tier 1. * - * Boots via Linux arm64 boot protocol (-kernel + two virtio-blk-MMIO - * disks), parses the DTB to find virtio_mmio nodes + memory, brings up + * Boots through an arch backend with two virtio-blk-MMIO disks, parses + * the DTB to find virtio_mmio nodes + memory, brings up * a small polling virtio-blk driver, reads the cpio newc archive from * blk0 (read-only) into the in-memory tmpfs, loads /init (a static - * aarch64 ELF), and ERETs into it at EL1t. SVC traps land in trap_sync() - * and dispatch the eight Tier-1 syscalls. On exit, the tmpfs is + * target ELF), and enters it through the arch trap-return path. Syscall + * traps land in trap_sync() and dispatch Tier-1/Tier-2 syscalls. On exit, + * the tmpfs is * serialised to blk1 in a small SEEDFS table for the host extractor. */ @@ -16,22 +17,12 @@ typedef unsigned long u64; typedef long i64; typedef int i32; -/* ─── PL011 console ─────────────────────────────────────────────────────── */ +#include "arch.h" -/* The PL011 lives at PA 0x09000000 on QEMU virt. Once the MMU comes up the - * kernel reaches it through the device alias mapped into VA 4 GB..5 GB - * (L1[4]). That keeps the entire low 1 GB of VA available as user RAM — - * device MMIO at user-space VAs would otherwise collide with the boot2 - * chain's BSS, which can run past 256 MB. 
*/ -#define DEVICE_ALIAS_BASE 0x100000000UL -#define UART0 (DEVICE_ALIAS_BASE + 0x09000000UL) -#define UART_DR ((volatile u32 *)(UART0 + 0x00)) -#define UART_FR ((volatile u32 *)(UART0 + 0x18)) -#define UART_FR_TXFF (1u << 5) +/* ─── Console ───────────────────────────────────────────────────────────── */ static void uart_putc(char c) { - while (*UART_FR & UART_FR_TXFF) { } - *UART_DR = (u32)(u8)c; + arch_console_putc(c); } static void uart_puts(const char *s) { @@ -56,23 +47,7 @@ static void uart_putd(i64 v) { while (i--) uart_putc(buf[i]); } -/* ─── kernel.S thunk API ──────────────────────────────────────────────── - * SR_*, BAR_*, PAUSE_* ids must match the #defines at the bottom of - * kernel.S in declaration order. */ -enum { SR_MAIR_EL1, SR_TCR_EL1, SR_TTBR0_EL1, SR_SCTLR_EL1, - SR_CPACR_EL1, SR_SP_EL0, SR_FAR_EL1 }; -enum { BAR_DSB_SY, BAR_DSB_ISH, BAR_DMB_ISH, BAR_DMB_ISHST, BAR_ISB }; -enum { PAUSE_WFE, PAUSE_WFI, PAUSE_YIELD }; - -extern u64 sysreg_read(int id); -extern void sysreg_write(int id, u64 v); -extern void arm64_barrier(int kind); -extern void arm64_ic_iallu(void); -extern void arm64_tlbi_vmalle1(void); -extern void cpu_pause(int kind); -extern u64 arm64_psci_call(int conduit, u64 fnid); - -__attribute__((noreturn)) static void hang(void) { for (;;) cpu_pause(PAUSE_WFE); } +__attribute__((noreturn)) static void hang(void) { for (;;) arch_pause(); } /* ─── Tiny libc-ish helpers ─────────────────────────────────────────────── */ @@ -107,126 +82,17 @@ static void mem_set(void *d, int c, u64 n) { for (u64 i = 0; i < n; i++) dd[i] = (u8)c; } -/* ─── MMU bring-up ──────────────────────────────────────────────────────── */ -/* Two-level page table: - * L1[0] → l2_user table descriptor (VA 0..1 GB, 2 MB blocks) - * L1[1..3] = Normal 1 GB blocks identity-mapping VA 1..4 GB (RAM + high MMIO) - * L1[4] = Device 1 GB block at PA 0 (VA 4..5 GB mirrors PA 0..1 GB as - * Device-nGnRnE — the kernel's only path to UART/GIC/virtio/PCI - * once we hand the low 1 
GB over to user code). - * - * The l2_user table carves the low 1 GB into: - * slot 0 (VA 0..2 MB) invalid — NULL pointer traps - * slots 1..N (VA 2 MB..USER_VA_HI) Normal user RAM, backed by one - * of two 768 MB physical pools - * (USER_POOL_A_PA / USER_POOL_B_PA); - * sys_spawn swaps which is mapped - * here without any mem_cpy. - * N=384 (slots 1..384, 768 MB) gives - * tcc-boot2's 512 MB BSS plus brk room. - * slots N+1..511 (VA USER_VA_HI..1G) Device-identity, kept for safety — - * nothing user-side touches them, and - * the kernel uses the high alias. - * - * With MMU on + Normal memory, unaligned loads/stores work — gcc's auto- - * vectorised 64-bit load in be64() stops trapping. */ -__attribute__((aligned(4096))) static u64 l1_pt[512]; -__attribute__((aligned(4096))) static u64 l2_user[512]; - -/* Two physical pools (A, B) backing the user low-VA window. On spawn - * we swap l2_user[] from one to the other, TLB-invalidate, and load the - * new ELF into the other pool — no memory copy. The atomic spawn syscall - * (no userspace gap between fork and exec) means the child never reads - * the parent's pool, so no snapshot is needed. MAX_PROC_DEPTH=1 means - * two pools is sufficient (the prelude only forks one level deep). - * - * With QEMU -m 2048M (RAM 0x40000000–0xc0000000), the layout is: - * 0x40000000–0x4c000000 kernel image + kheap (192 MB) - * 0x4c000000–0x7c000000 user RAM pool A (768 MB) - * 0x7c000000–0xac000000 user RAM pool B (768 MB) - * 0xac000000–0xc0000000 spare (320 MB) - */ -#define USER_POOL_A_PA 0x4c000000UL -#define USER_POOL_B_PA 0x7c000000UL -#define USER_POOL_SIZE 0x30000000UL /* 768 MB */ -#define USER_VA_LO 0x00200000UL /* slot 1 — first mapped 2 MB block */ -#define USER_VA_HI 0x30200000UL /* slot 385 — first device-only block */ -#define USER_POOL_FIRST_SLOT 1 -#define USER_POOL_LAST_SLOT 384 /* USER_POOL_SIZE / 2 MB */ +/* User address-space constants are supplied by the arch backend. 
*/ +#define USER_POOL_A_PA ARCH_USER_POOL_A_PA +#define USER_POOL_B_PA ARCH_USER_POOL_B_PA +#define USER_POOL_SIZE ARCH_USER_POOL_SIZE +#define USER_VA_LO ARCH_USER_VA_LO +#define USER_VA_HI ARCH_USER_VA_HI +#define USER_POOL_FIRST_SLOT ARCH_USER_POOL_FIRST_SLOT +#define USER_POOL_LAST_SLOT ARCH_USER_POOL_LAST_SLOT /* 0 = pool A is currently mapped at user VAs; 1 = pool B. */ static int current_pool = 0; -static u64 pool_pa(int which) { return which ? USER_POOL_B_PA : USER_POOL_A_PA; } - -static void setup_mmu(void) { - /* Block-descriptor attribute bits (block at L1 = bit[1]=0). - * V(0)=1, block(1)=0, AttrIdx[4:2]=Attr0(Normal)/Attr1(Device), - * NS(5)=0, AP[7:6]=00 (RW EL1 only), SH[9:8]=11 (ISH), AF(10)=1, - * nG(11)=0 → 0x701 (Normal) / 0x705 (Device-nGnRnE). - * Block descriptors at L2 use the same bit layout. */ - u64 normal = 0x701; - u64 device = 0x705; - - for (int i = 0; i < 512; i++) l1_pt[i] = 0; - - /* L2 user table: slot 0 invalid; slots 1..USER_POOL_LAST_SLOT Normal - * RAM backed by pool A initially; slots above that Device-identity. */ - l2_user[0] = 0; - for (int i = USER_POOL_FIRST_SLOT; i <= USER_POOL_LAST_SLOT; i++) { - u64 pa = USER_POOL_A_PA + (u64)(i - USER_POOL_FIRST_SLOT) * 0x200000UL; - l2_user[i] = pa | normal; - } - for (int i = USER_POOL_LAST_SLOT + 1; i < 512; i++) { - u64 pa = (u64)i * 0x200000UL; - l2_user[i] = pa | device; - } - - /* L1[0] table descriptor → l2_user. Table-desc encoding at L1 is - * bits [1:0] = 0b11, bits [47:12] = next-level table PA. */ - l1_pt[0] = (u64)l2_user | 0x3UL; - l1_pt[1] = 0x40000000UL | normal; - l1_pt[2] = 0x80000000UL | normal; - l1_pt[3] = 0xc0000000UL | normal; - /* L1[4]: Device 1 GB block aliasing PA 0..1 GB into VA 4 GB..5 GB so - * the kernel can still reach UART/GIC/virtio after we hand the low 1 - * GB over to user mappings. 
*/ - l1_pt[4] = 0x00000000UL | device; - - /* MAIR: Attr0 = 0xff (Normal WB-WA), Attr1 = 0x00 (Device-nGnRnE) */ - u64 mair = 0x00000000000000ffUL; - sysreg_write(SR_MAIR_EL1, mair); - - u64 tcr = (u64)25 /* T0SZ: 39-bit VA */ - | ((u64)1 << 8) /* IRGN0 = WBWA */ - | ((u64)1 << 10) /* ORGN0 = WBWA */ - | ((u64)3 << 12) /* SH0 = inner shareable */ - | ((u64)0 << 14) /* TG0 = 4KB */ - | ((u64)1 << 23) /* EPD1 = disable TTBR1 walks */ - | ((u64)2 << 32); /* IPS = 40-bit phys */ - sysreg_write(SR_TCR_EL1, tcr); - sysreg_write(SR_TTBR0_EL1, (u64)l1_pt); - - arm64_ic_iallu(); - arm64_barrier(BAR_DSB_ISH); - arm64_tlbi_vmalle1(); - arm64_barrier(BAR_DSB_ISH); - arm64_barrier(BAR_ISB); - - u64 sctlr = sysreg_read(SR_SCTLR_EL1); - sctlr &= ~(u64)((1 << 1) | (1 << 19)); /* clear A (alignment), WXN */ - sctlr |= (u64)((1 << 0) /* M — MMU on */ - | (1 << 2) /* C — D-cache on */ - | (1 << 12)); /* I — I-cache on */ - sysreg_write(SR_SCTLR_EL1, sctlr); - arm64_barrier(BAR_ISB); - - /* CPACR_EL1.FPEN = 0b11: don't trap FP/ASIMD from EL0 or EL1. - * tcc-built user binaries (notably the self-rebuilt tcc1) emit FP - * register saves in their start glue; default FPEN=00 traps those - * to EL1 with EC=0x07. */ - sysreg_write(SR_CPACR_EL1, (u64)3 << 20); - arm64_barrier(BAR_ISB); -} /* ─── Kernel heap (bump allocator) ──────────────────────────────────────── */ @@ -583,17 +449,17 @@ static int blk_request_one(int devi, u32 type, u64 sector, void *buf, u64 nsec) u16 head = 0; u16 ai = avail->idx; avail->ring[ai % VQ_SIZE] = head; - arm64_barrier(BAR_DMB_ISHST); + arch_wmb(); avail->idx = ai + 1; - arm64_barrier(BAR_DMB_ISHST); + arch_wmb(); mmio_w32(d, VIRTIO_MMIO_QUEUE_NOTIFY, 0); /* Poll used.idx — single in-flight, advances by exactly one. */ while (used->idx == ai) { - cpu_pause(PAUSE_YIELD); + arch_pause(); } - arm64_barrier(BAR_DMB_ISH); + arch_rmb(); /* Acknowledge any pending interrupt status (we don't service IRQs but * the device sets these bits anyway). 
*/ @@ -645,7 +511,7 @@ static void blk_init(struct dtb_info *dt) { /* Stage into blk_devs[n_blocks] so blk_init_one's index-derived * vq page assignment is correct from the start. */ struct blk_dev *d = &blk_devs[n_blocks]; - d->regs = (volatile u8 *)(DEVICE_ALIAS_BASE + pa); + d->regs = arch_mmio_ptr(pa); d->capacity_sectors = 0; d->present = 0; int r = blk_init_one(d); @@ -865,11 +731,11 @@ static u64 load_elf(const u8 *elf) { eh->e_ident[2] == 'L' && eh->e_ident[3] == 'F')) { uart_puts("ELF: bad magic\n"); return 0; } - if (eh->e_machine != 0xb7) { /* EM_AARCH64 */ - uart_puts("ELF: not aarch64\n"); return 0; + if (eh->e_machine != ARCH_ELF_MACHINE) { + uart_puts("ELF: not "); uart_puts(ARCH_ELF_MACHINE_NAME); uart_puts("\n"); return 0; } - /* p_flags (R/W/X) are deliberately ignored: the L2 user mapping is one - * giant Normal-memory RWX-at-EL1 region (see setup_mmu). OS.md + /* p_flags (R/W/X) are deliberately ignored: the user mapping is one + * giant Normal-memory RWX region (see arch_setup_mmu). OS.md * §"Memory model" permits this — there's no W^X enforcement in the * contract, and tcc-boot2 never JITs. * @@ -901,10 +767,7 @@ static u64 load_elf(const u8 *elf) { /* Round up to 16 bytes so callers can use it directly as brk_base. */ g_user_image_end = (hi + 15) & ~15UL; /* I-cache sync (cheap insurance even with caches off). */ - arm64_barrier(BAR_DSB_SY); - arm64_ic_iallu(); - arm64_barrier(BAR_DSB_SY); - arm64_barrier(BAR_ISB); + arch_icache_sync(); return eh->e_entry; } @@ -935,23 +798,22 @@ static u64 brk_max; #define AT_FDCWD (-100) -#define SYS_unlinkat 35 -#define SYS_openat 56 -#define SYS_close 57 -#define SYS_lseek 62 -#define SYS_read 63 -#define SYS_write 64 -#define SYS_exit_group 93 -#define SYS_waitid 95 -#define SYS_brk 214 -/* Private syscall number, deliberately outside the Linux aarch64 range - * (last allocated is 462 = futex_requeue, plus a small reserved tail). 
+#define SYS_unlinkat ARCH_SYS_unlinkat +#define SYS_openat ARCH_SYS_openat +#define SYS_close ARCH_SYS_close +#define SYS_lseek ARCH_SYS_lseek +#define SYS_read ARCH_SYS_read +#define SYS_write ARCH_SYS_write +#define SYS_exit_group ARCH_SYS_exit_group +#define SYS_waitid ARCH_SYS_waitid +#define SYS_brk ARCH_SYS_brk +/* Private syscall number, deliberately outside the normal Linux range. * The scheme1 prelude probes (sys-spawn) once at init: on Linux this * number is unmapped so the probe gets -ENOSYS and the prelude falls * back to the classic clone+execve path; on the seed kernel the probe * succeeds (or returns -ENOENT for a missing file) and the prelude uses * sys-spawn for every (run …) thereafter. */ -#define SYS_spawn 1024 +#define SYS_spawn ARCH_SYS_spawn #define ECHILD 10 #define EAGAIN 11 @@ -1065,26 +927,26 @@ static i64 sys_unlinkat(int dirfd, const char *path, int flags) { * * sys_spawn → capture path+argv into kernel buffers (still reading from * the parent's pool); push parent state (regs, brk, fd - * table, current pool) onto proc_stack; remap l2_user[] to + * table, current pool) onto proc_stack; remap user VAs to * the alternate pool with NO COPY (the child won't read any * byte of the parent's pool — load_elf overwrites just the * PT_LOAD ranges and build_user_stack writes the top of the * user VA window); load_elf into the alternate pool, reset - * brk, build the user stack, rewrite tf so eret enters the - * new program at its entry with the new sp_el0. + * brk, build the user stack, rewrite tf so trap return + * enters the new program at its entry with the new stack. 
* sys_exit → if proc_stack non-empty: stash exit code in last_child, - * swap l2_user[] back to the parent's pool (no copy — the + * swap user VAs back to the parent's pool (no copy — the * parent's pool was never written by the child), restore - * regs/brk/fds, ic iallu (the user VAs now resolve to - * different physical pages), set tf so eret resumes the - * parent's spawn() call with x0 = pid. If proc_stack empty: - * real exit (dump tmpfs, PSCI off). + * regs/brk/fds, cache-sync (the user VAs now resolve to + * different physical pages), set tf so trap return resumes + * the parent's spawn() call with child pid. If proc_stack + * empty: real exit (dump tmpfs, arch shutdown). * sys_waitid → return last_child's exit code via the siginfo struct. * * No actual concurrency. The "parent" is suspended at the moment of spawn * and resumed only when the child calls exit_group. * - * Memory cost per spawn: zero copy. l2_user[] rewrite + TLBI + ic iallu + * Memory cost per spawn: zero copy. User-map rewrite + TLB/cache sync * + load_elf (which copies just the new image's PT_LOAD bytes, typically * ~1 MB for tcc). This replaces the previous clone/execve design which * paid one 768 MB mem_cpy per fork to seed the child's pool with parent @@ -1095,12 +957,6 @@ static i64 sys_unlinkat(int dirfd, const char *path, int flags) { * syscall closes that window entirely. */ -struct trapframe { - u64 x[31]; - u64 elr; - u64 spsr; -}; - /* Forward decls for state defined further down. boot5's per-source * compile passes ~25 argv entries / ~750 bytes, but the final * `tcc -ar rcs libc.a obj1 … obj1263` call passes ~1300 entries totalling @@ -1116,13 +972,11 @@ static int tokenise(char *src, char **argv, int cap); struct proc_save { int active; u64 child_pid; - /* Saved trap-frame state — enough to resume the parent at the SVC - * instruction following its sys_spawn. x[0] is overwritten with - * child_pid at restore time so the parent sees a non-zero return. 
*/ - u64 regs[31]; - u64 elr; - u64 spsr; - u64 sp_el0; + /* Saved trap-frame state — enough to resume the parent at the trap + * instruction following its sys_spawn. The return register is + * overwritten with child_pid at restore time. */ + struct trapframe tf; + u64 user_sp; /* Per-process state at the moment of spawn. brk_base is saved alongside * brk_cur because sys_spawn resets it above the new image's end-of-bss; * the parent's value comes back with the parent's pool. */ @@ -1132,19 +986,9 @@ struct proc_save { int pool_save; /* parent's user pool (0=A, 1=B) */ }; -/* Rewrite the user-VA L2 entries to point at pool `which`, then flush TLB. - * The kernel runs from a high-VA alias (L1[1..3] for RAM, L1[4] for MMIO), - * so the swap doesn't disturb the kernel's own translations. */ +/* Rewrite the user-VA mapping to point at pool `which`, then flush TLB. */ static void swap_user_pool(int which) { - u64 normal = 0x701; - u64 base = pool_pa(which); - for (int i = USER_POOL_FIRST_SLOT; i <= USER_POOL_LAST_SLOT; i++) { - l2_user[i] = (base + (u64)(i - USER_POOL_FIRST_SLOT) * 0x200000UL) | normal; - } - arm64_barrier(BAR_DSB_ISH); - arm64_tlbi_vmalle1(); - arm64_barrier(BAR_DSB_ISH); - arm64_barrier(BAR_ISB); + arch_swap_user_pool(which); current_pool = which; } @@ -1216,14 +1060,12 @@ static i64 sys_spawn(struct trapframe *tf, const char *path, char **argv) { /* Save parent state — regs, brk, fd table, which pool the parent ran * in. After sys_exit_or_resume_parent restores from this frame, the - * parent's spawn() call returns with x[0] = child_pid. */ + * parent's spawn() call returns with child_pid. 
*/ struct proc_save *p = &proc_stack[proc_depth]; p->active = 1; p->child_pid = g_next_pid++; - for (int i = 0; i < 31; i++) p->regs[i] = tf->x[i]; - p->elr = tf->elr; - p->spsr = tf->spsr; - p->sp_el0 = sysreg_read(SR_SP_EL0); + p->tf = *tf; + p->user_sp = arch_read_user_sp(); p->brk_base_save = brk_base; p->brk_cur_save = brk_cur; for (int i = 0; i < MAX_FD; i++) p->fdtab_save[i] = fdtab[i]; @@ -1260,13 +1102,13 @@ static i64 sys_spawn(struct trapframe *tf, const char *path, char **argv) { * entry with a clean register state and the new stack. The parent's * regs sit on proc_stack until sys_exit_or_resume_parent restores * them on child exit. */ - for (int i = 0; i < 31; i++) tf->x[i] = 0; - tf->elr = entry; - /* sp_el0 isn't in the trap frame — set it directly; it survives - * until eret since the kernel uses SP_ELx while in trap_sync. */ - sysreg_write(SR_SP_EL0, new_sp); - /* Returning 0; dispatcher writes tf->x[0] = 0. The child's _start - * reads argc/argv from the stack, so x[0] is don't-care. */ + arch_clear_to_user_entry(tf, entry); + /* Some backends keep the user stack pointer outside the saved + * trapframe, so set it through the arch hook. */ + arch_write_user_sp(new_sp); + /* Returning 0; dispatcher writes the arch return register. The child's + * _start reads argc/argv from the stack, so the return register is + * don't-care. */ return 0; } @@ -1404,10 +1246,8 @@ static void sys_exit_final(int code) { g_exited = 1; dump_tmpfs_blk(); uart_puts("\n[seed] user exit_group("); uart_putd(code); uart_puts(")\n"); - /* Try PSCI SYSTEM_OFF so QEMU exits cleanly; fall back to spin. 
*/ - arm64_psci_call(0 /*HVC*/, 0x84000008); - arm64_psci_call(1 /*SMC*/, 0x84000008); - for (;;) cpu_pause(PAUSE_WFI); + arch_system_off(); + arch_idle_forever(); } /* Dispatcher-side exit_group: pops proc_stack and resumes the parent's @@ -1429,23 +1269,17 @@ static int sys_exit_or_resume_parent(struct trapframe *tf, int code) { brk_base = p->brk_base_save; brk_cur = p->brk_cur_save; for (int i = 0; i < MAX_FD; i++) fdtab[i] = p->fdtab_save[i]; - /* Restore registers (overwriting x[0] with child_pid, since the - * dispatcher will write tf->x[0] = (u64)r before eret — we want - * the parent's sys_spawn to see child_pid as the syscall return). */ - for (int i = 0; i < 31; i++) tf->x[i] = p->regs[i]; - tf->elr = p->elr; - tf->spsr = p->spsr; - sysreg_write(SR_SP_EL0, p->sp_el0); + /* Restore registers; the dispatcher writes the child pid into the + * arch return register below. */ + *tf = p->tf; + arch_write_user_sp(p->user_sp); /* I-cache invalidation. The parent's pool was never written, so * its instruction bytes (in DRAM) are byte-identical to what was * originally fetched. But the same user VAs were just used to * fetch the child's instructions from the other physical pool; - * aarch64 I-caches may hold lines tagged by VA whose translation - * just changed. `ic iallu` invalidates by VA so subsequent fetches - * miss and re-walk through the freshly-swapped L2. */ - arm64_ic_iallu(); - arm64_barrier(BAR_DSB_ISH); - arm64_barrier(BAR_ISB); + * I-caches may hold lines tagged by VA whose translation just + * changed, so the arch backend invalidates whatever is needed. 
*/ + arch_icache_context_sync(); return (int)p->child_pid; /* >0: tells dispatcher to write this as r */ } sys_exit_final(code); @@ -1459,11 +1293,11 @@ void trap_kernel(u64 esr, struct trapframe *tf); void trap_unhandled(u64 esr, struct trapframe *tf); i64 trap_sync(u64 esr, struct trapframe *tf) { - u32 ec = (u32)((esr >> 26) & 0x3f); - if (ec == 0x15) { /* SVC, AArch64 */ - u64 nr = tf->x[8]; - u64 a0 = tf->x[0], a1 = tf->x[1], a2 = tf->x[2]; - u64 a3 = tf->x[3], a4 = tf->x[4], a5 = tf->x[5]; + if (ARCH_IS_SYSCALL(esr)) { + u64 nr = ARCH_SYSCALL_NR(tf); + u64 a0 = ARCH_SYSCALL_ARG(tf, 0), a1 = ARCH_SYSCALL_ARG(tf, 1); + u64 a2 = ARCH_SYSCALL_ARG(tf, 2), a3 = ARCH_SYSCALL_ARG(tf, 3); + u64 a4 = ARCH_SYSCALL_ARG(tf, 4), a5 = ARCH_SYSCALL_ARG(tf, 5); i64 r; switch (nr) { case SYS_read: r = sys_read((int)a0, (void *)a1, a2); break; @@ -1478,10 +1312,9 @@ i64 trap_sync(u64 esr, struct trapframe *tf) { case SYS_exit_group: r = sys_exit_or_resume_parent(tf, (int)a0); /* If we resumed the parent, sys_exit_or_resume_parent has - * rewritten tf->x[0..30] and tf->elr — overriding tf->x[0] - * below would corrupt the parent's register state. */ + * rewritten the trapframe; set only the arch return register. 
*/ if (proc_depth >= 0 && r != 0) { - tf->x[0] = (u64)r; + ARCH_SET_RET(tf, r); return 0; } break; @@ -1489,22 +1322,22 @@ i64 trap_sync(u64 esr, struct trapframe *tf) { uart_puts("[seed] ENOSYS "); uart_putd((i64)nr); uart_puts("\n"); r = -38; /* ENOSYS */ } - tf->x[0] = (u64)r; + ARCH_SET_RET(tf, r); (void)a4; (void)a5; return 0; } uart_puts("[seed] PANIC: user sync, ESR="); uart_putx(esr); - uart_puts(" ELR="); uart_putx(tf->elr); + uart_puts(" ELR="); uart_putx(ARCH_TF_PC(tf)); uart_puts(" FAR="); - u64 far = sysreg_read(SR_FAR_EL1); uart_putx(far); + u64 far = arch_fault_addr(); uart_putx(far); uart_puts("\n"); hang(); } void trap_kernel(u64 esr, struct trapframe *tf) { - u64 far = sysreg_read(SR_FAR_EL1); + u64 far = arch_fault_addr(); uart_puts("[seed] PANIC: kernel sync, ESR="); uart_putx(esr); - uart_puts(" ELR="); uart_putx(tf->elr); + uart_puts(" ELR="); uart_putx(ARCH_TF_PC(tf)); uart_puts(" FAR="); uart_putx(far); uart_puts("\n"); hang(); @@ -1512,15 +1345,13 @@ void trap_kernel(u64 esr, struct trapframe *tf) { void trap_unhandled(u64 esr, struct trapframe *tf) { uart_puts("[seed] PANIC: unhandled exception, ESR="); uart_putx(esr); - uart_puts(" ELR="); uart_putx(tf->elr); + uart_puts(" ELR="); uart_putx(ARCH_TF_PC(tf)); uart_puts("\n"); hang(); } /* ─── User stack setup + entry ──────────────────────────────────────────── */ -extern void eret_to_user(u64 entry, u64 sp); - /* Tokenise `src` in place (whitespace separators) into argv slots. * Writes pointers into argv[0..argc-1] and returns argc. Stops at cap. */ static int tokenise(char *src, char **argv, int cap) { @@ -1574,16 +1405,17 @@ static u64 build_user_stack(u64 stack_top, int argc, char **argv) { /* ─── kmain ─────────────────────────────────────────────────────────────── */ void kmain(u64 dtb_phys) { - setup_mmu(); + arch_setup_mmu(); /* Bring up heap immediately — placed at a 16MB-aligned offset above * our image, well clear of BSS/stack. 
Without -initrd reserving the * 0x44000000–0x4b000000 region, the full 176 MB is ours from boot. */ u64 image_end = (u64)_end; kheap_ptr = (u8 *)((image_end + 0xfffful) & ~0xfffful); - kheap_end = (u8 *)0x4b000000UL; /* 176MB of heap */ + kheap_end = (u8 *)ARCH_KERNEL_HEAP_END; - uart_puts("\n[seed] arm64 boot, x0/dtb="); uart_putx(dtb_phys); uart_puts("\n"); + uart_puts("\n[seed] "); uart_puts(ARCH_NAME); uart_puts(" boot, dtb="); + uart_putx(dtb_phys); uart_puts("\n"); struct dtb_info dt = {0}; parse_dtb((const void *)dtb_phys, &dt);