boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

mmu.c (5860B)


/*
 * Fixed-width integer aliases used throughout this file.
 * NOTE(review): u64 is `unsigned long`, so this assumes a target where long
 * is 64 bits (LP64); the file also casts pointers to u64 -- confirm against
 * the toolchain in use.
 */
typedef unsigned long u64;
typedef unsigned int u32;
typedef unsigned short u16;
typedef unsigned char u8;
      5 
      6 #include "arch.h"
      7 
      8 #define PTE_P   0x001UL
      9 #define PTE_W   0x002UL
     10 #define PTE_U   0x004UL
     11 #define PTE_PCD 0x010UL
     12 #define PTE_PS  0x080UL
     13 
     14 #define KSEG (PTE_P | PTE_W | PTE_PS)
     15 #define USEG (PTE_P | PTE_W | PTE_U | PTE_PS)
     16 #define DSEG (PTE_P | PTE_W | PTE_PCD | PTE_PS)
     17 
/*
 * Statically allocated paging structures, each 512 u64 entries and aligned
 * to 4096 bytes. How arch_setup_mmu() wires them together:
 *
 *   pml4        root table; its address is handed to amd64_load_cr3()
 *   pdpt_low    installed at pml4[0]
 *   pdpt_alias  installed at pml4[256]
 *   pd0..pd3    page directories behind pdpt_low[0..3] (2 MiB entries);
 *               pd0 additionally carries the user-pool window
 *   pda0..pda3  page directories behind pdpt_alias[0..3]; same physical
 *               slots as pd0..pd3 but filled with DSEG flags
 */
__attribute__((aligned(4096))) static u64 pml4[512];
__attribute__((aligned(4096))) static u64 pdpt_low[512];
__attribute__((aligned(4096))) static u64 pdpt_alias[512];
__attribute__((aligned(4096))) static u64 pd0[512];
__attribute__((aligned(4096))) static u64 pd1[512];
__attribute__((aligned(4096))) static u64 pd2[512];
__attribute__((aligned(4096))) static u64 pd3[512];
__attribute__((aligned(4096))) static u64 pda0[512];
__attribute__((aligned(4096))) static u64 pda1[512];
__attribute__((aligned(4096))) static u64 pda2[512];
__attribute__((aligned(4096))) static u64 pda3[512];
     29 
/*
 * Operand passed to amd64_lgdt(): the GDT size in bytes minus one, followed
 * by the table's address. Packed so `base` sits directly after the 16-bit
 * `limit` with no padding in between.
 */
struct gdtr {
    u16 limit;  /* sizeof(gdt) - 1 */
    u64 base;   /* address of gdt[] */
} __attribute__((packed));
     34 
/*
 * Operand passed to amd64_lidt(): the IDT size in bytes minus one, followed
 * by the table's address. Packed so `base` sits directly after the 16-bit
 * `limit` with no padding in between.
 */
struct idtr {
    u16 limit;  /* sizeof(idt) - 1 */
    u64 base;   /* address of idt[] */
} __attribute__((packed));
     39 
/*
 * One 16-byte IDT entry, filled in by set_gate(). Packed so the fields land
 * at consecutive offsets with no compiler-inserted padding.
 */
struct idt_gate {
    u16 off0;  /* handler address bits 0-15 */
    u16 sel;   /* code-segment selector; set_gate() always stores 0x08 */
    u8  ist;   /* set_gate() always stores 0 */
    u8  type;  /* 0x8e | (dpl << 5), see set_gate() */
    u16 off1;  /* handler address bits 16-31 */
    u32 off2;  /* handler address bits 32-63 */
    u32 zero;  /* always written as 0 */
} __attribute__((packed));
     49 
/*
 * Task-state segment image referenced by the descriptor that
 * setup_cpu_tables() builds in gdt[5]/gdt[6]. Packed: without it the leading
 * u32 would be followed by padding before rsp0. Only rsp0 and iopb are
 * written in this file; the other fields are never assigned here.
 */
struct tss64 {
    u32 reserved0;
    u64 rsp0;       /* set to kstack_top by setup_cpu_tables() */
    u64 rsp1;
    u64 rsp2;
    u64 reserved1;
    u64 ist[7];
    u64 reserved2;
    u16 reserved3;
    u16 iopb;       /* set to sizeof(tss); presumably "no I/O bitmap" -- confirm against the SDM */
} __attribute__((packed));
     61 
/* GDT: slots 0-4 are the null/kernel/user segments, slots 5-6 together hold
 * the TSS descriptor (see setup_cpu_tables()). */
static u64 gdt[7];
/* One gate per vector; all 256 are populated by setup_cpu_tables(). */
static struct idt_gate idt[256];
/* The single task-state segment described by gdt[5]/gdt[6]. */
static struct tss64 tss;
     65 
/*
 * Symbols defined outside this file (presumably in assembly -- their
 * implementations are not visible here, so the notes below describe only how
 * this file uses them).
 */
extern void amd64_load_cr3(u64 pml4_pa);        /* called with the address of pml4[] */
extern void amd64_lgdt(const struct gdtr *g);   /* called with the GDT limit/base pair */
extern void amd64_lidt(const struct idtr *i);   /* called with the IDT limit/base pair */
extern void amd64_ltr(u16 sel);                 /* called with 0x28, the TSS selector */
extern void amd64_int80(void);                  /* handler installed on vector 0x80 (DPL 3) */
extern void amd64_unhandled(void);              /* default handler for every other vector */
extern void amd64_syscall_entry(void);          /* address written to MSR_LSTAR */
extern void amd64_wrmsr(u32 msr, u64 val);
extern u64  amd64_rdmsr(u32 msr);
extern char kstack_top[];                       /* its address becomes tss.rsp0 */
     76 
/* Model-specific register numbers programmed by setup_cpu_tables(). */
#define MSR_EFER   0xc0000080U  /* bit 0 (SCE) is set there to enable `syscall` */
#define MSR_STAR   0xc0000081U  /* segment-selector bases for syscall/sysret */
#define MSR_LSTAR  0xc0000082U  /* receives the address of amd64_syscall_entry */
#define MSR_SFMASK 0xc0000084U  /* RFLAGS bits cleared on syscall entry */
     81 
     82 static void set_gate(int vec, void (*fn)(void), int dpl) {
     83     u64 a = (u64)fn;
     84     idt[vec].off0 = (u16)a;
     85     idt[vec].sel = 0x08;
     86     idt[vec].ist = 0;
     87     idt[vec].type = (u8)(0x8e | (dpl << 5));
     88     idt[vec].off1 = (u16)(a >> 16);
     89     idt[vec].off2 = (u32)(a >> 32);
     90     idt[vec].zero = 0;
     91 }
     92 
     93 static void setup_cpu_tables(void) {
     94     gdt[0] = 0;
     95     gdt[1] = 0x00af9a000000ffffUL;  /* kernel code */
     96     gdt[2] = 0x00af92000000ffffUL;  /* kernel data */
     97     gdt[3] = 0x00aff2000000ffffUL;  /* user data */
     98     gdt[4] = 0x00affa000000ffffUL;  /* user code */
     99 
    100     u64 base = (u64)&tss;
    101     u64 limit = sizeof(tss) - 1;
    102     gdt[5] = (limit & 0xffff)
    103            | ((base & 0xffffff) << 16)
    104            | (0x89UL << 40)
    105            | (((limit >> 16) & 0xf) << 48)
    106            | (((base >> 24) & 0xff) << 56);
    107     gdt[6] = base >> 32;
    108 
    109     tss.rsp0 = (u64)kstack_top;
    110     tss.iopb = sizeof(tss);
    111 
    112     struct gdtr gdtr = { (u16)(sizeof(gdt) - 1), (u64)gdt };
    113     amd64_lgdt(&gdtr);
    114     amd64_ltr(0x28);
    115 
    116     for (int i = 0; i < 256; i++) set_gate(i, amd64_unhandled, 0);
    117     set_gate(0x80, amd64_int80, 3);
    118     struct idtr idtr = { (u16)(sizeof(idt) - 1), (u64)idt };
    119     amd64_lidt(&idtr);
    120 
    121     /* Modern x86_64 fast-syscall path: scheme1 (and any tcc-built user
    122      * binary that follows the standard SysV/Linux amd64 ABI) emits the
    123      * `syscall` instruction, which routes through MSR_LSTAR rather than
    124      * the IDT. Without this block the first `syscall` raises #UD because
    125      * EFER.SCE is clear, manifesting as the unhandled-vector PANIC that
    126      * scheme1 hits ~0xa09 bytes into its prelude.
    127      *
    128      * STAR layout for sysret with REX.W=1: the CPU computes user CS as
    129      * STAR[63:48]+16 and user SS as STAR[63:48]+8 (RPL forced to 3).
    130      * Our user CS sel is 0x23 (gdt[4] | 3) and user SS sel is 0x1b
    131      * (gdt[3] | 3), so STAR[63:48] = 0x10. Kernel side: STAR[47:32] = 8
    132      * yields kernel CS=0x08 and kernel SS=0x10 on syscall entry, which
    133      * matches our gdt[1]/gdt[2]. */
    134     amd64_wrmsr(MSR_EFER, amd64_rdmsr(MSR_EFER) | 1UL);
    135     amd64_wrmsr(MSR_STAR, ((u64)0x10UL << 48) | ((u64)0x08UL << 32));
    136     amd64_wrmsr(MSR_LSTAR, (u64)amd64_syscall_entry);
    137     amd64_wrmsr(MSR_SFMASK, 0x200UL);  /* clear IF on syscall entry */
    138 }
    139 
    140 static u64 pool_pa(int which) {
    141     return which ? ARCH_USER_POOL_B_PA : ARCH_USER_POOL_A_PA;
    142 }
    143 
    144 static void fill_pd(u64 *pd, u64 first_slot, u64 flags) {
    145     for (int i = 0; i < 512; i++) {
    146         u64 pa = (first_slot + (u64)i) * 0x200000UL;
    147         pd[i] = pa | flags;
    148     }
    149 }
    150 
    151 static void fill_user_pd0(int which) {
    152     fill_pd(pd0, 0, KSEG);
    153     u64 base = pool_pa(which);
    154     for (int i = ARCH_USER_POOL_FIRST_SLOT; i <= ARCH_USER_POOL_LAST_SLOT; i++) {
    155         u64 pa = base + (u64)(i - ARCH_USER_POOL_FIRST_SLOT) * 0x200000UL;
    156         pd0[i] = pa | USEG;
    157     }
    158 }
    159 
    160 void arch_setup_mmu(void) {
    161     for (int i = 0; i < 512; i++) {
    162         pml4[i] = 0;
    163         pdpt_low[i] = 0;
    164         pdpt_alias[i] = 0;
    165     }
    166 
    167     fill_user_pd0(0);
    168     fill_pd(pd1, 512, KSEG);
    169     fill_pd(pd2, 1024, KSEG);
    170     fill_pd(pd3, 1536, KSEG);
    171 
    172     fill_pd(pda0, 0, DSEG);
    173     fill_pd(pda1, 512, DSEG);
    174     fill_pd(pda2, 1024, DSEG);
    175     fill_pd(pda3, 1536, DSEG);
    176 
    177     pdpt_low[0] = (u64)pd0 | PTE_P | PTE_W | PTE_U;
    178     pdpt_low[1] = (u64)pd1 | PTE_P | PTE_W;
    179     pdpt_low[2] = (u64)pd2 | PTE_P | PTE_W;
    180     pdpt_low[3] = (u64)pd3 | PTE_P | PTE_W;
    181 
    182     pdpt_alias[0] = (u64)pda0 | PTE_P | PTE_W;
    183     pdpt_alias[1] = (u64)pda1 | PTE_P | PTE_W;
    184     pdpt_alias[2] = (u64)pda2 | PTE_P | PTE_W;
    185     pdpt_alias[3] = (u64)pda3 | PTE_P | PTE_W;
    186 
    187     pml4[0] = (u64)pdpt_low | PTE_P | PTE_W | PTE_U;
    188     pml4[256] = (u64)pdpt_alias | PTE_P | PTE_W;
    189 
    190     setup_cpu_tables();
    191     amd64_load_cr3((u64)pml4);
    192 }
    193 
    194 void arch_swap_user_pool(int which) {
    195     fill_user_pd0(which);
    196     amd64_load_cr3((u64)pml4);
    197 }