x86_64_win.c (7986B)
1 /* 2 * lib/coro/x86_64_win.c -- x86_64 Windows (MS x64 ABI) implementations of 3 * setjmp / longjmp (<setjmp.h>) 4 * __kit_coro_ctx_init / __kit_coro_switch / trampoline (<kit/coro.h>) 5 * 6 * MS x64 callee-saved set: rbx, rbp, rdi, rsi, r12-r15, xmm6-xmm15. 7 * (Compare with x86_64.c -- SysV doesn't preserve rdi/rsi or any xmm.) 8 * Windows additionally requires the TEB stack-bound slots gs:0x08 9 * (StackBase) and gs:0x10 (StackLimit) to track the live stack so 10 * exception unwinding etc. behave; these are saved/restored on every 11 * switch. 12 * 13 * regs[0]: rbx regs[8]: rsp 14 * regs[1]: rbp regs[9]: rip 15 * regs[2]: rdi regs[10]: stack_base (TEB gs:0x08) 16 * regs[3]: rsi regs[11]: stack_limit (TEB gs:0x10) 17 * regs[4..7]: r12-r15 18 * fp_regs[0..19]: xmm6-xmm15 (10 regs * 128b = 20 * 64b slots, off 96) 19 * 20 * sizeof = 256, 16-byte aligned. Exactly fills jmp_buf / coro_ctx. 21 * 22 * setjmp(env) %rcx=env 23 * longjmp(env, val) %rcx=env, %edx=val 24 * __kit_coro_switch(f, t, val) %rcx=from, %rdx=to, %r8=value 25 * 26 * The "save rsp/rip" trick mirrors x86_64.c: at function entry, 27 * (%rsp) holds the caller's return address, 8(%rsp) is the caller's 28 * pre-call rsp. 29 */ 30 31 #include <kit/coro.h> 32 #include <setjmp.h> 33 #include <stddef.h> 34 #include <stdint.h> 35 36 struct __kit_x86_64_win_ctx { 37 uintptr_t regs[12]; 38 uint64_t fp_regs[20]; 39 } __attribute__((aligned(16))); 40 41 _Static_assert(sizeof(struct __kit_x86_64_win_ctx) == 256, "layout"); 42 _Static_assert(_Alignof(struct __kit_x86_64_win_ctx) == 16, "align"); 43 _Static_assert(offsetof(struct __kit_x86_64_win_ctx, fp_regs) == 96, "fp off"); 44 _Static_assert(sizeof(struct __kit_x86_64_win_ctx) <= sizeof(coro_ctx), 45 "fits coro_ctx"); 46 _Static_assert(sizeof(struct __kit_x86_64_win_ctx) <= sizeof(jmp_buf), 47 "fits jmp_buf"); 48 _Static_assert(_Alignof(coro_ctx) >= _Alignof(struct __kit_x86_64_win_ctx), 49 "align coro_ctx"); 50 51 extern void __kit_coro_trampoline(void); 52 53 void __kit_coro_ctx_init(coro_ctx* ctx, void* stack_base, size_t stack_len, 54 void (*entry)(uintptr_t)) { 55 struct __kit_x86_64_win_ctx* c = (struct __kit_x86_64_win_ctx*)ctx; 56 57 /* x86_64 stacks grow down; align top to 16. */ 58 uintptr_t top = (uintptr_t)stack_base + stack_len; 59 top &= ~(uintptr_t)(CORO_STACK_ALIGN - 1); 60 61 for (size_t i = 0; i < sizeof(*c) / sizeof(uintptr_t); ++i) 62 ((uintptr_t*)c)[i] = 0; 63 64 c->regs[1] = 0; /* rbp */ 65 c->regs[4] = (uintptr_t)entry; /* r12 -- entry fn */ 66 c->regs[8] = top; /* rsp */ 67 c->regs[9] = (uintptr_t)__kit_coro_trampoline; /* rip */ 68 c->regs[10] = top; /* stack_base (TEB) */ 69 c->regs[11] = (uintptr_t)stack_base; /* stack_limit (TEB) */ 70 } 71 72 #define STR_(x) #x 73 #define STR(x) STR_(x) 74 #define SYM(n) STR(__USER_LABEL_PREFIX__) #n 75 76 /* Save callee-saved + (caller's) rsp + rip + TEB stack bounds + xmm6-15 77 into [reg]; clobbers %rax. Used at function-entry stack discipline: 78 (%rsp)=ret-addr, 8(%rsp)=pre-call rsp. */ 79 #define SAVE_INTO(reg) \ 80 " movq %rbx, 0(" reg \ 81 ")\n" \ 82 " movq %rbp, 8(" reg \ 83 ")\n" \ 84 " movq %rdi, 16(" reg \ 85 ")\n" \ 86 " movq %rsi, 24(" reg \ 87 ")\n" \ 88 " movq %r12, 32(" reg \ 89 ")\n" \ 90 " movq %r13, 40(" reg \ 91 ")\n" \ 92 " movq %r14, 48(" reg \ 93 ")\n" \ 94 " movq %r15, 56(" reg \ 95 ")\n" \ 96 " leaq 8(%rsp), %rax\n" \ 97 " movq %rax, 64(" reg \ 98 ")\n" \ 99 " movq (%rsp), %rax\n" \ 100 " movq %rax, 72(" reg \ 101 ")\n" \ 102 " movq %gs:0x08, %rax\n" \ 103 " movq %rax, 80(" reg \ 104 ")\n" \ 105 " movq %gs:0x10, %rax\n" \ 106 " movq %rax, 88(" reg \ 107 ")\n" \ 108 " movaps %xmm6, 96(" reg \ 109 ")\n" \ 110 " movaps %xmm7, 112(" reg \ 111 ")\n" \ 112 " movaps %xmm8, 128(" reg \ 113 ")\n" \ 114 " movaps %xmm9, 144(" reg \ 115 ")\n" \ 116 " movaps %xmm10, 160(" reg \ 117 ")\n" \ 118 " movaps %xmm11, 176(" reg \ 119 ")\n" \ 120 " movaps %xmm12, 192(" reg \ 121 ")\n" \ 122 " movaps %xmm13, 208(" reg \ 123 ")\n" \ 124 " movaps %xmm14, 224(" reg \ 125 ")\n" \ 126 " movaps %xmm15, 240(" reg ")\n" 127 128 /* Restore callee-saved + xmm + TEB bounds + rsp from [reg]; leaves rip 129 in %r10 ready to jmp. Caller delivers the destination value in %rax 130 beforehand, so %rax must not be touched here. */ 131 #define RESTORE_FROM(reg) \ 132 " movaps 96(" reg \ 133 "), %xmm6\n" \ 134 " movaps 112(" reg \ 135 "), %xmm7\n" \ 136 " movaps 128(" reg \ 137 "), %xmm8\n" \ 138 " movaps 144(" reg \ 139 "), %xmm9\n" \ 140 " movaps 160(" reg \ 141 "), %xmm10\n" \ 142 " movaps 176(" reg \ 143 "), %xmm11\n" \ 144 " movaps 192(" reg \ 145 "), %xmm12\n" \ 146 " movaps 208(" reg \ 147 "), %xmm13\n" \ 148 " movaps 224(" reg \ 149 "), %xmm14\n" \ 150 " movaps 240(" reg \ 151 "), %xmm15\n" \ 152 " movq 0(" reg \ 153 "), %rbx\n" \ 154 " movq 8(" reg \ 155 "), %rbp\n" \ 156 " movq 16(" reg \ 157 "), %rdi\n" \ 158 " movq 24(" reg \ 159 "), %rsi\n" \ 160 " movq 32(" reg \ 161 "), %r12\n" \ 162 " movq 40(" reg \ 163 "), %r13\n" \ 164 " movq 48(" reg \ 165 "), %r14\n" \ 166 " movq 56(" reg \ 167 "), %r15\n" \ 168 " movq 80(" reg \ 169 "), %r10\n" \ 170 " movq %r10, %gs:0x08\n" \ 171 " movq 88(" reg \ 172 "), %r10\n" \ 173 " movq %r10, %gs:0x10\n" \ 174 " movq 64(" reg \ 175 "), %rsp\n" \ 176 " movq 72(" reg "), %r10\n" 177 178 __asm__ ( 179 ".text\n" 180 ".p2align 4\n" 181 182 /* setjmp(env) -- env=%rcx */ 183 ".weak " SYM(setjmp) "\n" 184 SYM(setjmp) ":\n" 185 SAVE_INTO("%rcx") 186 " xorl %eax, %eax\n" 187 " ret\n" 188 189 /* longjmp(env, val) -- env=%rcx, val=%edx. 190 longjmp(_, 0) must deliver 1 (C11 7.13.2.1p4). */ 191 ".weak " SYM(longjmp) "\n" 192 SYM(longjmp) ":\n" 193 " movslq %edx, %rax\n" /* sign-extend int -> long */ 194 " testq %rax, %rax\n" 195 " movl $1, %r11d\n" 196 " cmoveq %r11, %rax\n" 197 RESTORE_FROM("%rcx") 198 " jmpq *%r10\n" 199 200 /* __kit_coro_switch(from, to, value) -- from=%rcx, to=%rdx, value=%r8. */ 201 ".globl " SYM(__kit_coro_switch) "\n" 202 SYM(__kit_coro_switch) ":\n" 203 SAVE_INTO("%rcx") 204 " movq %r8, %rax\n" /* deliver value as return reg */ 205 RESTORE_FROM("%rdx") 206 " jmpq *%r10\n" 207 208 /* __kit_coro_trampoline -- on first entry: %rax=value (delivered 209 by __kit_coro_switch), %r12=entry (set by __kit_coro_ctx_init), %rsp=stack_top 210 (no return addr pushed -- __kit_coro_switch reaches here via jmp). MS 211 x64 wants %rsp 16-byte aligned at call sites with 32 bytes of 212 shadow space reserved by the caller. */ 213 ".globl " SYM(__kit_coro_trampoline) "\n" 214 SYM(__kit_coro_trampoline) ":\n" 215 " andq $-16, %rsp\n" /* defensive align */ 216 " subq $32, %rsp\n" /* MS x64 shadow space */ 217 " movq %rax, %rcx\n" /* value -> first arg */ 218 " callq *%r12\n" /* entry(value) */ 219 " ud2\n" 220 );