kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

x86_64_win.c (7986B)


      1 /*
      2  * lib/coro/x86_64_win.c -- x86_64 Windows (MS x64 ABI) implementations of
      3  *   setjmp / longjmp                       (<setjmp.h>)
      4  *   __kit_coro_ctx_init / __kit_coro_switch / trampoline   (<kit/coro.h>)
      5  *
      6  * MS x64 callee-saved set: rbx, rbp, rdi, rsi, r12-r15, xmm6-xmm15.
      7  * (Compare with x86_64.c -- SysV doesn't preserve rdi/rsi or any xmm.)
      8  * Windows additionally requires the TEB stack-bound slots gs:0x08
      9  * (StackBase) and gs:0x10 (StackLimit) to track the live stack so
     10  * exception unwinding etc. behave; these are saved/restored on every
     11  * switch.
     12  *
     13  *   regs[0]:  rbx     regs[8]:  rsp
     14  *   regs[1]:  rbp     regs[9]:  rip
     15  *   regs[2]:  rdi     regs[10]: stack_base  (TEB gs:0x08)
     16  *   regs[3]:  rsi     regs[11]: stack_limit (TEB gs:0x10)
     17  *   regs[4..7]: r12-r15
     18  *   fp_regs[0..19]: xmm6-xmm15  (10 regs * 128b = 20 * 64b slots, off 96)
     19  *
     20  * sizeof = 256, 16-byte aligned. Exactly fills jmp_buf / coro_ctx.
     21  *
     22  *   setjmp(env)             %rcx=env
     23  *   longjmp(env, val)       %rcx=env, %edx=val
     24  *   __kit_coro_switch(f, t, val)  %rcx=from, %rdx=to, %r8=value
     25  *
     26  * The "save rsp/rip" trick mirrors x86_64.c: at function entry,
     27  * (%rsp) holds the caller's return address, and the address %rsp+8
     28  * (computed with leaq, not loaded) is the caller's pre-call rsp.
     29  */
     30 
     31 #include <kit/coro.h>
     32 #include <setjmp.h>
     33 #include <stddef.h>
     34 #include <stdint.h>
     35 
     36 struct __kit_x86_64_win_ctx {
     37   uintptr_t regs[12];
     38   uint64_t fp_regs[20];
     39 } __attribute__((aligned(16)));
     40 
     41 _Static_assert(sizeof(struct __kit_x86_64_win_ctx) == 256, "layout");
     42 _Static_assert(_Alignof(struct __kit_x86_64_win_ctx) == 16, "align");
     43 _Static_assert(offsetof(struct __kit_x86_64_win_ctx, fp_regs) == 96, "fp off");
     44 _Static_assert(sizeof(struct __kit_x86_64_win_ctx) <= sizeof(coro_ctx),
     45                "fits coro_ctx");
     46 _Static_assert(sizeof(struct __kit_x86_64_win_ctx) <= sizeof(jmp_buf),
     47                "fits jmp_buf");
     48 _Static_assert(_Alignof(coro_ctx) >= _Alignof(struct __kit_x86_64_win_ctx),
     49                "align coro_ctx");
     50 
     51 extern void __kit_coro_trampoline(void);
     52 
     53 void __kit_coro_ctx_init(coro_ctx* ctx, void* stack_base, size_t stack_len,
     54                          void (*entry)(uintptr_t)) {
     55   struct __kit_x86_64_win_ctx* c = (struct __kit_x86_64_win_ctx*)ctx;
     56 
     57   /* x86_64 stacks grow down; align top to 16. */
     58   uintptr_t top = (uintptr_t)stack_base + stack_len;
     59   top &= ~(uintptr_t)(CORO_STACK_ALIGN - 1);
     60 
     61   for (size_t i = 0; i < sizeof(*c) / sizeof(uintptr_t); ++i)
     62     ((uintptr_t*)c)[i] = 0;
     63 
     64   c->regs[1] = 0;                                /* rbp */
     65   c->regs[4] = (uintptr_t)entry;                 /* r12 -- entry fn */
     66   c->regs[8] = top;                              /* rsp */
     67   c->regs[9] = (uintptr_t)__kit_coro_trampoline; /* rip */
     68   c->regs[10] = top;                             /* stack_base (TEB) */
     69   c->regs[11] = (uintptr_t)stack_base;           /* stack_limit (TEB) */
     70 }
     71 
     72 #define STR_(x) #x
     73 #define STR(x) STR_(x)
     74 #define SYM(n) STR(__USER_LABEL_PREFIX__) #n
     75 
     76 /* Save callee-saved + (caller's) rsp + rip + TEB stack bounds + xmm6-15
     77    into [reg]; clobbers %rax. Used at function-entry stack discipline:
     78    (%rsp)=ret-addr, 8(%rsp)=pre-call rsp. */
     79 #define SAVE_INTO(reg)          \
     80   "    movq %rbx,    0(" reg    \
     81   ")\n"                         \
     82   "    movq %rbp,    8(" reg    \
     83   ")\n"                         \
     84   "    movq %rdi,   16(" reg    \
     85   ")\n"                         \
     86   "    movq %rsi,   24(" reg    \
     87   ")\n"                         \
     88   "    movq %r12,   32(" reg    \
     89   ")\n"                         \
     90   "    movq %r13,   40(" reg    \
     91   ")\n"                         \
     92   "    movq %r14,   48(" reg    \
     93   ")\n"                         \
     94   "    movq %r15,   56(" reg    \
     95   ")\n"                         \
     96   "    leaq 8(%rsp), %rax\n"    \
     97   "    movq %rax,   64(" reg    \
     98   ")\n"                         \
     99   "    movq (%rsp), %rax\n"     \
    100   "    movq %rax,   72(" reg    \
    101   ")\n"                         \
    102   "    movq %gs:0x08, %rax\n"   \
    103   "    movq %rax,   80(" reg    \
    104   ")\n"                         \
    105   "    movq %gs:0x10, %rax\n"   \
    106   "    movq %rax,   88(" reg    \
    107   ")\n"                         \
    108   "    movaps %xmm6,   96(" reg \
    109   ")\n"                         \
    110   "    movaps %xmm7,  112(" reg \
    111   ")\n"                         \
    112   "    movaps %xmm8,  128(" reg \
    113   ")\n"                         \
    114   "    movaps %xmm9,  144(" reg \
    115   ")\n"                         \
    116   "    movaps %xmm10, 160(" reg \
    117   ")\n"                         \
    118   "    movaps %xmm11, 176(" reg \
    119   ")\n"                         \
    120   "    movaps %xmm12, 192(" reg \
    121   ")\n"                         \
    122   "    movaps %xmm13, 208(" reg \
    123   ")\n"                         \
    124   "    movaps %xmm14, 224(" reg \
    125   ")\n"                         \
    126   "    movaps %xmm15, 240(" reg ")\n"
    127 
    128 /* Restore callee-saved + xmm + TEB bounds + rsp from [reg]; leaves rip
    129    in %r10 ready to jmp. Caller delivers the destination value in %rax
    130    beforehand, so %rax must not be touched here. */
    131 #define RESTORE_FROM(reg)     \
    132   "    movaps  96(" reg       \
    133   "), %xmm6\n"                \
    134   "    movaps 112(" reg       \
    135   "), %xmm7\n"                \
    136   "    movaps 128(" reg       \
    137   "), %xmm8\n"                \
    138   "    movaps 144(" reg       \
    139   "), %xmm9\n"                \
    140   "    movaps 160(" reg       \
    141   "), %xmm10\n"               \
    142   "    movaps 176(" reg       \
    143   "), %xmm11\n"               \
    144   "    movaps 192(" reg       \
    145   "), %xmm12\n"               \
    146   "    movaps 208(" reg       \
    147   "), %xmm13\n"               \
    148   "    movaps 224(" reg       \
    149   "), %xmm14\n"               \
    150   "    movaps 240(" reg       \
    151   "), %xmm15\n"               \
    152   "    movq  0(" reg          \
    153   "), %rbx\n"                 \
    154   "    movq  8(" reg          \
    155   "), %rbp\n"                 \
    156   "    movq 16(" reg          \
    157   "), %rdi\n"                 \
    158   "    movq 24(" reg          \
    159   "), %rsi\n"                 \
    160   "    movq 32(" reg          \
    161   "), %r12\n"                 \
    162   "    movq 40(" reg          \
    163   "), %r13\n"                 \
    164   "    movq 48(" reg          \
    165   "), %r14\n"                 \
    166   "    movq 56(" reg          \
    167   "), %r15\n"                 \
    168   "    movq 80(" reg          \
    169   "), %r10\n"                 \
    170   "    movq %r10, %gs:0x08\n" \
    171   "    movq 88(" reg          \
    172   "), %r10\n"                 \
    173   "    movq %r10, %gs:0x10\n" \
    174   "    movq 64(" reg          \
    175   "), %rsp\n"                 \
    176   "    movq 72(" reg "), %r10\n"
    177 
    178 __asm__ (
    179     ".text\n"
    180     ".p2align 4\n"
    181 
    182     /* setjmp(env) -- env=%rcx */
    183     ".weak " SYM(setjmp) "\n"
    184     SYM(setjmp) ":\n"
    185     SAVE_INTO("%rcx")
    186     "    xorl %eax, %eax\n"
    187     "    ret\n"
    188 
    189     /* longjmp(env, val) -- env=%rcx, val=%edx.
    190        longjmp(_, 0) must deliver 1 (C11 7.13.2.1p4). */
    191     ".weak " SYM(longjmp) "\n"
    192     SYM(longjmp) ":\n"
    193     "    movslq %edx, %rax\n"        /* sign-extend int -> long */
    194     "    testq %rax, %rax\n"
    195     "    movl  $1, %r11d\n"
    196     "    cmoveq %r11, %rax\n"
    197     RESTORE_FROM("%rcx")
    198     "    jmpq *%r10\n"
    199 
    200     /* __kit_coro_switch(from, to, value) -- from=%rcx, to=%rdx, value=%r8. */
    201     ".globl " SYM(__kit_coro_switch) "\n"
    202     SYM(__kit_coro_switch) ":\n"
    203     SAVE_INTO("%rcx")
    204     "    movq %r8, %rax\n"           /* deliver value as return reg */
    205     RESTORE_FROM("%rdx")
    206     "    jmpq *%r10\n"
    207 
    208     /* __kit_coro_trampoline -- on first entry: %rax=value (delivered
    209        by __kit_coro_switch), %r12=entry (set by __kit_coro_ctx_init), %rsp=stack_top
    210        (no return addr pushed -- __kit_coro_switch reaches here via jmp). MS
    211        x64 wants %rsp 16-byte aligned at call sites with 32 bytes of
    212        shadow space reserved by the caller. */
    213     ".globl " SYM(__kit_coro_trampoline) "\n"
    214     SYM(__kit_coro_trampoline) ":\n"
    215     "    andq $-16, %rsp\n"          /* defensive align */
    216     "    subq $32, %rsp\n"           /* MS x64 shadow space */
    217     "    movq %rax, %rcx\n"          /* value -> first arg */
    218     "    callq *%r12\n"              /* entry(value) */
    219     "    ud2\n"
    220 );