kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

arm32.c (8268B)


      1 /*
      2  * lib/coro/arm32.c -- ARM32 Thumb-2 (AAPCS) implementations of
      3  *   setjmp / longjmp                       (<setjmp.h>)
      4  *   __kit_coro_ctx_init / __kit_coro_switch / trampoline   (<kit/coro.h>)
      5  *
      6  * All three primitives sit on one per-target context layout:
      7  *
      8  *   regs[0..7]    r4-r11
      9  *   regs[8]       sp
     10  *   regs[9]       lr
     11  *   fp_regs[0..7] d8-d15  (AAPCS32 requires d8-d15, i.e. s16-s31,
     12  *                          to be preserved across calls; they are
     13  *                          saved only when __ARM_FP is defined, but
     14  *                          the slots are always allocated so the
     15  *                          byte layout is stable across soft/hard-
     16  *                          float builds).
     17  *
     18  * 10*4 GPR slots + 8*8 fp_regs slots = 104 bytes of payload, padded
     19  * to 112 by the struct's aligned(16). fp_regs at offset 40. Fits in
     20  * the 256-byte storage carved out by jmp_buf and coro_ctx.
     21  *
     22  * SAVE_/RESTORE_ are C string-concat macros so the same byte sequence
     23  * is emitted in setjmp, longjmp, and __kit_coro_switch. The VFP half is
     24  * gated by a C-level #ifdef on __ARM_FP -- the cpp pass picks one
     25  * macro body before the assembler sees anything, so we can't hide
     26  * `#ifdef` inside the asm string.
     27  *
     28  * Symbol naming uses __USER_LABEL_PREFIX__ so labels match the C
     29  * compiler's call-site mangling on both ELF (no prefix) and Mach-O
     30  * (leading "_").
     31  */
     32 
     33 #include <kit/coro.h>
     34 #include <setjmp.h>
     35 #include <stddef.h>
     36 #include <stdint.h>
     37 
     38 struct __kit_arm32_ctx {
     39   uintptr_t regs[10];
     40   uint64_t fp_regs[8];
     41 } __attribute__((aligned(16)));
     42 
     43 _Static_assert(sizeof(struct __kit_arm32_ctx) == 112, "layout");
     44 _Static_assert(_Alignof(struct __kit_arm32_ctx) == 16, "align");
     45 _Static_assert(offsetof(struct __kit_arm32_ctx, fp_regs) == 40, "fp off");
     46 _Static_assert(sizeof(struct __kit_arm32_ctx) <= sizeof(coro_ctx),
     47                "fits coro_ctx");
     48 _Static_assert(sizeof(struct __kit_arm32_ctx) <= sizeof(jmp_buf),
     49                "fits jmp_buf");
     50 _Static_assert(_Alignof(coro_ctx) >= _Alignof(struct __kit_arm32_ctx),
     51                "align coro_ctx");
     52 
     53 extern void __kit_coro_trampoline(void);
     54 
     55 void __kit_coro_ctx_init(coro_ctx* ctx, void* stack_base, size_t stack_len,
     56                          void (*entry)(uintptr_t)) {
     57   struct __kit_arm32_ctx* c = (struct __kit_arm32_ctx*)ctx;
     58 
     59   /* ARM32 stacks grow down; align top to 16 (AAPCS public-boundary
     60      requirement is 8, but coro stacks promise CORO_STACK_ALIGN=16). */
     61   uintptr_t top = (uintptr_t)stack_base + stack_len;
     62   top &= ~(uintptr_t)(CORO_STACK_ALIGN - 1);
     63 
     64   for (size_t i = 0; i < sizeof(*c) / sizeof(uintptr_t); ++i)
     65     ((uintptr_t*)c)[i] = 0;
     66 
     67   c->regs[0] = (uintptr_t)entry;                 /* r4 -- entry fn */
     68   c->regs[3] = 0;                                /* r7 -- frame ptr */
     69   c->regs[8] = top;                              /* sp */
     70   c->regs[9] = (uintptr_t)__kit_coro_trampoline; /* lr */
     71 }
     72 
     73 #define STR_(x) #x
     74 #define STR(x) STR_(x)
     75 #define SYM(n) STR(__USER_LABEL_PREFIX__) #n
     76 
     77 /* Save/restore macros. The VFP half is conditional on __ARM_FP at the
     78    C-cpp level -- by the time the inline assembler sees the string,
     79    only one variant remains. The byte offsets match the struct layout
     80    regardless (slots are always allocated). */
     81 #ifdef __ARM_FP
     82 #define SAVE_INTO(reg)  \
     83   "    str  r4,  [" reg \
     84   ", #0]\n"             \
     85   "    str  r5,  [" reg \
     86   ", #4]\n"             \
     87   "    str  r6,  [" reg \
     88   ", #8]\n"             \
     89   "    str  r7,  [" reg \
     90   ", #12]\n"            \
     91   "    str  r8,  [" reg \
     92   ", #16]\n"            \
     93   "    str  r9,  [" reg \
     94   ", #20]\n"            \
     95   "    str  r10, [" reg \
     96   ", #24]\n"            \
     97   "    str  r11, [" reg \
     98   ", #28]\n"            \
     99   "    str  sp,  [" reg \
    100   ", #32]\n"            \
    101   "    str  lr,  [" reg \
    102   ", #36]\n"            \
    103   "    vstr d8,  [" reg \
    104   ", #40]\n"            \
    105   "    vstr d9,  [" reg \
    106   ", #48]\n"            \
    107   "    vstr d10, [" reg \
    108   ", #56]\n"            \
    109   "    vstr d11, [" reg \
    110   ", #64]\n"            \
    111   "    vstr d12, [" reg \
    112   ", #72]\n"            \
    113   "    vstr d13, [" reg \
    114   ", #80]\n"            \
    115   "    vstr d14, [" reg \
    116   ", #88]\n"            \
    117   "    vstr d15, [" reg ", #96]\n"
    118 
    119 #define RESTORE_FROM(reg) \
    120   "    vldr d8,  [" reg   \
    121   ", #40]\n"              \
    122   "    vldr d9,  [" reg   \
    123   ", #48]\n"              \
    124   "    vldr d10, [" reg   \
    125   ", #56]\n"              \
    126   "    vldr d11, [" reg   \
    127   ", #64]\n"              \
    128   "    vldr d12, [" reg   \
    129   ", #72]\n"              \
    130   "    vldr d13, [" reg   \
    131   ", #80]\n"              \
    132   "    vldr d14, [" reg   \
    133   ", #88]\n"              \
    134   "    vldr d15, [" reg   \
    135   ", #96]\n"              \
    136   "    ldr  r4,  [" reg   \
    137   ", #0]\n"               \
    138   "    ldr  r5,  [" reg   \
    139   ", #4]\n"               \
    140   "    ldr  r6,  [" reg   \
    141   ", #8]\n"               \
    142   "    ldr  r7,  [" reg   \
    143   ", #12]\n"              \
    144   "    ldr  r8,  [" reg   \
    145   ", #16]\n"              \
    146   "    ldr  r9,  [" reg   \
    147   ", #20]\n"              \
    148   "    ldr  r10, [" reg   \
    149   ", #24]\n"              \
    150   "    ldr  r11, [" reg   \
    151   ", #28]\n"              \
    152   "    ldr  sp,  [" reg   \
    153   ", #32]\n"              \
    154   "    ldr  lr,  [" reg ", #36]\n"
    155 #else
    156 #define SAVE_INTO(reg)  \
    157   "    str  r4,  [" reg \
    158   ", #0]\n"             \
    159   "    str  r5,  [" reg \
    160   ", #4]\n"             \
    161   "    str  r6,  [" reg \
    162   ", #8]\n"             \
    163   "    str  r7,  [" reg \
    164   ", #12]\n"            \
    165   "    str  r8,  [" reg \
    166   ", #16]\n"            \
    167   "    str  r9,  [" reg \
    168   ", #20]\n"            \
    169   "    str  r10, [" reg \
    170   ", #24]\n"            \
    171   "    str  r11, [" reg \
    172   ", #28]\n"            \
    173   "    str  sp,  [" reg \
    174   ", #32]\n"            \
    175   "    str  lr,  [" reg ", #36]\n"
    176 
    177 #define RESTORE_FROM(reg) \
    178   "    ldr  r4,  [" reg   \
    179   ", #0]\n"               \
    180   "    ldr  r5,  [" reg   \
    181   ", #4]\n"               \
    182   "    ldr  r6,  [" reg   \
    183   ", #8]\n"               \
    184   "    ldr  r7,  [" reg   \
    185   ", #12]\n"              \
    186   "    ldr  r8,  [" reg   \
    187   ", #16]\n"              \
    188   "    ldr  r9,  [" reg   \
    189   ", #20]\n"              \
    190   "    ldr  r10, [" reg   \
    191   ", #24]\n"              \
    192   "    ldr  r11, [" reg   \
    193   ", #28]\n"              \
    194   "    ldr  sp,  [" reg   \
    195   ", #32]\n"              \
    196   "    ldr  lr,  [" reg ", #36]\n"
    197 #endif
    198 
    199 __asm__ (
    200     ".syntax unified\n"
    201     ".thumb\n"
    202     ".text\n"
    203     ".align 2\n"
    204 
    205     /* setjmp(env) -- env in r0; weak, returns 0 on the direct call. lr at
    206        call time is the caller's return address, which longjmp restores. */
    207     ".weak " SYM(setjmp) "\n"
    208     ".thumb_func\n"
    209     ".type " SYM(setjmp) ", %function\n"
    210     SYM(setjmp) ":\n"
    211     SAVE_INTO("r0")
    212     "    movs r0, #0\n"
    213     "    bx   lr\n"
    214 
    215     /* longjmp(env, val) -- env in r0, val in r1; weak like setjmp.
    216        longjmp(_, 0) must deliver 1 (C11 7.13.2.1p4); the IT block
    217        gives r1 = (r1 == 0) ? 1 : r1, then we move it into r0 and
    218        branch to the saved lr, so setjmp appears to return again. */
    219     ".weak " SYM(longjmp) "\n"
    220     ".thumb_func\n"
    221     ".type " SYM(longjmp) ", %function\n"
    222     SYM(longjmp) ":\n"
    223     RESTORE_FROM("r0")
    224     "    cmp  r1, #0\n"
    225     "    it   eq\n"
    226     "    moveq r1, #1\n"
    227     "    mov  r0, r1\n"
    228     "    bx   lr\n"
    229 
    230     /* __kit_coro_switch(from, to, value) -- r0=from, r1=to, r2=value.
    231        Save the current context into [r0], restore the target from [r1],
    232        then deliver r2 in r0. The lr loaded by RESTORE_FROM is either a
    233        real return address (a previously-suspended coro) or
    234        __kit_coro_trampoline (a fresh coro set up by __kit_coro_ctx_init).
    235        Either way `bx lr` lands there with r0 holding `value`. */
    236     ".globl " SYM(__kit_coro_switch) "\n"
    237     ".thumb_func\n"
    238     ".type " SYM(__kit_coro_switch) ", %function\n"
    239     SYM(__kit_coro_switch) ":\n"
    240     SAVE_INTO("r0")
    241     RESTORE_FROM("r1")
    242     "    mov  r0, r2\n"
    243     "    bx   lr\n"
    244 
    245     /* __kit_coro_trampoline -- first entry into a fresh coro: r0 = value
    246        (from __kit_coro_switch's `mov r0, r2`), r4 = entry fn (stored by
    247        __kit_coro_ctx_init), sp aligned to 16. Calls entry(value); udf if it returns. */
    248     ".globl " SYM(__kit_coro_trampoline) "\n"
    249     ".thumb_func\n"
    250     ".type " SYM(__kit_coro_trampoline) ", %function\n"
    251     SYM(__kit_coro_trampoline) ":\n"
    252     "    blx  r4\n"
    253     "    udf  #0\n"
    254     /* Empty .note.GNU-stack section: GNU/ELF marker for "no executable stack needed". */
    255     ".section .note.GNU-stack,\"\",%progbits\n"
    256 );