kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

arm32_thumb1.c (7023B)


      1 /*
      2  * lib/coro/arm32_thumb1.c -- ARMv6-M (Cortex-M0 / M0+, Thumb-1) impls of
      3  *   setjmp / longjmp                       (<setjmp.h>)
      4  *   __kit_coro_ctx_init / __kit_coro_switch / trampoline   (<kit/coro.h>)
      5  *
      6  * Thumb-1 / ARMv6-M is a strict subset of the Thumb-2 ISA used by the
      7  * sibling arm32.c, and several conveniences disappear:
      8  *
      9  *   - no IT blocks: conditional execution must use a forward branch.
     10  *   - data-processing ops are restricted to r0-r7. r8-r15 are reachable
     11  *     only via the `mov` high-register form and a few specials; in
     12  *     particular there is no `str rN, [sp,...]` / `str sp, [rN,...]`.
     13  *   - `mov rd, rm` with *both* operands low is UNPREDICTABLE in
     14  *     ARMv6-M; use the T2 flags-setting form `movs rd, rm` for low->low
     15  *     register copies. The plain `mov` form is reserved for cases where
     16  *     at least one operand is a high register (sp/lr/r8-r11).
     17  *   - no VFP coprocessor on M0/M0+, so no fp_regs slots.
     18  *
     19  * Layout: 10 GPR slots (r4-r11, sp, lr) = 40 bytes, padded to 48 bytes
     20  * by the struct's 16-byte alignment. Fits in the 256-byte storage carved
     21  * out by jmp_buf and coro_ctx.
     22  *
     23  * SAVE_INTO uses r4-r7 as scratches *after* they have themselves been
     24  * stored, so r0-r3 are never clobbered. That matters for __kit_coro_switch:
     25  * `to` (r1) and `value` (r2) survive across the save half and are still
     26  * live for the restore half / value delivery.
     27  */
     28 
     29 #include <kit/coro.h>
     30 #include <setjmp.h>
     31 #include <stddef.h>
     32 #include <stdint.h>
     33 
     34 struct __kit_arm32_thumb1_ctx {
     35   uintptr_t regs[10];
     36 } __attribute__((aligned(16)));
     37 
     38 _Static_assert(sizeof(struct __kit_arm32_thumb1_ctx) == 48, "layout");
     39 _Static_assert(_Alignof(struct __kit_arm32_thumb1_ctx) == 16, "align");
     40 _Static_assert(sizeof(struct __kit_arm32_thumb1_ctx) <= sizeof(coro_ctx),
     41                "fits coro_ctx");
     42 _Static_assert(sizeof(struct __kit_arm32_thumb1_ctx) <= sizeof(jmp_buf),
     43                "fits jmp_buf");
     44 _Static_assert(_Alignof(coro_ctx) >= _Alignof(struct __kit_arm32_thumb1_ctx),
     45                "align coro_ctx");
     46 
     47 extern void __kit_coro_trampoline(void);
     48 
     49 void __kit_coro_ctx_init(coro_ctx* ctx, void* stack_base, size_t stack_len,
     50                          void (*entry)(uintptr_t)) {
     51   struct __kit_arm32_thumb1_ctx* c = (struct __kit_arm32_thumb1_ctx*)ctx;
     52 
     53   /* ARM stacks grow down; align top to 16 (AAPCS public-boundary
     54      requirement is 8, but coro stacks promise CORO_STACK_ALIGN=16). */
     55   uintptr_t top = (uintptr_t)stack_base + stack_len;
     56   top &= ~(uintptr_t)(CORO_STACK_ALIGN - 1);
     57 
     58   for (size_t i = 0; i < sizeof(*c) / sizeof(uintptr_t); ++i)
     59     ((uintptr_t*)c)[i] = 0;
     60 
     61   c->regs[0] = (uintptr_t)entry;                 /* r4 -- entry fn */
     62   c->regs[3] = 0;                                /* r7 -- frame ptr */
     63   c->regs[8] = top;                              /* sp */
     64   c->regs[9] = (uintptr_t)__kit_coro_trampoline; /* lr */
     65 }
     66 
     67 #define STR_(x) #x
     68 #define STR(x) STR_(x)
     69 #define SYM(n) STR(__USER_LABEL_PREFIX__) #n
     70 
     71 /* Save callee-saved state into [reg].
     72    Stage 1: store r4-r7 directly (low->low str is fine).
     73    Stage 2: with r4-r7 already saved, reuse them as scratches to copy
     74             the high regs r8-r11 down and store them.
     75    Stage 3: same trick for sp and lr.
     76    r0-r3 are never touched. */
     77 #define SAVE_INTO(reg)  \
     78   "    str  r4,  [" reg \
     79   ", #0]\n"             \
     80   "    str  r5,  [" reg \
     81   ", #4]\n"             \
     82   "    str  r6,  [" reg \
     83   ", #8]\n"             \
     84   "    str  r7,  [" reg \
     85   ", #12]\n"            \
     86   "    mov  r4,  r8\n"  \
     87   "    mov  r5,  r9\n"  \
     88   "    mov  r6,  r10\n" \
     89   "    mov  r7,  r11\n" \
     90   "    str  r4,  [" reg \
     91   ", #16]\n"            \
     92   "    str  r5,  [" reg \
     93   ", #20]\n"            \
     94   "    str  r6,  [" reg \
     95   ", #24]\n"            \
     96   "    str  r7,  [" reg \
     97   ", #28]\n"            \
     98   "    mov  r4,  sp\n"  \
     99   "    mov  r5,  lr\n"  \
    100   "    str  r4,  [" reg \
    101   ", #32]\n"            \
    102   "    str  r5,  [" reg ", #36]\n"
    103 
    104 /* Restore callee-saved state from [reg]. Mirror image: load r8-r11/sp/lr
    105    first via r4-r7 as scratches, then restore the real r4-r7 last. */
    106 #define RESTORE_FROM(reg) \
    107   "    ldr  r4,  [" reg   \
    108   ", #16]\n"              \
    109   "    ldr  r5,  [" reg   \
    110   ", #20]\n"              \
    111   "    ldr  r6,  [" reg   \
    112   ", #24]\n"              \
    113   "    ldr  r7,  [" reg   \
    114   ", #28]\n"              \
    115   "    mov  r8,  r4\n"    \
    116   "    mov  r9,  r5\n"    \
    117   "    mov  r10, r6\n"    \
    118   "    mov  r11, r7\n"    \
    119   "    ldr  r4,  [" reg   \
    120   ", #32]\n"              \
    121   "    ldr  r5,  [" reg   \
    122   ", #36]\n"              \
    123   "    mov  sp,  r4\n"    \
    124   "    mov  lr,  r5\n"    \
    125   "    ldr  r4,  [" reg   \
    126   ", #0]\n"               \
    127   "    ldr  r5,  [" reg   \
    128   ", #4]\n"               \
    129   "    ldr  r6,  [" reg   \
    130   ", #8]\n"               \
    131   "    ldr  r7,  [" reg ", #12]\n"
    132 
    133 __asm__ (
    134     ".syntax unified\n"
    135     ".thumb\n"
    136     ".text\n"
    137     ".align 2\n"
    138 
    139     /* setjmp(env) -- env in r0. lr at call time is the return address
    140        into the caller, exactly what longjmp must restore. */
    141     ".weak " SYM(setjmp) "\n"
    142     ".thumb_func\n"
    143     ".type " SYM(setjmp) ", %function\n"
    144     SYM(setjmp) ":\n"
    145     SAVE_INTO("r0")
    146     "    movs r0, #0\n"
    147     "    bx   lr\n"
    148 
    149     /* longjmp(env, val) -- env in r0, val in r1.
    150        longjmp(_, 0) must deliver 1 (C11 7.13.2.1p4). No IT blocks in
    151        Thumb-1, so use a forward branch for the substitution.
    152        Both `r0 <- r1` and the immediate ops use the T2 (`movs`) form
    153        since plain `mov rd, rm` with both low operands is UNPREDICTABLE
    154        on ARMv6-M. */
    155     ".weak " SYM(longjmp) "\n"
    156     ".thumb_func\n"
    157     ".type " SYM(longjmp) ", %function\n"
    158     SYM(longjmp) ":\n"
    159     RESTORE_FROM("r0")
    160     "    cmp  r1, #0\n"
    161     "    bne  1f\n"
    162     "    movs r1, #1\n"
    163     "1:\n"
    164     "    movs r0, r1\n"
    165     "    bx   lr\n"
    166 
    167     /* __kit_coro_switch(from, to, value) -- r0=from, r1=to, r2=value.
    168        SAVE_INTO leaves r0-r3 untouched, so r1 (to) and r2 (value) are
    169        still live. RESTORE_FROM clobbers r4-r7 freely (they belong to
    170        the resumed coro). The lr loaded by RESTORE_FROM is either a
    171        real return address (a previously-suspended coro) or
    172        __kit_coro_trampoline (a fresh coro initialized by __kit_coro_ctx_init);
    173        either way `bx lr` lands there with r0 holding `value`. */
    174     ".globl " SYM(__kit_coro_switch) "\n"
    175     ".thumb_func\n"
    176     ".type " SYM(__kit_coro_switch) ", %function\n"
    177     SYM(__kit_coro_switch) ":\n"
    178     SAVE_INTO("r0")
    179     RESTORE_FROM("r1")
    180     "    movs r0, r2\n"
    181     "    bx   lr\n"
    182 
    183     /* __kit_coro_trampoline -- on first entry r0 = value (delivered
    184        by __kit_coro_switch's `movs r0, r2`), r4 = entry fn (set by __kit_coro_ctx_init),
    185        sp aligned to 16. UDF #0 (T1, ARMv6-M) traps if entry returns. */
    186     ".globl " SYM(__kit_coro_trampoline) "\n"
    187     ".thumb_func\n"
    188     ".type " SYM(__kit_coro_trampoline) ", %function\n"
    189     SYM(__kit_coro_trampoline) ":\n"
    190     "    blx  r4\n"
    191     "    udf  #0\n"
    192 
    193     ".section .note.GNU-stack,\"\",%progbits\n"
    194 );