arm32_thumb1.c (7023B)
1 /* 2 * lib/coro/arm32_thumb1.c -- ARMv6-M (Cortex-M0 / M0+, Thumb-1) impls of 3 * setjmp / longjmp (<setjmp.h>) 4 * __kit_coro_ctx_init / __kit_coro_switch / trampoline (<kit/coro.h>) 5 * 6 * Thumb-1 / ARMv6-M is a strict subset of the Thumb-2 ISA used by the 7 * sibling arm32.c, and several conveniences disappear: 8 * 9 * - no IT blocks: conditional execution must use a forward branch. 10 * - data-processing ops are restricted to r0-r7. r8-r15 are reachable 11 * only via the `mov` high-register form and a few specials; in 12 * particular there is no `str rN, [sp,...]` / `str sp, [rN,...]`. 13 * - `mov rd, rm` with *both* operands low is UNPREDICTABLE in 14 * ARMv6-M; use the T2 flags-setting form `movs rd, rm` for low->low 15 * register copies. The plain `mov` form is reserved for cases where 16 * at least one operand is a high register (sp/lr/r8-r11). 17 * - no VFP coprocessor on M0/M0+, so no fp_regs slots. 18 * 19 * Layout: 10 GPR slots (r4-r11, sp, lr) = 40 bytes, padded to 16-byte 20 * alignment by alignof(16). Fits in the 256-byte storage carved out by 21 * jmp_buf and coro_ctx. 22 * 23 * SAVE_INTO uses r4-r7 as scratches *after* they have themselves been 24 * stored, so r0-r3 are never clobbered. That matters for __kit_coro_switch: 25 * `to` (r1) and `value` (r2) survive across the save half and are still 26 * live for the restore half / value delivery. 27 */ 28 29 #include <kit/coro.h> 30 #include <setjmp.h> 31 #include <stddef.h> 32 #include <stdint.h> 33 34 struct __kit_arm32_thumb1_ctx { 35 uintptr_t regs[10]; 36 } __attribute__((aligned(16))); 37 38 _Static_assert(sizeof(struct __kit_arm32_thumb1_ctx) == 48, "layout"); 39 _Static_assert(_Alignof(struct __kit_arm32_thumb1_ctx) == 16, "align"); 40 _Static_assert(sizeof(struct __kit_arm32_thumb1_ctx) <= sizeof(coro_ctx), 41 "fits coro_ctx"); 42 _Static_assert(sizeof(struct __kit_arm32_thumb1_ctx) <= sizeof(jmp_buf), 43 "fits jmp_buf"); 44 _Static_assert(_Alignof(coro_ctx) >= _Alignof(struct __kit_arm32_thumb1_ctx), 45 "align coro_ctx"); 46 47 extern void __kit_coro_trampoline(void); 48 49 void __kit_coro_ctx_init(coro_ctx* ctx, void* stack_base, size_t stack_len, 50 void (*entry)(uintptr_t)) { 51 struct __kit_arm32_thumb1_ctx* c = (struct __kit_arm32_thumb1_ctx*)ctx; 52 53 /* ARM stacks grow down; align top to 16 (AAPCS public-boundary 54 requirement is 8, but coro stacks promise CORO_STACK_ALIGN=16). */ 55 uintptr_t top = (uintptr_t)stack_base + stack_len; 56 top &= ~(uintptr_t)(CORO_STACK_ALIGN - 1); 57 58 for (size_t i = 0; i < sizeof(*c) / sizeof(uintptr_t); ++i) 59 ((uintptr_t*)c)[i] = 0; 60 61 c->regs[0] = (uintptr_t)entry; /* r4 -- entry fn */ 62 c->regs[3] = 0; /* r7 -- frame ptr */ 63 c->regs[8] = top; /* sp */ 64 c->regs[9] = (uintptr_t)__kit_coro_trampoline; /* lr */ 65 } 66 67 #define STR_(x) #x 68 #define STR(x) STR_(x) 69 #define SYM(n) STR(__USER_LABEL_PREFIX__) #n 70 71 /* Save callee-saved state into [reg]. 72 Stage 1: store r4-r7 directly (low->low str is fine). 73 Stage 2: with r4-r7 already saved, reuse them as scratches to copy 74 the high regs r8-r11 down and store them. 75 Stage 3: same trick for sp and lr. 76 r0-r3 are never touched. */ 77 #define SAVE_INTO(reg) \ 78 " str r4, [" reg \ 79 ", #0]\n" \ 80 " str r5, [" reg \ 81 ", #4]\n" \ 82 " str r6, [" reg \ 83 ", #8]\n" \ 84 " str r7, [" reg \ 85 ", #12]\n" \ 86 " mov r4, r8\n" \ 87 " mov r5, r9\n" \ 88 " mov r6, r10\n" \ 89 " mov r7, r11\n" \ 90 " str r4, [" reg \ 91 ", #16]\n" \ 92 " str r5, [" reg \ 93 ", #20]\n" \ 94 " str r6, [" reg \ 95 ", #24]\n" \ 96 " str r7, [" reg \ 97 ", #28]\n" \ 98 " mov r4, sp\n" \ 99 " mov r5, lr\n" \ 100 " str r4, [" reg \ 101 ", #32]\n" \ 102 " str r5, [" reg ", #36]\n" 103 104 /* Restore callee-saved state from [reg]. Mirror image: load r8-r11/sp/lr 105 first via r4-r7 as scratches, then restore the real r4-r7 last. */ 106 #define RESTORE_FROM(reg) \ 107 " ldr r4, [" reg \ 108 ", #16]\n" \ 109 " ldr r5, [" reg \ 110 ", #20]\n" \ 111 " ldr r6, [" reg \ 112 ", #24]\n" \ 113 " ldr r7, [" reg \ 114 ", #28]\n" \ 115 " mov r8, r4\n" \ 116 " mov r9, r5\n" \ 117 " mov r10, r6\n" \ 118 " mov r11, r7\n" \ 119 " ldr r4, [" reg \ 120 ", #32]\n" \ 121 " ldr r5, [" reg \ 122 ", #36]\n" \ 123 " mov sp, r4\n" \ 124 " mov lr, r5\n" \ 125 " ldr r4, [" reg \ 126 ", #0]\n" \ 127 " ldr r5, [" reg \ 128 ", #4]\n" \ 129 " ldr r6, [" reg \ 130 ", #8]\n" \ 131 " ldr r7, [" reg ", #12]\n" 132 133 __asm__ ( 134 ".syntax unified\n" 135 ".thumb\n" 136 ".text\n" 137 ".align 2\n" 138 139 /* setjmp(env) -- env in r0. lr at call time is the return address 140 into the caller, exactly what longjmp must restore. */ 141 ".weak " SYM(setjmp) "\n" 142 ".thumb_func\n" 143 ".type " SYM(setjmp) ", %function\n" 144 SYM(setjmp) ":\n" 145 SAVE_INTO("r0") 146 " movs r0, #0\n" 147 " bx lr\n" 148 149 /* longjmp(env, val) -- env in r0, val in r1. 150 longjmp(_, 0) must deliver 1 (C11 7.13.2.1p4). No IT blocks in 151 Thumb-1, so use a forward branch for the substitution. 152 Both `r0 <- r1` and the immediate ops use the T2 (`movs`) form 153 since plain `mov rd, rm` with both low operands is UNPREDICTABLE 154 on ARMv6-M. */ 155 ".weak " SYM(longjmp) "\n" 156 ".thumb_func\n" 157 ".type " SYM(longjmp) ", %function\n" 158 SYM(longjmp) ":\n" 159 RESTORE_FROM("r0") 160 " cmp r1, #0\n" 161 " bne 1f\n" 162 " movs r1, #1\n" 163 "1:\n" 164 " movs r0, r1\n" 165 " bx lr\n" 166 167 /* __kit_coro_switch(from, to, value) -- r0=from, r1=to, r2=value. 168 SAVE_INTO leaves r0-r3 untouched, so r1 (to) and r2 (value) are 169 still live. RESTORE_FROM clobbers r4-r7 freely (they belong to 170 the resumed coro). The lr loaded by RESTORE_FROM is either a 171 real return address (a previously-suspended coro) or 172 __kit_coro_trampoline (a fresh coro initialized by __kit_coro_ctx_init); 173 either way `bx lr` lands there with r0 holding `value`. */ 174 ".globl " SYM(__kit_coro_switch) "\n" 175 ".thumb_func\n" 176 ".type " SYM(__kit_coro_switch) ", %function\n" 177 SYM(__kit_coro_switch) ":\n" 178 SAVE_INTO("r0") 179 RESTORE_FROM("r1") 180 " movs r0, r2\n" 181 " bx lr\n" 182 183 /* __kit_coro_trampoline -- on first entry r0 = value (delivered 184 by __kit_coro_switch's `movs r0, r2`), r4 = entry fn (set by __kit_coro_ctx_init), 185 sp aligned to 16. UDF #0 (T1, ARMv6-M) traps if entry returns. */ 186 ".globl " SYM(__kit_coro_trampoline) "\n" 187 ".thumb_func\n" 188 ".type " SYM(__kit_coro_trampoline) ", %function\n" 189 SYM(__kit_coro_trampoline) ":\n" 190 " blx r4\n" 191 " udf #0\n" 192 193 ".section .note.GNU-stack,\"\",%progbits\n" 194 );