arm32.c (8268B)
/*
 * lib/coro/arm32.c -- ARM32 Thumb-2 (AAPCS) implementations of
 *   setjmp / longjmp                                      (<setjmp.h>)
 *   __kit_coro_ctx_init / __kit_coro_switch / trampoline  (<kit/coro.h>)
 *
 * All three primitives sit on one per-target context layout:
 *
 *   regs[0..7]     r4-r11
 *   regs[8]        sp
 *   regs[9]        lr
 *   fp_regs[0..7]  d8-d15  (the VFP registers AAPCS requires a callee to
 *                           preserve, also addressable as s16-s31; saved
 *                           only when __ARM_FP is defined, but the slots
 *                           are always allocated so the byte layout is
 *                           stable across soft/hard-float builds).
 *
 * 10*4 bytes of GPR slots + 8*8 bytes of fp_regs slots = 104 bytes of
 * payload, padded to 112 by the struct's 16-byte alignment.  fp_regs
 * starts at offset 40.  Fits in the 256-byte storage carved out by
 * jmp_buf and coro_ctx.
 *
 * SAVE_INTO / RESTORE_FROM are C string-concatenation macros, so the same
 * instruction sequence is emitted in setjmp, longjmp, and
 * __kit_coro_switch.  The VFP half is gated by a C-level #ifdef on
 * __ARM_FP: a preprocessor directive cannot appear inside the asm string
 * literal, so the choice is made by cpp, which picks one macro body
 * before the assembler sees anything.
 *
 * Symbol naming uses __USER_LABEL_PREFIX__ so labels match the C
 * compiler's call-site mangling on both ELF (no prefix) and Mach-O
 * (leading "_").
31 */ 32 33 #include <kit/coro.h> 34 #include <setjmp.h> 35 #include <stddef.h> 36 #include <stdint.h> 37 38 struct __kit_arm32_ctx { 39 uintptr_t regs[10]; 40 uint64_t fp_regs[8]; 41 } __attribute__((aligned(16))); 42 43 _Static_assert(sizeof(struct __kit_arm32_ctx) == 112, "layout"); 44 _Static_assert(_Alignof(struct __kit_arm32_ctx) == 16, "align"); 45 _Static_assert(offsetof(struct __kit_arm32_ctx, fp_regs) == 40, "fp off"); 46 _Static_assert(sizeof(struct __kit_arm32_ctx) <= sizeof(coro_ctx), 47 "fits coro_ctx"); 48 _Static_assert(sizeof(struct __kit_arm32_ctx) <= sizeof(jmp_buf), 49 "fits jmp_buf"); 50 _Static_assert(_Alignof(coro_ctx) >= _Alignof(struct __kit_arm32_ctx), 51 "align coro_ctx"); 52 53 extern void __kit_coro_trampoline(void); 54 55 void __kit_coro_ctx_init(coro_ctx* ctx, void* stack_base, size_t stack_len, 56 void (*entry)(uintptr_t)) { 57 struct __kit_arm32_ctx* c = (struct __kit_arm32_ctx*)ctx; 58 59 /* ARM32 stacks grow down; align top to 16 (AAPCS public-boundary 60 requirement is 8, but coro stacks promise CORO_STACK_ALIGN=16). */ 61 uintptr_t top = (uintptr_t)stack_base + stack_len; 62 top &= ~(uintptr_t)(CORO_STACK_ALIGN - 1); 63 64 for (size_t i = 0; i < sizeof(*c) / sizeof(uintptr_t); ++i) 65 ((uintptr_t*)c)[i] = 0; 66 67 c->regs[0] = (uintptr_t)entry; /* r4 -- entry fn */ 68 c->regs[3] = 0; /* r7 -- frame ptr */ 69 c->regs[8] = top; /* sp */ 70 c->regs[9] = (uintptr_t)__kit_coro_trampoline; /* lr */ 71 } 72 73 #define STR_(x) #x 74 #define STR(x) STR_(x) 75 #define SYM(n) STR(__USER_LABEL_PREFIX__) #n 76 77 /* Save/restore macros. The VFP half is conditional on __ARM_FP at the 78 C-cpp level -- by the time the inline assembler sees the string, 79 only one variant remains. The byte offsets match the struct layout 80 regardless (slots are always allocated). 
*/ 81 #ifdef __ARM_FP 82 #define SAVE_INTO(reg) \ 83 " str r4, [" reg \ 84 ", #0]\n" \ 85 " str r5, [" reg \ 86 ", #4]\n" \ 87 " str r6, [" reg \ 88 ", #8]\n" \ 89 " str r7, [" reg \ 90 ", #12]\n" \ 91 " str r8, [" reg \ 92 ", #16]\n" \ 93 " str r9, [" reg \ 94 ", #20]\n" \ 95 " str r10, [" reg \ 96 ", #24]\n" \ 97 " str r11, [" reg \ 98 ", #28]\n" \ 99 " str sp, [" reg \ 100 ", #32]\n" \ 101 " str lr, [" reg \ 102 ", #36]\n" \ 103 " vstr d8, [" reg \ 104 ", #40]\n" \ 105 " vstr d9, [" reg \ 106 ", #48]\n" \ 107 " vstr d10, [" reg \ 108 ", #56]\n" \ 109 " vstr d11, [" reg \ 110 ", #64]\n" \ 111 " vstr d12, [" reg \ 112 ", #72]\n" \ 113 " vstr d13, [" reg \ 114 ", #80]\n" \ 115 " vstr d14, [" reg \ 116 ", #88]\n" \ 117 " vstr d15, [" reg ", #96]\n" 118 119 #define RESTORE_FROM(reg) \ 120 " vldr d8, [" reg \ 121 ", #40]\n" \ 122 " vldr d9, [" reg \ 123 ", #48]\n" \ 124 " vldr d10, [" reg \ 125 ", #56]\n" \ 126 " vldr d11, [" reg \ 127 ", #64]\n" \ 128 " vldr d12, [" reg \ 129 ", #72]\n" \ 130 " vldr d13, [" reg \ 131 ", #80]\n" \ 132 " vldr d14, [" reg \ 133 ", #88]\n" \ 134 " vldr d15, [" reg \ 135 ", #96]\n" \ 136 " ldr r4, [" reg \ 137 ", #0]\n" \ 138 " ldr r5, [" reg \ 139 ", #4]\n" \ 140 " ldr r6, [" reg \ 141 ", #8]\n" \ 142 " ldr r7, [" reg \ 143 ", #12]\n" \ 144 " ldr r8, [" reg \ 145 ", #16]\n" \ 146 " ldr r9, [" reg \ 147 ", #20]\n" \ 148 " ldr r10, [" reg \ 149 ", #24]\n" \ 150 " ldr r11, [" reg \ 151 ", #28]\n" \ 152 " ldr sp, [" reg \ 153 ", #32]\n" \ 154 " ldr lr, [" reg ", #36]\n" 155 #else 156 #define SAVE_INTO(reg) \ 157 " str r4, [" reg \ 158 ", #0]\n" \ 159 " str r5, [" reg \ 160 ", #4]\n" \ 161 " str r6, [" reg \ 162 ", #8]\n" \ 163 " str r7, [" reg \ 164 ", #12]\n" \ 165 " str r8, [" reg \ 166 ", #16]\n" \ 167 " str r9, [" reg \ 168 ", #20]\n" \ 169 " str r10, [" reg \ 170 ", #24]\n" \ 171 " str r11, [" reg \ 172 ", #28]\n" \ 173 " str sp, [" reg \ 174 ", #32]\n" \ 175 " str lr, [" reg ", #36]\n" 176 177 #define RESTORE_FROM(reg) \ 178 " ldr r4, 
[" reg \ 179 ", #0]\n" \ 180 " ldr r5, [" reg \ 181 ", #4]\n" \ 182 " ldr r6, [" reg \ 183 ", #8]\n" \ 184 " ldr r7, [" reg \ 185 ", #12]\n" \ 186 " ldr r8, [" reg \ 187 ", #16]\n" \ 188 " ldr r9, [" reg \ 189 ", #20]\n" \ 190 " ldr r10, [" reg \ 191 ", #24]\n" \ 192 " ldr r11, [" reg \ 193 ", #28]\n" \ 194 " ldr sp, [" reg \ 195 ", #32]\n" \ 196 " ldr lr, [" reg ", #36]\n" 197 #endif 198 199 __asm__ ( 200 ".syntax unified\n" 201 ".thumb\n" 202 ".text\n" 203 ".align 2\n" 204 205 /* setjmp(env) -- env in r0. lr at call time is the return address 206 into the caller, exactly what longjmp must restore. */ 207 ".weak " SYM(setjmp) "\n" 208 ".thumb_func\n" 209 ".type " SYM(setjmp) ", %function\n" 210 SYM(setjmp) ":\n" 211 SAVE_INTO("r0") 212 " movs r0, #0\n" 213 " bx lr\n" 214 215 /* longjmp(env, val) -- env in r0, val in r1. 216 longjmp(_, 0) must deliver 1 (C11 7.13.2.1p4); the IT block 217 gives r1 = (r1 == 0) ? 1 : r1, then we move it into r0 and 218 branch to the saved lr. */ 219 ".weak " SYM(longjmp) "\n" 220 ".thumb_func\n" 221 ".type " SYM(longjmp) ", %function\n" 222 SYM(longjmp) ":\n" 223 RESTORE_FROM("r0") 224 " cmp r1, #0\n" 225 " it eq\n" 226 " moveq r1, #1\n" 227 " mov r0, r1\n" 228 " bx lr\n" 229 230 /* __kit_coro_switch(from, to, value) -- r0=from, r1=to, r2=value. 231 Save into [r0], restore from [r1], deliver r2 in r0. The lr 232 loaded by RESTORE_FROM is either a real return address (a 233 previously-suspended coro) or __kit_coro_trampoline (a fresh 234 coro initialized by __kit_coro_ctx_init). Either way `bx lr` lands there 235 with r0 holding `value`. 
*/ 236 ".globl " SYM(__kit_coro_switch) "\n" 237 ".thumb_func\n" 238 ".type " SYM(__kit_coro_switch) ", %function\n" 239 SYM(__kit_coro_switch) ":\n" 240 SAVE_INTO("r0") 241 RESTORE_FROM("r1") 242 " mov r0, r2\n" 243 " bx lr\n" 244 245 /* __kit_coro_trampoline -- on first entry r0 = value (delivered 246 by __kit_coro_switch's `mov r0, r2`), r4 = entry fn (set by __kit_coro_ctx_init), 247 sp aligned to 16. udf if entry returns. */ 248 ".globl " SYM(__kit_coro_trampoline) "\n" 249 ".thumb_func\n" 250 ".type " SYM(__kit_coro_trampoline) ", %function\n" 251 SYM(__kit_coro_trampoline) ":\n" 252 " blx r4\n" 253 " udf #0\n" 254 255 ".section .note.GNU-stack,\"\",%progbits\n" 256 );