boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

atomic.s (3393B)


      1 /* tcc-build aarch64 atomic primitives.
      2  *
      3  * Stock musl's atomic primitives are inline asm with output operands
      4  * (LL/SC pairs and `dmb ish` named-option), unsupported by
      5  * arm64-asm.c phase 1+2. We can't decompose into separate extern
      6  * a_ll/a_sc — function-call boundaries between ldaxr and stlxr clear
      7  * the exclusive monitor on real hardware (and on QEMU/Apple Silicon),
      8  * making the LL/SC retry loop spin forever. So provide a_cas / a_cas_p as
      9  * single asm functions whose LL/SC pair lives inside one call.
     10  *
     11  * Two arm64-asm.c phase-2 quirks shape the layout below:
     12  *   1. Forward `b.cond` / `cbz` / `cbnz` to a same-file label emits
     13  *      `CONDBR19 reloc (unsupported)`.
     14  *   2. Forward unconditional `b` to a same-file label silently
     15  *      assembles as `b +0` (branch-to-self) — no error, but the
     16  *      function turns into an infinite loop.
     17  * Backward branches resolve correctly (offset known at emit time);
     18  * branches to external symbols (e.g. `bl __syscall`) go through
     19  * JUMP26/CALL26 relocations which arm64-link.c handles. So the trick
     20  * is: define each function's "exit" block BEFORE the function entry,
     21  * so every conditional branch out of the loop is backward, and the
     22  * tail unconditional `b` is also backward.
     23  *
     24  * Mnemonics ldaxr, stlxr, dmb, rbit, clz are outside arm64-asm.c
     25  * phase 1+2; emit them as raw .long words. Phase 1+2 covers cmp,
     26  * b.cond (backward only), cbnz (backward), mov, ret, b (backward to
     27  * same-file or any-direction to extern).
     28  *
     29  * Encoding cheat sheet:
     30  *   0x885FFC03  ldaxr w3, [x0]
     31  *   0xC85FFC03  ldaxr x3, [x0]
     32  *   0x8804FC02  stlxr w4, w2, [x0]
     33  *   0xC804FC02  stlxr w4, x2, [x0]
     34  *   0xD5033BBF  dmb ish
     35  *   0xDAC00000  rbit x0, x0
     36  *   0xDAC01000  clz  x0, x0
     37  *
     38  * Note: tcc treats `.word` as 2 bytes; use `.long` for 4. */
     39 
     40 .text
     41 
     42 /* Exit blocks for a_cas / a_cas_p, placed ahead of the function entries so that
     43  * every branch to them is BACKWARD. They belong to no function and are never
     44  * fallen into: the only way in is the b.ne / b of the matching function below. */
     45 .Lcas_done:
     46 	.long 0xD5033BBF      /* dmb ish — raw word: dmb is not assembled by arm64-asm.c */
     47 	mov w0, w3            /* w0 = old value that a_cas loaded into w3 (32-bit result) */
     48 	ret                   /* a_cas branches here without linking, so this returns to its caller */
     49 
     50 .Lcasp_done:
     51 	.long 0xD5033BBF      /* dmb ish — exit path of a_cas_p, entered only by its backward b.ne / b */
     52 	mov x0, x3            /* x0 = old value that a_cas_p loaded into x3 (64-bit result) */
     53 	ret                   /* a_cas_p branches here without linking, so this returns to its caller */
     54 
     55 /* int a_cas(volatile int *p, int t, int s) — 32-bit CAS: store s to *p only if *p == t; returns the value read from *p. */
     56 .global a_cas
     57 .type a_cas,@function
     58 a_cas:
     59 	.long 0x885FFC03      /* ldaxr w3, [x0]    : w3 = old = *p (x0 = p), load-acquire exclusive */
     60 	cmp w3, w1            /* old == t ?  (w1 = t, the expected value) */
     61 	b.ne .Lcas_done       /* mismatch: exit without storing — backward branch, label is defined above */
     62 	.long 0x8804FC02      /* stlxr w4, w2, [x0]: try *p = s (w2 = s); w4 = status, 0 on success */
     63 	cbnz w4, a_cas        /* status != 0: store failed — backward branch, reload and retry from entry */
     64 	b .Lcas_done          /* stored: leave through the exit block — backward, so not assembled as `b +0` */
     65 
     66 /* void *a_cas_p(volatile void *p, void *t, void *s) — 64-bit CAS: store s to *p only if *p == t; returns the value read from *p. */
     67 .global a_cas_p
     68 .type a_cas_p,@function
     69 a_cas_p:
     70 	.long 0xC85FFC03      /* ldaxr x3, [x0]    : x3 = old = *p (x0 = p), load-acquire exclusive */
     71 	cmp x3, x1            /* old == t ?  (x1 = t, the expected pointer value) */
     72 	b.ne .Lcasp_done      /* mismatch: exit without storing — backward branch, label is defined above */
     73 	.long 0xC804FC02      /* stlxr w4, x2, [x0]: try *p = s (x2 = s); w4 = status, 0 on success */
     74 	cbnz w4, a_cas_p      /* status != 0: store failed — backward branch, reload and retry from entry */
     75 	b .Lcasp_done         /* stored: leave through the exit block — backward, so not assembled as `b +0` */
     76 
     77 /* void a_barrier(void) — executes one `dmb ish` data memory barrier and returns; takes and returns nothing. */
     78 .global a_barrier
     79 .type a_barrier,@function
     80 a_barrier:
     81 	.long 0xD5033BBF      /* dmb ish, as a raw word because arm64-asm.c does not assemble dmb */
     82 	ret
     83 
     84 /* int a_ctz_64(uint64_t x) — count trailing zero bits of x (passed in x0): bit-reverse, then count leading zeros. */
     85 .global a_ctz_64
     86 .type a_ctz_64,@function
     87 a_ctz_64:
     88 	.long 0xDAC00000      /* rbit x0, x0 : reverse the bit order, so trailing zeros become leading zeros */
     89 	.long 0xDAC01000      /* clz  x0, x0 : count them into x0, the return register (clz of 0 is 64, so x == 0 gives 64) */
     90 	ret
     91 
     92 /* int a_clz_64(uint64_t x) — count leading zero bits of x (passed in x0); the count is returned in x0. */
     93 .global a_clz_64
     94 .type a_clz_64,@function
     95 a_clz_64:
     96 	.long 0xDAC01000      /* clz x0, x0 — raw word because arm64-asm.c does not assemble clz */
     97 	ret