kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

jit_tls_relax_test.c (8844B)


      1 /* In-process JIT TLS Local-Exec relaxation for the Windows (COFF) TEB idiom.
      2  *
      3  * The single-threaded JIT rewrites the PE TLS-access idiom (read TEB
      4  * ThreadLocalStoragePointer, index by `_tls_index`, then `+ SECREL(var)`) to
      5  * address the in-image .tls instance directly, dropping the TEB/`_tls_index`
      6  * indirection (see src/link/link_jit.c and the per-arch jit_tls_le_relax). The
      7  * aarch64 path is verified end-to-end on the Windows VM; this pins the
      8  * byte-level rewrite for BOTH arches, and is the only automated guard for the
      9  * x86-64 idiom (x64-windows self-host can't run end-to-end yet).
     10  *
     11  * Each case hand-encodes the exact idiom the codegen emits
     12  * (x64_tls_addr_of_win64 / aa_tls_addr_of_win), drives the relax, and asserts
     13  * the result is the in-image address materialization. The x64 builder is pinned
     14  * to a golden captured from real `kit cc -c` disassembly so it can't drift away
     15  * from the codegen it must match.
     16  *
     17  * Exit 0 = pass; non-zero = fail. */
     18 
     19 #include <stdint.h>
     20 #include <string.h>
     21 
     22 #include <kit/core.h>
     23 
     24 #include "core/core.h"
     25 #include "lib/kit_unit.h"
     26 #include "obj/obj.h"
     27 
/* Per-arch TLS Local-Exec relax entry points under test. Reached via
 * LinkArchDesc.jit_tls_le_relax; not exposed in a header, so they are
 * redeclared here.
 *
 * As driven by this file: `k` is the relocation kind at the site, `site`
 * points into the emitted idiom at the relocated field (x64: the trailing
 * lea's SECREL disp32; aa64: the HIGH12A add word), `storage` is the address
 * the rewritten code must materialize, and `site_pc` is the run-time address
 * standing in for `site` (deliberately different from the host buffer).
 * NOTE(review): parameter roles are inferred from the call sites below —
 * confirm against the definitions in the linker sources. */
void x64_jit_tls_le_relax(Compiler* c, RelocKind k, u8* site, u64 storage,
                          u64 site_pc);
void aa64_jit_tls_le_relax(Compiler* c, RelocKind k, u8* site, u64 storage,
                           u64 site_pc);
     33 
/* Shared unit-test state for the whole file: main() passes it to
 * kit_unit_init / kit_unit_summary / kit_unit_status, and compiler_for() hands
 * it to kit_unit_compiler_new. */
static KitUnit g_u;
/* Shorthand that forwards a check to CU_EXPECT against the shared g_u; every
 * use in this file supplies a printf-style message after `cond`. */
#define EXPECT(cond, ...) CU_EXPECT(&g_u, cond, __VA_ARGS__)
     36 
     37 static KitCompiler* compiler_for(KitArchKind arch) {
     38   static KitCompiler* aa64 = NULL;
     39   static KitCompiler* x64 = NULL;
     40   KitCompiler** slot = arch == KIT_ARCH_ARM_64 ? &aa64 : &x64;
     41   if (!*slot) {
     42     /* Windows/COFF so the compiler matches the idiom's origin; only the panic
     43      * path reads it, so the exact spec barely matters. */
     44     KitTargetSpec t = kit_unit_target(arch, KIT_OS_WINDOWS, KIT_OBJ_COFF);
     45     if (kit_unit_compiler_new(&g_u, t, slot) != KIT_OK || !*slot) {
     46       fprintf(stderr, "compiler_new failed for arch=%d\n", (int)arch);
     47       exit(2);
     48     }
     49   }
     50   return *slot;
     51 }
     52 
     53 static u32 rd32(const u8* p) {
     54   return (u32)p[0] | ((u32)p[1] << 8) | ((u32)p[2] << 16) | ((u32)p[3] << 24);
     55 }
     56 
     57 /* ---- x86-64 -------------------------------------------------------------- */
     58 
     59 /* Emit the 4-instruction Win64 TLS idiom for destination register `rd` into
     60  * `b`, mirroring x64_tls_addr_of_win64 byte-for-byte. Returns total length;
     61  * *site_off receives the offset of the trailing lea's SECREL disp32. */
     62 static u32 x64_emit_idiom(u8* b, u32 rd, u32* site_off) {
     63   u32 n = 0;
     64   /* (1) mov rd, gs:[0x58]  (9 bytes) */
     65   b[n++] = 0x65;
     66   b[n++] = (u8)(0x48u | ((rd & 8u) ? 0x04u : 0u));
     67   b[n++] = 0x8B;
     68   b[n++] = (u8)(((rd & 7u) << 3) | 4u);
     69   b[n++] = 0x25;
     70   b[n++] = 0x58;
     71   b[n++] = 0;
     72   b[n++] = 0;
     73   b[n++] = 0;
     74   /* (2) mov r11d, [rip+_tls_index]  (7 bytes) */
     75   b[n++] = 0x44;
     76   b[n++] = 0x8B;
     77   b[n++] = 0x1D;
     78   b[n++] = 0;
     79   b[n++] = 0;
     80   b[n++] = 0;
     81   b[n++] = 0;
     82   /* (3) mov rd, [rd+r11*8]  (4 or 5 bytes) */
     83   b[n++] = (u8)(0x4Au | ((rd & 8u) ? 0x05u : 0u));
     84   b[n++] = 0x8B;
     85   if ((rd & 7u) == 5u) {
     86     b[n++] = (u8)((1u << 6) | ((rd & 7u) << 3) | 4u);
     87     b[n++] = (u8)(0xD8u | (rd & 7u));
     88     b[n++] = 0;
     89   } else {
     90     b[n++] = (u8)(((rd & 7u) << 3) | 4u);
     91     b[n++] = (u8)(0xD8u | (rd & 7u));
     92   }
     93   /* (4) lea rd, [rd + sym@SECREL]  (7 or 8 bytes) */
     94   b[n++] = (u8)(0x48u | ((rd & 8u) ? 0x05u : 0u));
     95   b[n++] = 0x8D;
     96   if ((rd & 7u) == 4u) {
     97     b[n++] = (u8)((2u << 6) | ((rd & 7u) << 3) | 4u);
     98     b[n++] = (u8)((4u << 3) | (rd & 7u));
     99   } else {
    100     b[n++] = (u8)((2u << 6) | ((rd & 7u) << 3) | (rd & 7u));
    101   }
    102   *site_off = n;
    103   b[n++] = 0;
    104   b[n++] = 0;
    105   b[n++] = 0;
    106   b[n++] = 0;
    107   return n;
    108 }
    109 
    110 static void x64_check(u32 rd) {
    111   u8 buf[40];
    112   u32 site_off;
    113   u32 total = x64_emit_idiom(buf, rd, &site_off);
    114   /* Distinct write/runtime "addresses": disp must use site_pc, not &buf. */
    115   const u64 site_pc = 0x140005000ull + site_off;
    116   const u64 storage = 0x140009123ull;
    117   u32 lea = total - 7u; /* the rewritten 7-byte rip-lea sits at the block end */
    118   i64 want_disp = (i64)storage - (i64)(site_pc + 4u);
    119   u32 i;
    120   x64_jit_tls_le_relax(compiler_for(KIT_ARCH_X86_64), R_COFF_SECREL,
    121                        &buf[site_off], storage, site_pc);
    122   for (i = 0; i < lea; ++i)
    123     EXPECT(buf[i] == 0x90u, "x64 rd=%u: byte %u not NOP (0x%02x)", rd, i,
    124            buf[i]);
    125   EXPECT(buf[lea] == (u8)(0x48u | ((rd >= 8u) ? 0x04u : 0u)),
    126          "x64 rd=%u: lea REX 0x%02x", rd, buf[lea]);
    127   EXPECT(buf[lea + 1] == 0x8Du, "x64 rd=%u: lea opcode 0x%02x", rd,
    128          buf[lea + 1]);
    129   EXPECT(buf[lea + 2] == (u8)(((rd & 7u) << 3) | 5u),
    130          "x64 rd=%u: lea modrm 0x%02x", rd, buf[lea + 2]);
    131   EXPECT((i32)rd32(&buf[lea + 3]) == (i32)want_disp,
    132          "x64 rd=%u: disp 0x%08x want 0x%08x", rd, rd32(&buf[lea + 3]),
    133          (u32)want_disp);
    134 }
    135 
    136 /* Pin x64_emit_idiom to real codegen: the rd=r8 idiom captured from
    137  * `kit cc -target x86_64-windows -c` disassembly. A drift in the builder (and
    138  * thus a relax tested against a fictional idiom) turns this red. */
    139 static void x64_golden(void) {
    140   static const u8 want[] = {0x65, 0x4c, 0x8b, 0x04, 0x25, 0x58, 0x00, 0x00,
    141                             0x00, 0x44, 0x8b, 0x1d, 0x00, 0x00, 0x00, 0x00,
    142                             0x4f, 0x8b, 0x04, 0xd8, 0x4d, 0x8d, 0x80, 0x00,
    143                             0x00, 0x00, 0x00};
    144   u8 buf[40];
    145   u32 site_off;
    146   u32 total = x64_emit_idiom(buf, 8u /* r8 */, &site_off);
    147   EXPECT(total == sizeof want, "x64 golden length %u != %zu", total,
    148          sizeof want);
    149   EXPECT(memcmp(buf, want, sizeof want) == 0, "x64 golden idiom mismatch");
    150 }
    151 
    152 /* ---- aarch64 ------------------------------------------------------------- */
    153 
/* A64 NOP instruction word; aa_check expects it in every slot of the idiom
 * that the relax blanks out. */
#define AA_NOP 0xd503201fu
    155 
    156 static u32 aa_add_imm(u32 rd, u32 rn, u32 imm12, u32 sh) {
    157   return 0x91000000u | (sh << 22) | ((imm12 & 0xfffu) << 10) | ((rn & 0x1fu)
    158          << 5) | (rd & 0x1fu);
    159 }
    160 
    161 /* Emit the 7-instruction Win64/aarch64 TLS idiom for `rd` (aa_tls_addr_of_win).
    162  * Only instruction (6) — the HIGH12A add the relax keys on — needs a faithful
    163  * encoding; the rest are placeholders the relax overwrites unconditionally. */
    164 static u32 aa_emit_idiom(u32* w, u32 rd) {
    165   w[0] = 0xf9400240u | (18u << 5) | rd; /* ldr rd,[x18,#0x58]  (placeholder) */
    166   w[1] = 0x90000010u;                   /* adrp x16,_tls_index */
    167   w[2] = aa_add_imm(16u, 16u, 0u, 0u);  /* add  x16,x16,:lo12: */
    168   w[3] = 0xb9400210u;                   /* ldr  w16,[x16]      */
    169   w[4] = 0xf8607a00u | rd;              /* ldr  rd,[rd,x16,lsl#3] */
    170   w[5] = aa_add_imm(rd, rd, 0u, 1u);    /* add  rd,rd,:secrel_hi12: (HIGH12A) */
    171   w[6] = aa_add_imm(rd, rd, 0u, 0u);    /* add  rd,rd,:secrel_lo12: (LOW12A) */
    172   return 5u; /* word index of the HIGH12A site the relax keys on */
    173 }
    174 
    175 /* Decode ADRP+ADD (the relax's output) back to the absolute address it
    176  * materializes, so we confirm it equals `storage`. */
    177 static u64 aa_decode_adrp_add(u32 adrp, u32 add, u64 adrp_pc) {
    178   u32 immlo = (adrp >> 29) & 0x3u;
    179   u32 immhi = (adrp >> 5) & 0x7ffffu;
    180   i64 imm = (i64)(((u64)immhi << 2) | immlo);
    181   if (imm & (1ll << 20)) imm -= (1ll << 21); /* sign-extend 21 bits */
    182   u64 page = (adrp_pc & ~0xfffull) + ((u64)imm << 12);
    183   return page + ((add >> 10) & 0xfffu);
    184 }
    185 
    186 static void aa_check(u32 rd) {
    187   u32 w[7];
    188   u32 site_i = aa_emit_idiom(w, rd);
    189   const u64 site_pc = 0x140005000ull + site_i * 4u;
    190   const u64 storage = 0x140009123ull;
    191   u8* site = (u8*)&w[site_i];
    192   u32 i;
    193   aa64_jit_tls_le_relax(compiler_for(KIT_ARCH_ARM_64),
    194                         R_COFF_AARCH64_SECREL_HIGH12A, site, storage, site_pc);
    195   for (i = 0; i < 4u; ++i) /* instructions (1)-(4) -> NOP */
    196     EXPECT(w[i] == AA_NOP, "aa64 rd=%u: insn %u not NOP (0x%08x)", rd, i, w[i]);
    197   EXPECT((w[4] & 0x9f000000u) == 0x90000000u && (w[4] & 0x1fu) == rd,
    198          "aa64 rd=%u: insn(5) not ADRP rd (0x%08x)", rd, w[4]);
    199   EXPECT((w[5] & 0xff800000u) == 0x91000000u && (w[5] & 0x1fu) == rd &&
    200              ((w[5] >> 5) & 0x1fu) == rd,
    201          "aa64 rd=%u: insn(6) not ADD rd,rd,#imm (0x%08x)", rd, w[5]);
    202   EXPECT(w[6] == AA_NOP, "aa64 rd=%u: insn(7) not NOP (0x%08x)", rd, w[6]);
    203   EXPECT(aa_decode_adrp_add(w[4], w[5], site_pc - 4u) == storage,
    204          "aa64 rd=%u: ADRP+ADD != storage", rd);
    205 }
    206 
    207 int main(void) {
    208   /* Cover the x64 length-variant branches: rd&7==4 (rsp/r12 -> lea sib),
    209    * rd&7==5 (rbp/r13 -> mov disp8), and rd>=8 (REX.R/B). */
    210   static const u32 x64_rds[] = {0, 1, 4, 5, 8, 12, 13};
    211   static const u32 aa_rds[] = {0, 1, 9, 20};
    212   size_t i;
    213   kit_unit_init(&g_u);
    214   x64_golden();
    215   for (i = 0; i < sizeof x64_rds / sizeof x64_rds[0]; ++i) x64_check(x64_rds[i]);
    216   for (i = 0; i < sizeof aa_rds / sizeof aa_rds[0]; ++i) aa_check(aa_rds[i]);
    217   kit_unit_summary(&g_u, "jit_tls_relax_test");
    218   return kit_unit_status(&g_u);
    219 }