start.c (8097B)
/* Freestanding _start for Path E (ELF exec) tests.
 *
 * Convention:
 *   test_main()      — primary test body; returns 0 on pass.
 *   test_post_fini() — optional post-destructor check (weak default: 0).
 *
 * Lifecycle: TLS setup → preinit → ctors → test_main → dtors →
 * test_post_fini → exit.  test_post_fini only runs when test_main
 * returned 0; its result then becomes the exit status.
 *
 * kit-ld defines:
 *   __init_array_start/end, __fini_array_start/end — sorted ctor/dtor
 *       spans (synthesized around the corresponding SHT_*_ARRAY sections).
 *   __tdata_start, __tdata_end — .tdata template bytes; identical when
 *       there is no TLS (length 0).
 *   __tbss_size — SK_ABS, holds the .tbss byte count as its symbol value.
 * __preinit_array_start/end are declared below and walked the same way.
 *
 * The TLS prologue runs unconditionally: with no TLS in the image the
 * .tdata length and the .tbss size are both 0, so the copy and zero-fill
 * loops are no-ops.  An image whose TLS does not fit in the static block
 * exits with TLS_OVERFLOW_EXIT_CODE instead of writing out of bounds. */

extern int test_main(void);
__attribute__((weak)) int test_post_fini(void) { return 0; }

typedef void (*VoidFn)(void);
extern VoidFn __preinit_array_start[];
extern VoidFn __preinit_array_end[];
extern VoidFn __init_array_start[];
extern VoidFn __init_array_end[];
extern VoidFn __fini_array_start[];
extern VoidFn __fini_array_end[];

extern char __tdata_start[];
extern char __tdata_end[];
extern char __tbss_size[]; /* SK_ABS: address-of yields the byte count */

/* TLS-block prologue layout — per-arch ABI dictates whether the TCB sits
 * before or after .tdata in the thread-pointer-relative image.  AArch64
 * keeps a 16-byte reserved TCB in front of the data; SysV-x86_64 uses TLS
 * variant II (negative offsets from the thread pointer, see tls_init);
 * RISC-V LP64 follows variant I, and tls_init reserves the first 16 bytes
 * of the block before the data. */
#define AARCH64_TCB_SIZE 16
#define RISCV_TCB_SIZE 16
/* Bytes kept at the top of g_tls_block for the x86_64 TCB. */
#define X86_64_TCB_RESERVE 64

/* Exit status reported when .tdata + .tbss do not fit in g_tls_block. */
#define TLS_OVERFLOW_EXIT_CODE 125

/* Per-thread TLS image; the test harness is single-threaded so a
 * file-scope buffer is enough.  Sized generously for any test we run
 * here.  Layout: [TCB | .tdata copy | .tbss zero-fill] for variants
 * that put the TCB first. */
static char g_tls_block[4096] __attribute__((aligned(16)));

/* IFUNC startup init.  Mirrors rt/lib/kit/ifunc_init.c — duplicated
 * here so the test harness doesn't need libkit_rt.a on the link
 * line.  When the linker emits a static ET_EXEC and the image
 * contains any STT_GNU_IFUNC, layout_iplt synthesizes a startup-array
 * entry pointing at __kit_ifunc_init; _start below walks
 * .preinit_array and then .init_array and calls each entry, so this
 * fills every .igot.plt slot before test_main runs.
 *
 * The table between __start_iplt_pairs and __stop_iplt_pairs is read as
 * (resolver, slot) pointer pairs: each resolver is called with no
 * arguments and its result is stored through the slot pointer.  Both
 * bounds are weak; if either is null the function does nothing. */
extern void* __start_iplt_pairs[] __attribute__((weak));
extern void* __stop_iplt_pairs[] __attribute__((weak));
void __kit_ifunc_init(void) {
  void** p = __start_iplt_pairs;
  void** end = __stop_iplt_pairs;
  if (!p || !end) return;
  for (; p < end; p += 2) {
    void* (*r)(void) = (void* (*)(void))p[0];
    void** slot = (void**)p[1];
    *slot = r();
  }
}

#if defined(__APPLE__)
/* macOS doesn't expose a stable syscall ABI — all syscalls must go
 * through libSystem.dylib.  start.c on macOS therefore calls libc
 * `exit` rather than emitting `svc #0x80` inline; the kit Mach-O
 * exe linker resolves the import via LC_LOAD_DYLIB libSystem.B.dylib
 * and the dyld bind info / chained-fixups stream. */
extern void exit(int) __attribute__((noreturn));
#endif

/* Terminate the process with status `code`.  Never returns.
 * On Darwin this calls libc exit(); elsewhere it issues the
 * exit_group syscall directly for the target architecture. */
__attribute__((noreturn)) static void do_exit(int code) {
#if defined(__APPLE__)
  exit(code);
#elif defined(__aarch64__)
  register long x8 __asm__("x8") = 94; /* sys_exit_group */
  register long x0 __asm__("x0") = code;
  __asm__ volatile("svc #0" ::"r"(x8), "r"(x0) : "memory");
#elif defined(__x86_64__)
  register long rax __asm__("rax") = 231; /* sys_exit_group */
  register long rdi __asm__("rdi") = code;
  __asm__ volatile("syscall" ::"r"(rax), "r"(rdi) : "memory");
#elif defined(__riscv) && __riscv_xlen == 64
  register long a7 __asm__("a7") = 94; /* sys_exit_group */
  register long a0 __asm__("a0") = code;
  __asm__ volatile("ecall" ::"r"(a7), "r"(a0) : "memory");
#else
#error "start.c: unsupported architecture"
#endif
  __builtin_unreachable();
}

#if !defined(__APPLE__)
/* Exit with TLS_OVERFLOW_EXIT_CODE unless td_n + bs_n <= cap.  The
 * comparison is arranged so the sum is never formed and cannot wrap. */
static void tls_require_fit(unsigned long td_n, unsigned long bs_n,
                            unsigned long cap) {
  if (td_n > cap || bs_n > cap - td_n) do_exit(TLS_OVERFLOW_EXIT_CODE);
}
#endif

/* Build the single thread's TLS image in g_tls_block and install the
 * thread pointer for the target architecture: copy the .tdata template,
 * zero-fill .tbss, then set tpidr_el0 / fs base / tp.  No-op on Darwin. */
static void tls_init(void) {
#if defined(__APPLE__)
  /* On Darwin, tpidr_el0 is owned by libsystem/dyld; freestanding
   * tests don't synthesize TLS roots (31_tls_local_exec is N/A on
   * Mach-O), so the prologue is a no-op. */
  return;
#else
  unsigned long td_n = (unsigned long)(__tdata_end - __tdata_start);
  unsigned long bs_n = (unsigned long)(unsigned long long)__tbss_size;
  unsigned long i;
  /* Launder bs_n past clang's "extern char[] has non-null address"
   * assumption — without this the .tbss zero loop is peeled and
   * unconditionally writes one byte at tls[td_n], which on the SysV
   * x86_64 variant II layout (TCB sits at tls[td_n]) clobbers the
   * thread-pointer self-pointer for any TLS image with bs_n == 0. */
  __asm__ volatile("" : "+r"(bs_n));
#if defined(__aarch64__)
  /* Variant I (TCB first): tp -> [TCB(16) | tdata | tbss] */
  tls_require_fit(td_n, bs_n, sizeof(g_tls_block) - AARCH64_TCB_SIZE);
  char* dst = g_tls_block + AARCH64_TCB_SIZE;
  for (i = 0; i < td_n; ++i) dst[i] = __tdata_start[i];
  for (i = 0; i < bs_n; ++i) dst[td_n + i] = 0;
  __asm__ volatile("msr tpidr_el0, %0" ::"r"(g_tls_block) : "memory");
#elif defined(__x86_64__)
  /* SysV TLS variant II: TLS bytes at *negative* offsets from the
   * thread pointer (fs base).  Lay out [tdata | tbss | TCB] where the
   * TCB self-pointer sits at offset 0.  The first slot of the TCB
   * must be the thread pointer (self) per ELF ABI.
   * NOTE(review): the image is placed at tcb - (td_n + bs_n) with no
   * rounding to the TLS segment alignment — confirm this matches the
   * offsets kit-ld assigns. */
  tls_require_fit(td_n, bs_n, sizeof(g_tls_block) - X86_64_TCB_RESERVE);
  char* tcb = g_tls_block + sizeof(g_tls_block) - X86_64_TCB_RESERVE;
  *(void**)tcb = tcb;
  char* tls = tcb - (td_n + bs_n);
  for (i = 0; i < td_n; ++i) tls[i] = __tdata_start[i];
  for (i = 0; i < bs_n; ++i) tls[td_n + i] = 0;
  /* arch_prctl(ARCH_SET_FS, tcb): syscall 158, code 0x1002.  The
   * syscall instruction clobbers rcx and r11. */
  register long rax __asm__("rax") = 158;
  register long rdi __asm__("rdi") = 0x1002;
  register long rsi __asm__("rsi") = (long)tcb;
  __asm__ volatile("syscall"
                   : "+r"(rax)
                   : "r"(rdi), "r"(rsi)
                   : "rcx", "r11", "memory");
#elif defined(__riscv) && __riscv_xlen == 64
  /* Variant I: tp -> [TCB | tdata | tbss].  The first 16 bytes of the
   * block are reserved as the TCB, .tdata is placed immediately after
   * that reservation, and tp is set to the start of the block.
   * NOTE(review): this presumes linker-generated code resolves &var
   * with the 16-byte reservation already accounted for — confirm
   * against kit-ld's RISC-V TLS offset convention. */
  tls_require_fit(td_n, bs_n, sizeof(g_tls_block) - RISCV_TCB_SIZE);
  char* dst = g_tls_block + RISCV_TCB_SIZE;
  for (i = 0; i < td_n; ++i) dst[i] = __tdata_start[i];
  for (i = 0; i < bs_n; ++i) dst[td_n + i] = 0;
  __asm__ volatile("mv tp, %0" ::"r"(g_tls_block) : "memory");
#else
#error "start.c: unsupported architecture"
#endif
#endif /* !__APPLE__ */
}

/* On x86_64 the kernel hands _start an rsp that is 16-aligned (so argc
 * lands on a 16-byte boundary), but clang compiles _start as an ordinary
 * function assuming the standard SysV contract of rsp ≡ 8 (mod 16) on
 * entry — off by 8.  force_align_arg_pointer makes the prologue realign
 * rsp itself so every `call` downstream lands at the canonical
 * rsp ≡ 8 (mod 16).  aarch64/rv64 ABIs keep SP 16-aligned at all times,
 * so no analogue is needed there. */
#if defined(__x86_64__)
__attribute__((force_align_arg_pointer))
#endif
void _start(void) {
  VoidFn* p;
  int result;

  tls_init();

#if defined(__APPLE__)
  /* Mach-O: dyld walks __DATA,__mod_init_func before _start runs, so
   * the harness must NOT walk __init_array_start/end — the boundary
   * symbols are synthesized into the __got region (no real init array
   * on Mach-O) and dereferencing them faults. */
  (void)p;
#else
  /* SHT_PREINIT_ARRAY runs strictly before .init_array.  kit-ld
   * lands its synthetic __kit_ifunc_init entry here so IFUNC
   * slots are filled before any user ctor or test_main runs. */
  for (p = __preinit_array_start; p != __preinit_array_end; ++p) (*p)();
  for (p = __init_array_start; p != __init_array_end; ++p) (*p)();
#endif

  result = test_main();

#if !defined(__APPLE__)
  /* Destructors run in reverse order.  Pre-decrementing inside the body
   * keeps p within [start, end] at all times; it never steps to one
   * element before the array, even when the array is empty. */
  for (p = __fini_array_end; p != __fini_array_start;) (*--p)();
#endif

  /* The post-destructor check only runs after a passing test_main. */
  if (result == 0) result = test_post_fini();

  do_exit(result);
}