linux.c (7269B)
1 /* Linux-specific env bits: memfd_create dual-mapped exec memory (+ optional 2 * MAP_32BIT runtime-alias hint on x86_64 to keep direct call/jmp 3 * displacements in range from text), st_mtim mtime, dlsym pass-through 4 * with optional `_` retry, host_target OS=LINUX/OBJ=ELF. */ 5 6 #include <dlfcn.h> 7 #include <stdint.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <sys/mman.h> 11 #include <sys/stat.h> 12 #include <sys/syscall.h> 13 #include <unistd.h> 14 15 #include "env_posix.h" 16 17 /* ---------------- dual-mapped exec memory (memfd_create) ---------------- 18 * memfd_create gives us an anonymous fd; two mmaps of that fd alias the 19 * same physical pages at distinct VAs. Both aliases live across the 20 * close(fd) below. The runtime-alias hint and any extra mmap flags are 21 * arch-specific (x86_64 uses MAP_32BIT to keep direct displacements in 22 * range from text); see linux_exec_hint_<arch>.c. */ 23 KitStatus os_execmem_reserve_exec(size_t size, KitExecMemRegion* out) { 24 int fd = (int)syscall(SYS_memfd_create, "kit-jit", 0u); 25 int extra = env_execmem_runtime_extra_flags(); 26 void* w; 27 void* r; 28 ExecMemToken* tok; 29 if (fd < 0) return KIT_ERR; 30 if (ftruncate(fd, (off_t)size) != 0) { 31 close(fd); 32 return KIT_ERR; 33 } 34 35 w = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED | extra, fd, 0); 36 if (w == MAP_FAILED) { 37 close(fd); 38 return KIT_NOMEM; 39 } 40 r = mmap(env_execmem_low_runtime_hint(size), size, PROT_READ, 41 MAP_SHARED | extra, fd, 0); 42 if (r == MAP_FAILED) { 43 munmap(w, size); 44 close(fd); 45 return KIT_NOMEM; 46 } 47 close(fd); 48 49 tok = (ExecMemToken*)malloc(sizeof(*tok)); 50 if (!tok) { 51 munmap(r, size); 52 munmap(w, size); 53 return KIT_NOMEM; 54 } 55 tok->write_addr = w; 56 tok->runtime_addr = r; 57 tok->size = size; 58 59 exec_dual_register(w, r, size); 60 61 out->write = w; 62 out->runtime = r; 63 out->size = size; 64 out->token = tok; 65 return KIT_OK; 66 } 67 68 /* ---------------- dbg W^X dance ---------------- */ 69 /* Linux dual-mapping uses memfd: write alias and runtime alias have 70 * distinct VAs. Prefer the alias lookup; fall back to a transient 71 * mprotect of the runtime alias for single-mapping reservations. */ 72 73 static size_t page_floor(size_t v, size_t pg) { return v & ~(pg - 1); } 74 static size_t page_ceil(size_t v, size_t pg) { 75 return (v + pg - 1) & ~(pg - 1); 76 } 77 78 KitStatus os_dbg_code_write_begin(void* user, void* runtime_addr, size_t n, 79 void** write_out) { 80 size_t pg; 81 uintptr_t a; 82 uintptr_t base; 83 size_t span; 84 (void)user; 85 if (!runtime_addr || !n || !write_out) return KIT_INVALID; 86 if (exec_dual_lookup(runtime_addr, n, write_out) == 0) return KIT_OK; 87 pg = driver_host_page_size(); 88 a = (uintptr_t)runtime_addr; 89 base = page_floor(a, pg); 90 span = page_ceil((a - base) + n, pg); 91 if (mprotect((void*)base, span, PROT_READ | PROT_WRITE | PROT_EXEC) != 0) 92 return KIT_ERR; 93 *write_out = runtime_addr; 94 return KIT_OK; 95 } 96 97 void os_dbg_code_write_end(void* user, void* runtime_addr, size_t n) { 98 void* w; 99 size_t pg; 100 uintptr_t a; 101 uintptr_t base; 102 size_t span; 103 (void)user; 104 if (exec_dual_lookup(runtime_addr, n, &w) == 0) 105 return; /* dual: nothing to flip back */ 106 pg = driver_host_page_size(); 107 a = (uintptr_t)runtime_addr; 108 base = page_floor(a, pg); 109 span = page_ceil((a - base) + n, pg); 110 mprotect((void*)base, span, PROT_READ | PROT_EXEC); 111 } 112 113 void os_dbg_flush_icache(void* user, void* runtime_addr, size_t n) { 114 (void)user; 115 env_flush_icache(runtime_addr, n); 116 } 117 118 /* ---------------- st_mtim ---------------- */ 119 120 int os_stat_mtime_ns(const struct stat* sb, int64_t* out) { 121 *out = 122 (int64_t)sb->st_mtim.tv_sec * 1000000000LL + (int64_t)sb->st_mtim.tv_nsec; 123 return 0; 124 } 125 126 /* ---------------- dlsym ---------------- */ 127 /* ELF has no underscore mangling; pass through. The retry with a stripped 128 * leading underscore matches the old shim's behavior for users that hand 129 * us Mach-O-mangled names. */ 130 void* os_dlsym(const char* name) { 131 void* p = dlsym(RTLD_DEFAULT, name); 132 if (!p && name[0] == '_' && name[1] != '\0') 133 p = dlsym(RTLD_DEFAULT, name + 1); 134 return p; 135 } 136 137 /* ---------------- host_target os/obj ---------------- */ 138 139 void os_host_target_fill(KitTargetSpec* t) { 140 t->os = KIT_OS_LINUX; 141 t->obj = KIT_OBJ_ELF; 142 } 143 144 /* ---------------- self executable path ---------------- */ 145 /* /proc/self/exe is a kernel-maintained symlink to the running binary; reading 146 * it yields the canonical absolute path. readlink does not NUL-terminate and a 147 * truncated read is indistinguishable from an exact fit, so grow until the 148 * result fits with room for the terminator. */ 149 int driver_self_exe_path(DriverEnv* env, char** out, size_t* out_size) { 150 size_t cap = 256; 151 if (!env || !out || !out_size) return 1; 152 for (;;) { 153 char* buf = (char*)driver_alloc(env, cap); 154 ssize_t n; 155 if (!buf) return 1; 156 n = readlink("/proc/self/exe", buf, cap); 157 if (n < 0) { 158 driver_free(env, buf, cap); 159 return 1; 160 } 161 if ((size_t)n < cap) { 162 buf[n] = '\0'; 163 *out = buf; 164 *out_size = cap; 165 return 0; 166 } 167 driver_free(env, buf, cap); 168 if (cap >= (size_t)1 << 20) return 1; /* implausible path length */ 169 cap *= 2; 170 } 171 } 172 173 /* ---------------- default hosted dirs probe ---------------- */ 174 /* Linux multiarch triple for the supported 64-bit arches; NULL otherwise. */ 175 static const char* linux_multiarch_triple(KitArchKind arch) { 176 switch (arch) { 177 case KIT_ARCH_ARM_64: 178 return "aarch64-linux-gnu"; 179 case KIT_ARCH_X86_64: 180 return "x86_64-linux-gnu"; 181 case KIT_ARCH_RV64: 182 return "riscv64-linux-gnu"; 183 default: 184 return NULL; 185 } 186 } 187 188 /* A live Linux root is not sysroot-shaped: glibc keeps crt + libc_nonshared.a 189 * in /usr/lib/<triple> and libc.so.6 in /lib/<triple>; musl/Alpine is flat in 190 * /usr/lib + /lib. We hand the resolver an ordered library search list that 191 * covers both, plus the standard include roots, and let its per-file search 192 * bind each crt/libc wherever it actually lives. Host target only. */ 193 int driver_default_hosted_dirs(DriverEnv* env, KitTargetSpec target, 194 DriverHostedDirs* out) { 195 const char* triple; 196 (void)env; 197 if (target.os != KIT_OS_LINUX) return 1; 198 triple = linux_multiarch_triple(target.arch); 199 /* Includes: top-level first, then the arch multiarch dir (glibc), then 200 * /usr/local/include. Emission order matters for header shadowing. */ 201 if (driver_hosted_dirs_add_inc(out, "/usr/include") != 0) return 1; 202 if (triple && 203 driver_hosted_dirs_add_inc_join(out, "/usr/include", triple) != 0) 204 return 1; 205 if (driver_hosted_dirs_add_inc(out, "/usr/local/include") != 0) return 1; 206 /* Libdirs: glibc multiarch first, then flat musl/base dirs, then lib64 207 * (where the glibc x86_64 loader and some libs live). */ 208 if (triple) { 209 if (driver_hosted_dirs_add_lib_join(out, "/usr/lib", triple) != 0) return 1; 210 if (driver_hosted_dirs_add_lib_join(out, "/lib", triple) != 0) return 1; 211 } 212 if (driver_hosted_dirs_add_lib(out, "/usr/lib") != 0) return 1; 213 if (driver_hosted_dirs_add_lib(out, "/lib") != 0) return 1; 214 if (driver_hosted_dirs_add_lib(out, "/usr/lib64") != 0) return 1; 215 if (driver_hosted_dirs_add_lib(out, "/lib64") != 0) return 1; 216 return out->nlibdirs ? 0 : 1; 217 }