kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 31020a257f10fb15cc5c70df86090ffc83a236d3
parent 9362d8f146cb919674bca15538ef7f3484e18451
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 14 May 2026 12:55:27 -0700

Move linker arch cases behind descriptor

Diffstat:
M doc/arch-registration-plan.md | 2 +-
M src/link/link_arch.h | 31 +++++++++++++++++++++++++++----
M src/link/link_arch_aa64.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
M src/link/link_arch_rv64.c | 39 ++++++++++++++++++++-------------------
M src/link/link_dyn.c | 175 +++++++++----------------------------------------------------------------------
M src/link/link_elf.c | 92 ++++++++++++++++++++++++-------------------------------------------------------
M src/link/link_internal.h | 22 ++++++++++++----------
M src/link/link_layout.c | 20 ++++++++++++--------
M src/link/link_macho.c | 154 ++++++++++++++++++++++++++++++-------------------------------------------------
M src/link/link_reloc_layout.c | 35 ++++++++++++++++-------------------
10 files changed, 252 insertions(+), 383 deletions(-)

diff --git a/doc/arch-registration-plan.md b/doc/arch-registration-plan.md @@ -6,7 +6,7 @@ - [x] Route existing arch dispatchers through that descriptor without changing behavior. - [x] Move ABI selection behind the arch descriptor. - [x] Move object-format relocation translators behind the arch descriptor. -- [ ] Move linker-only arch constants and stub emitters fully behind the descriptor. +- [x] Move linker-only arch constants and stub emitters fully behind the descriptor. - [ ] Move assembler/disassembler/register helpers behind arch-owned implementation files. - [ ] Make `MCEmitter` delegate label fixup encoding to the arch descriptor. - [ ] Consolidate files into `src/arch/{aa64,rv64,x64}/` with one exposed implementation object per arch. diff --git a/src/link/link_arch.h b/src/link/link_arch.h @@ -34,7 +34,7 @@ typedef struct LinkArchIPltReloc { typedef struct LinkArchDesc { /* ---- ELF identity ---- */ - u32 e_machine; /* EM_AARCH64 / EM_X86_64 / EM_RISCV */ + u32 e_machine; /* EM_AARCH64 / EM_X86_64 / EM_RISCV */ /* Default PT_INTERP (canonical musl loader for this arch). Drivers * should override via link_set_interp_path; the default fires only @@ -50,6 +50,11 @@ typedef struct LinkArchDesc { u32 elf_r_glob_dat; u32 elf_r_jump_slot; + /* ---- Mach-O identity ---- + * Zero means the target has no Mach-O executable writer yet. */ + u32 macho_cputype; + u32 macho_cpusubtype; + /* ---- PLT geometry ---- * All three arches today use a 32-byte PLT0 + 16-byte per-import * entry, but exposing the sizes keeps the linker free of magic @@ -60,6 +65,13 @@ typedef struct LinkArchDesc { /* ---- IPLT geometry (ifunc trampolines, layout_iplt) ---- */ u32 iplt_stub_size; + /* ---- Optional arch-defined linker symbols ---- + * Some ABIs reserve a global-pointer anchor. When non-NULL, the + * top-level layout pass defines the named symbol at the first + * writable segment plus global_pointer_rw_offset. 
*/ + const char* global_pointer_symbol; + u64 global_pointer_rw_offset; + /* ---- Stub emitters ---- * Each writes its full byte range; callers do not need to pre-fill * the buffer. All vaddrs are post-shift (final image addresses). @@ -81,10 +93,21 @@ typedef struct LinkArchDesc { * records the caller must enqueue (0 or 2); * the records' offset_in_stub / width / kind are * populated, the caller fills the rest. */ - void (*emit_plt0) (u8* dst, u64 plt0_vaddr, u64 gotplt_vaddr); + void (*emit_plt0)(u8* dst, u64 plt0_vaddr, u64 gotplt_vaddr); void (*emit_plt_entry)(u8* dst, u64 entry_vaddr, u64 slot_vaddr); - u32 (*emit_iplt_stub)(u8* dst, u64 stub_vaddr, u64 slot_vaddr, - LinkArchIPltReloc out[2]); + u32 (*emit_iplt_stub)(u8* dst, u64 stub_vaddr, u64 slot_vaddr, + LinkArchIPltReloc out[2]); + + /* Mach-O stubs. Used only when macho_cputype is non-zero. */ + u32 macho_stub_size; + void (*emit_macho_stub)(u8* dst, u64 stub_vaddr, u64 got_slot_vaddr); + + /* Relocation classification used by format-specific linker passes. */ + int (*is_branch_reloc)(RelocKind); + int (*is_got_load_reloc)(RelocKind); + int (*is_tlvp_reloc)(RelocKind); + int (*is_direct_page_reloc)(RelocKind); + int (*needs_jit_call_stub)(RelocKind); } LinkArchDesc; /* Per-arch descriptors, defined in link_arch_<arch>.c. */ diff --git a/src/link/link_arch_aa64.c b/src/link/link_arch_aa64.c @@ -16,6 +16,7 @@ #include "core/core.h" #include "link/link_arch.h" #include "obj/elf.h" +#include "obj/macho.h" #include "obj/obj.h" /* Fixed register assignments mandated by the AArch64 PLT ABI. */ @@ -43,8 +44,7 @@ * segment-base shift that moves both endpoints by the same delta. 
*/ static inline void aa64_adrp_imm_halves(u64 pc, u64 target, u32* immlo, u32* immhi) { - i64 page_disp = - (i64)(target & ~AA64_PAGE_MASK) - (i64)(pc & ~AA64_PAGE_MASK); + i64 page_disp = (i64)(target & ~AA64_PAGE_MASK) - (i64)(pc & ~AA64_PAGE_MASK); i64 imm21 = page_disp >> 12; *immlo = (u32)(imm21 & 0x3); *immhi = (u32)((imm21 >> 2) & 0x7ffff); @@ -121,9 +121,9 @@ static u32 aa64_emit_iplt_stub(u8* dst, u64 stub_vaddr, u64 slot_vaddr, wr_u32_le(dst + 0, aa64_adrp(AA64_PLT_SCRATCH_X16, /*immlo=*/0, /*immhi=*/0)); - wr_u32_le(dst + 4, aa64_ldr64_uimm12(AA64_PLT_SCRATCH_X16, - AA64_PLT_SCRATCH_X16, - /*imm12_scaled=*/0)); + wr_u32_le(dst + 4, + aa64_ldr64_uimm12(AA64_PLT_SCRATCH_X16, AA64_PLT_SCRATCH_X16, + /*imm12_scaled=*/0)); wr_u32_le(dst + 8, aa64_br(AA64_PLT_SCRATCH_X16)); out[0].offset_in_stub = 0; @@ -135,6 +135,50 @@ static u32 aa64_emit_iplt_stub(u8* dst, u64 stub_vaddr, u64 slot_vaddr, return 2; } +static void aa64_emit_macho_stub(u8* out, u64 stub_vaddr, u64 got_slot_vaddr) { + i64 page_s = ((i64)got_slot_vaddr) & ~(i64)0xfff; + i64 page_p = ((i64)stub_vaddr) & ~(i64)0xfff; + i64 imm21 = (page_s - page_p) >> 12; + u32 immlo = (u32)(imm21 & 0x3u); + u32 immhi = (u32)((imm21 >> 2) & 0x7ffffu); + u32 lo12 = (u32)(got_slot_vaddr & 0xfffu); + u32 imm12_ldr = (lo12 >> 3) & 0xfffu; + + wr_u32_le(out + 0, aa64_adrp(AA64_PLT_SCRATCH_X16, immlo, immhi)); + wr_u32_le(out + 4, aa64_ldr64_uimm12(AA64_PLT_SCRATCH_X16, + AA64_PLT_SCRATCH_X16, imm12_ldr)); + wr_u32_le(out + 8, aa64_br(AA64_PLT_SCRATCH_X16)); +} + +static int aa64_is_branch_reloc(RelocKind kind) { + return kind == R_AARCH64_CALL26 || kind == R_AARCH64_JUMP26; +} + +static int aa64_is_got_load_reloc(RelocKind kind) { + return kind == R_AARCH64_ADR_GOT_PAGE || kind == R_AARCH64_LD64_GOT_LO12_NC; +} + +static int aa64_is_tlvp_reloc(RelocKind kind) { + return kind == R_AARCH64_TLVP_LOAD_PAGE21 || + kind == R_AARCH64_TLVP_LOAD_PAGEOFF12; +} + +static int aa64_is_direct_page_reloc(RelocKind kind) { + switch 
(kind) { + case R_AARCH64_ADR_PREL_PG_HI21: + case R_AARCH64_ADR_PREL_PG_HI21_NC: + case R_AARCH64_ADD_ABS_LO12_NC: + case R_AARCH64_LDST8_ABS_LO12_NC: + case R_AARCH64_LDST16_ABS_LO12_NC: + case R_AARCH64_LDST32_ABS_LO12_NC: + case R_AARCH64_LDST64_ABS_LO12_NC: + case R_AARCH64_LDST128_ABS_LO12_NC: + return 1; + default: + return 0; + } +} + const LinkArchDesc link_arch_aa64 = { .e_machine = EM_AARCH64, .default_musl_interp = "/lib/ld-musl-aarch64.so.1", @@ -143,6 +187,9 @@ const LinkArchDesc link_arch_aa64 = { .elf_r_glob_dat = ELF_R_AARCH64_GLOB_DAT, .elf_r_jump_slot = ELF_R_AARCH64_JUMP_SLOT, + .macho_cputype = CPU_TYPE_ARM64, + .macho_cpusubtype = CPU_SUBTYPE_ARM64_ALL, + .plt0_size = AA64_PLT0_SIZE, .plt_entry_size = AA64_PLT_ENTRY_SIZE, .iplt_stub_size = AA64_IPLT_STUB_SIZE, @@ -150,4 +197,12 @@ const LinkArchDesc link_arch_aa64 = { .emit_plt0 = aa64_emit_plt0, .emit_plt_entry = aa64_emit_plt_entry, .emit_iplt_stub = aa64_emit_iplt_stub, + .macho_stub_size = AA64_IPLT_STUB_SIZE, + .emit_macho_stub = aa64_emit_macho_stub, + + .is_branch_reloc = aa64_is_branch_reloc, + .is_got_load_reloc = aa64_is_got_load_reloc, + .is_tlvp_reloc = aa64_is_tlvp_reloc, + .is_direct_page_reloc = aa64_is_direct_page_reloc, + .needs_jit_call_stub = aa64_is_branch_reloc, }; diff --git a/src/link/link_arch_rv64.c b/src/link/link_arch_rv64.c @@ -5,17 +5,16 @@ * vtable refactor; comments preserve the WHY (notably the +0x800 bias * on AUIPC immediates). */ -#include "link/link_arch.h" - #include "arch/rv64_isa.h" #include "core/bytes.h" #include "core/core.h" +#include "link/link_arch.h" #include "obj/elf.h" /* PLT0 is 8 canonical NOPs (32 bytes); each PLT entry and IPLT stub is * 4 instructions (16 bytes) / 3 instructions (12 bytes) respectively. * Encoded once here so the descriptor and emitters stay in sync. 
*/ -#define RV64_PLT0_SIZE 32u +#define RV64_PLT0_SIZE 32u #define RV64_PLT_ENTRY_SIZE 16u #define RV64_IPLT_STUB_SIZE 12u @@ -53,9 +52,9 @@ static void rv64_emit_plt_entry(u8* dst, u64 entry_vaddr, u64 slot_vaddr) { u32 hi20; u32 lo12; rv64_split_pcrel(disp, &hi20, &lo12); - wr_u32_le(dst + 0, rv_auipc(RV_T3, hi20)); - wr_u32_le(dst + 4, rv_ld(RV_T3, RV_T3, (i32)lo12)); - wr_u32_le(dst + 8, rv_jalr(RV_T1, RV_T3, 0)); + wr_u32_le(dst + 0, rv_auipc(RV_T3, hi20)); + wr_u32_le(dst + 4, rv_ld(RV_T3, RV_T3, (i32)lo12)); + wr_u32_le(dst + 8, rv_jalr(RV_T1, RV_T3, 0)); wr_u32_le(dst + 12, rv_nop()); } @@ -78,17 +77,19 @@ static u32 rv64_emit_iplt_stub(u8* dst, u64 stub_vaddr, u64 slot_vaddr, } const LinkArchDesc link_arch_rv64 = { - .e_machine = EM_RISCV, - .default_musl_interp = "/lib/ld-musl-riscv64.so.1", - /* RISC-V psABI has no dedicated GLOB_DAT — GOT-slot data imports - * use the generic absolute-64 reloc instead. */ - .elf_r_relative = ELF_R_RISCV_RELATIVE, - .elf_r_glob_dat = ELF_R_RISCV_64, - .elf_r_jump_slot = ELF_R_RISCV_JUMP_SLOT, - .plt0_size = RV64_PLT0_SIZE, - .plt_entry_size = RV64_PLT_ENTRY_SIZE, - .iplt_stub_size = RV64_IPLT_STUB_SIZE, - .emit_plt0 = rv64_emit_plt0, - .emit_plt_entry = rv64_emit_plt_entry, - .emit_iplt_stub = rv64_emit_iplt_stub, + .e_machine = EM_RISCV, + .default_musl_interp = "/lib/ld-musl-riscv64.so.1", + /* RISC-V psABI has no dedicated GLOB_DAT — GOT-slot data imports + * use the generic absolute-64 reloc instead. 
*/ + .elf_r_relative = ELF_R_RISCV_RELATIVE, + .elf_r_glob_dat = ELF_R_RISCV_64, + .elf_r_jump_slot = ELF_R_RISCV_JUMP_SLOT, + .plt0_size = RV64_PLT0_SIZE, + .plt_entry_size = RV64_PLT_ENTRY_SIZE, + .iplt_stub_size = RV64_IPLT_STUB_SIZE, + .global_pointer_symbol = "__global_pointer$", + .global_pointer_rw_offset = 0x800u, + .emit_plt0 = rv64_emit_plt0, + .emit_plt_entry = rv64_emit_plt_entry, + .emit_iplt_stub = rv64_emit_iplt_stub, }; diff --git a/src/link/link_dyn.c b/src/link/link_dyn.c @@ -505,9 +505,7 @@ void layout_dyn(Linker* l, LinkImage* img) { arch = link_arch_desc_for(l->c); if (!arch) - compiler_panic(img->c, no_loc(), - "link: layout_dyn: no arch descriptor for arch %u", - (u32)l->c->target.arch); + compiler_panic(img->c, no_loc(), "link: layout_dyn: no arch descriptor"); dyn = (LinkDynState*)h->alloc(h, sizeof(*dyn), _Alignof(LinkDynState)); if (!dyn) compiler_panic(img->c, no_loc(), "link: oom on dyn state"); @@ -600,7 +598,10 @@ void layout_dyn(Linker* l, LinkImage* img) { * are harmless to the loader (R_AARCH64_NONE). */ u64 rela_dyn_bytes = (u64)dyn->cap_rela_dyn * ELF64_RELA_SIZE; u64 rela_plt_bytes = (u64)dyn->nrela_plt * ELF64_RELA_SIZE; - u64 plt_bytes = (u64)(imports.nfuncs ? 32u + 16u * imports.nfuncs : 0u); + u64 plt_bytes = + (u64)(imports.nfuncs + ? arch->plt0_size + arch->plt_entry_size * imports.nfuncs + : 0u); u64 gotplt_bytes = (u64)(imports.nfuncs ? 8u * (3u + imports.nfuncs) : 0u); dyn->ndyn_entries = count_dynamic_entries(dyn); u64 dynamic_bytes = (u64)dyn->ndyn_entries * ELF64_DYN_SIZE; @@ -716,145 +717,22 @@ void layout_dyn(Linker* l, LinkImage* img) { dyn->plt_size = plt_bytes; dyn->got_plt_vaddr = rw_vaddr; dyn->got_plt_size = gotplt_bytes; - /* PLT body emit (Phase 5). - * - * AArch64 psABI PLT layout: - * - * PLT0 (32 B): - * stp x16, x30, [sp, #-16]! 
- * adrp x16, page(.got.plt + 16) - * ldr x17, [x16, #lo12(.got.plt + 16)] - * add x16, x16, #lo12(.got.plt + 16) - * br x17 - * nop ; nop ; nop - * - * per-import (16 B), entry i targets .got.plt[3 + i]: - * adrp x16, page(slot) - * ldr x17, [x16, #lo12(slot)] - * add x16, x16, #lo12(slot) - * br x17 - * - * Encoded with raw bit-twiddling — no LinkRelocApply records. - * Distances between .plt and .got.plt are constant across the - * post-layout shift_image_addresses bump (both segments shift by - * the same delta), so page-relative offsets and lo12 are - * preserved. Under DF_1_NOW the loader patches every .got.plt - * slot from .rela.plt before running PLT0, so PLT0's resolve - * stub is never executed, but it is still emitted in canonical - * form for disassembler / unwinder consumption. - * - * Encoding bases (Rd / Rn / Rt fixed at x16 / x16 / x17): - * adrp x16, sym : 0x90000010 | (immlo<<29) | (immhi<<5) - * ldr x17, [x16, #i12] : 0xF9400211 | (i12_scaled<<10) - * add x16, x16, #i12 : 0x91000210 | (i12<<10) - * br x17 : 0xD61F0220 - * stp x16, x30, [sp,#-16]! : 0xa9bf7bf0 - * nop : 0xD503201F - */ - if (l->c->target.arch == CFREE_ARCH_ARM_64) { + /* PLT body emit: the descriptor owns the psABI-specific bytes. */ + if (!arch->emit_plt0 || !arch->emit_plt_entry) + compiler_panic(l->c, no_loc(), "link: PLT emit not configured"); + { u8* plt_b = img->segment_bytes[rx_seg_idx]; - /* PLT0: load .got.plt[2] (resolver) into x17 and tail-call. 
*/ - u64 plt0_pc = dyn->plt_vaddr + 4u; - u64 slot2 = dyn->got_plt_vaddr + 16u; - i64 page_disp = ((i64)slot2 & ~(i64)0xfff) - ((i64)plt0_pc & ~(i64)0xfff); - i64 imm21 = page_disp >> 12; - u32 immlo = (u32)(imm21 & 0x3); - u32 immhi = (u32)((imm21 >> 2) & 0x7ffff); - u32 lo12 = (u32)(slot2 & 0xfff); - u32 imm12_ldr = (lo12 >> 3) & 0xfff; /* slot is 8-byte aligned */ - wr_u32_le(plt_b + 0, 0xa9bf7bf0u); - wr_u32_le(plt_b + 4, 0x90000010u | (immlo << 29) | (immhi << 5)); - wr_u32_le(plt_b + 8, 0xF9400211u | (imm12_ldr << 10)); - wr_u32_le(plt_b + 12, 0x91000210u | (lo12 << 10)); - wr_u32_le(plt_b + 16, 0xD61F0220u); - wr_u32_le(plt_b + 20, 0xD503201Fu); - wr_u32_le(plt_b + 24, 0xD503201Fu); - wr_u32_le(plt_b + 28, 0xD503201Fu); - /* Per-import 16-byte entries. */ u32 ki; + arch->emit_plt0(plt_b, dyn->plt_vaddr, dyn->got_plt_vaddr); for (ki = 0; ki < imports.nfuncs; ++ki) { - u64 entry_vaddr = dyn->plt_vaddr + 32u + 16u * (u64)ki; + u64 entry_vaddr = dyn->plt_vaddr + arch->plt0_size + + (u64)arch->plt_entry_size * (u64)ki; u64 slot_vaddr = dyn->got_plt_vaddr + 8u * (3u + ki); - i64 e_page_disp = - ((i64)slot_vaddr & ~(i64)0xfff) - ((i64)entry_vaddr & ~(i64)0xfff); - i64 e_imm21 = e_page_disp >> 12; - u32 e_immlo = (u32)(e_imm21 & 0x3); - u32 e_immhi = (u32)((e_imm21 >> 2) & 0x7ffff); - u32 e_lo12 = (u32)(slot_vaddr & 0xfff); - u32 e_imm12_ldr = (e_lo12 >> 3) & 0xfff; - u8* p = plt_b + 32u + 16u * (u64)ki; - wr_u32_le(p + 0, 0x90000010u | (e_immlo << 29) | (e_immhi << 5)); - wr_u32_le(p + 4, 0xF9400211u | (e_imm12_ldr << 10)); - wr_u32_le(p + 8, 0x91000210u | (e_lo12 << 10)); - wr_u32_le(p + 12, 0xD61F0220u); + u8* p = + plt_b + arch->plt0_size + (size_t)arch->plt_entry_size * (size_t)ki; + arch->emit_plt_entry(p, entry_vaddr, slot_vaddr); } - } else if (l->c->target.arch == CFREE_ARCH_X86_64) { - /* x86_64 PLT layout under DF_1_NOW: - * - * PLT0 (32 B): emitted as the canonical lazy-resolve trampoline - * for disassembler readability. 
Loaders patch every .got.plt - * slot from .rela.plt before user code, so PLT0 itself never - * runs. We just fill it with NOPs (0x90) — that's - * self-documenting and trivially well-formed. - * - * per-import (16 B), entry i targets .got.plt[3 + i]: - * ff 25 disp32 ; jmpq *[rip + disp_to_slot] - * 0f 1f 84 00 00 00 00 00 ; 8-byte NOP (Intel "long nop") - * 90 90 90 ; pad to 16 - * - * The disp32 PC base is the END of the jmp (entry_vaddr + 6). */ - u8* plt_b = img->segment_bytes[rx_seg_idx]; - memset(plt_b, 0x90, (size_t)plt_bytes); /* default NOP pad */ - u32 ki; - for (ki = 0; ki < imports.nfuncs; ++ki) { - u64 entry_vaddr = dyn->plt_vaddr + 32u + 16u * (u64)ki; - u64 slot_vaddr = dyn->got_plt_vaddr + 8u * (3u + ki); - i64 disp = (i64)slot_vaddr - (i64)(entry_vaddr + 6u); - u8* p = plt_b + 32u + 16u * (u64)ki; - p[0] = 0xff; - p[1] = 0x25; - wr_u32_le(p + 2, (u32)((u64)disp & 0xffffffffu)); - /* p[6..15] left as 0x90 from the memset above. */ - } - } else if (l->c->target.arch == CFREE_ARCH_RV64) { - /* RISC-V psABI PLT under DF_1_NOW. Each entry resolves through - * the GOT slot the loader pre-fills via R_RISCV_JUMP_SLOT: - * - * auipc t3, %pcrel_hi(slot) - * ld t3, %pcrel_lo(.)(t3) - * jalr t1, t3 ; tail-call (t1 trashed) - * nop ; 16-byte alignment pad - * - * Encoded as raw u32 instructions. AUIPC operand carries the - * usual +0x800 bias so the LO12 in the load sign-extends - * correctly. PLT0 is left as canonical nops (32 bytes of - * 0x00000013) — never executed under BIND_NOW. 
*/ - u8* plt_b = img->segment_bytes[rx_seg_idx]; - u32 ki; - u32 i; - for (i = 0; i < (u32)plt_bytes; i += 4) wr_u32_le(plt_b + i, 0x00000013u); - for (ki = 0; ki < imports.nfuncs; ++ki) { - u64 entry_vaddr = dyn->plt_vaddr + 32u + 16u * (u64)ki; - u64 slot_vaddr = dyn->got_plt_vaddr + 8u * (3u + ki); - i64 disp = (i64)slot_vaddr - (i64)entry_vaddr; - u32 hi20 = (u32)(((u64)(disp + 0x800)) >> 12) & 0xfffffu; - u32 lo12 = (u32)((u64)disp & 0xfffu); - u8* p = plt_b + 32u + 16u * (u64)ki; - /* auipc t3, hi20 ; opcode 0x17, rd=t3(28). */ - wr_u32_le(p + 0, 0x00000e17u | (hi20 << 12)); - /* ld t3, lo12(t3) ; opcode 0x03, funct3=3, rs1=t3, rd=t3. */ - wr_u32_le(p + 4, 0x000e3e03u | (lo12 << 20)); - /* jalr t1, 0(t3) ; opcode 0x67, funct3=0, rs1=t3, rd=t1(6). */ - wr_u32_le(p + 8, 0x000e0367u); - /* nop */ - wr_u32_le(p + 12, 0x00000013u); - } - } else { - compiler_panic(l->c, no_loc(), - "link: PLT emit for arch %u not implemented", - (u32)l->c->target.arch); } - } /* rw_seg always exists — it carries .dynamic, plus .got.plt when * imports are present. 
*/ @@ -1002,28 +880,11 @@ void layout_dyn(Linker* l, LinkImage* img) { LinkSymId lsid = imports.funcs[ki]; u32 dynidx = dyn->sym_dynidx[lsid]; u64 slot_vaddr = dyn->got_plt_vaddr + 8u * (3u + ki); - u64 plt_entry_vaddr = dyn->plt_vaddr + 32u + 16u * (u64)ki; + u64 plt_entry_vaddr = dyn->plt_vaddr + arch->plt0_size + + (u64)arch->plt_entry_size * (u64)ki; DynRela* r = &dyn->rela_plt[ki]; r->r_offset = slot_vaddr; - { - u32 jt; - switch (l->c->target.arch) { - case CFREE_ARCH_ARM_64: - jt = ELF_R_AARCH64_JUMP_SLOT; - break; - case CFREE_ARCH_X86_64: - jt = ELF_R_X86_64_JUMP_SLOT; - break; - case CFREE_ARCH_RV64: - jt = ELF_R_RISCV_JUMP_SLOT; - break; - default: - compiler_panic(l->c, no_loc(), - "link: JUMP_SLOT type for arch %u not configured", - (u32)l->c->target.arch); - } - r->r_info = ELF64_R_INFO((u64)dynidx, jt); - } + r->r_info = ELF64_R_INFO((u64)dynidx, arch->elf_r_jump_slot); r->r_addend = 0; /* Serialize into segment bytes (will be re-serialized post-shift). */ u8* p = ro_bytes + rela_plt_off + (u64)ki * ELF64_RELA_SIZE; diff --git a/src/link/link_elf.c b/src/link/link_elf.c @@ -3,7 +3,7 @@ * * 64-bit little-endian only. The per-arch ELF reloc-type tables in * obj/elf_reloc_<arch>.c handle RelocKind <-> ELF translation; this - * file picks e_machine from Compiler.target.arch. + * file gets e_machine from the link arch descriptor. * * File layout (in write order): * @@ -52,6 +52,7 @@ #include "core/util.h" #include "core/vec.h" #include "link/link.h" +#include "link/link_arch.h" #include "link/link_internal.h" #include "obj/elf.h" @@ -147,10 +148,8 @@ static u32 perms_to_pflags(u32 secflags) { * touches only the file dimension. 
*/ static void shift_image_file_offsets(LinkImage* img, u64 delta) { u32 i; - for (i = 0; i < img->nsegments; ++i) - img->segments[i].file_offset += delta; - for (i = 0; i < img->nsections; ++i) - img->sections[i].file_offset += delta; + for (i = 0; i < img->nsegments; ++i) img->segments[i].file_offset += delta; + for (i = 0; i < img->nsections; ++i) img->sections[i].file_offset += delta; for (i = 0; i < LinkRelocs_count(&img->relocs); ++i) LinkRelocs_at(&img->relocs, i)->write_file_offset += delta; } @@ -211,9 +210,8 @@ static void shift_image_addresses(LinkImage* img, u64 delta) { static int reloc_is_tlsle(RelocKind k) { return k == R_AARCH64_TLSLE_ADD_TPREL_HI12 || - k == R_AARCH64_TLSLE_ADD_TPREL_LO12_NC || - k == R_RV_TPREL_HI20 || k == R_RV_TPREL_LO12_I || - k == R_RV_TPREL_LO12_S; + k == R_AARCH64_TLSLE_ADD_TPREL_LO12_NC || k == R_RV_TPREL_HI20 || + k == R_RV_TPREL_LO12_I || k == R_RV_TPREL_LO12_S; } /* x86_64 SysV ABI: TLS variant II — the per-thread TLS image sits at @@ -235,8 +233,8 @@ static int reloc_is_abs(RelocKind k) { return k == R_ABS32 || k == R_ABS64; } * it's not resolvable here use the PLT trampoline" contract; the apply * pass overwrites S with the PLT entry vaddr in that case. */ static int reloc_is_branch26(RelocKind k) { - return k == R_AARCH64_CALL26 || k == R_AARCH64_JUMP26 || - k == R_X64_PLT32 || k == R_PLT32 || k == R_RV_CALL; + return k == R_AARCH64_CALL26 || k == R_AARCH64_JUMP26 || k == R_X64_PLT32 || + k == R_PLT32 || k == R_RV_CALL; } static void emit_dyn_record(LinkImage* img, u64 site_vaddr, u32 reloc_type, @@ -255,38 +253,22 @@ static void emit_dyn_record(LinkImage* img, u64 site_vaddr, u32 reloc_type, r->r_addend = addend; } -/* Dynamic-reloc type numbers are arch-specific (aarch64 starts at 1024; - * x86_64 lives in the low single-digit range; risc-v has its own - * encoding). Pick the right constant for the active target.arch so the - * loader recognizes our .rela.dyn entries. 
*/ -static u32 dyn_reloc_type(LinkImage* img, u32 aarch64, u32 x86_64, u32 rv64) { - switch (img->c->target.arch) { - case CFREE_ARCH_ARM_64: - return aarch64; - case CFREE_ARCH_X86_64: - return x86_64; - case CFREE_ARCH_RV64: - return rv64; - default: - compiler_panic(img->c, no_loc(), - "link: dyn reloc type for arch %u not configured", - (u32)img->c->target.arch); - } +static const LinkArchDesc* elf_arch_or_panic(Compiler* c, const char* where) { + const LinkArchDesc* arch = link_arch_desc_for(c); + if (!arch || !arch->e_machine) + compiler_panic(c, no_loc(), "%s: no ELF arch descriptor", where); + return arch; } static void emit_relative_record(LinkImage* img, u64 site_vaddr, u64 addend) { - u32 t = dyn_reloc_type(img, ELF_R_AARCH64_RELATIVE, ELF_R_X86_64_RELATIVE, - ELF_R_RISCV_RELATIVE); - emit_dyn_record(img, site_vaddr, t, 0, (i64)addend); + const LinkArchDesc* arch = elf_arch_or_panic(img->c, "link"); + emit_dyn_record(img, site_vaddr, arch->elf_r_relative, 0, (i64)addend); } static void emit_globdat_record(LinkImage* img, u64 site_vaddr, u32 dynidx, i64 addend) { - u32 t = dyn_reloc_type(img, ELF_R_AARCH64_GLOB_DAT, ELF_R_X86_64_GLOB_DAT, - /* RISC-V dyn loader uses R_RISCV_64 for GOT-slot - * data imports — no dedicated GLOB_DAT type. */ - ELF_R_RISCV_64); - emit_dyn_record(img, site_vaddr, t, dynidx, addend); + const LinkArchDesc* arch = elf_arch_or_panic(img->c, "link"); + emit_dyn_record(img, site_vaddr, arch->elf_r_glob_dat, dynidx, addend); } /* RISC-V PCREL_LO12_* references the address of an AUIPC carrying the @@ -296,8 +278,7 @@ static void emit_globdat_record(LinkImage* img, u64 site_vaddr, u32 dynidx, * * Linear scan over img->relocs is fine in practice: kernel images and * cg cases produce at most a few hundred relocs total. 
*/ -static i64 rv_pcrel_lo12_disp(LinkImage* img, u64 auipc_vaddr, - u64 img_base) { +static i64 rv_pcrel_lo12_disp(LinkImage* img, u64 auipc_vaddr, u64 img_base) { u32 i; for (i = 0; i < LinkRelocs_count(&img->relocs); ++i) { const LinkRelocApply* hi = LinkRelocs_at(&img->relocs, i); @@ -306,8 +287,7 @@ static i64 rv_pcrel_lo12_disp(LinkImage* img, u64 auipc_vaddr, if (hi->kind != R_RV_PCREL_HI20 && hi->kind != R_RV_GOT_HI20) continue; if (hi->write_vaddr + img_base != auipc_vaddr) continue; hi_tgt = LinkSyms_at(&img->syms, hi->target - 1); - hi_S = (hi_tgt->kind == SK_ABS) ? hi_tgt->vaddr - : hi_tgt->vaddr + img_base; + hi_S = (hi_tgt->kind == SK_ABS) ? hi_tgt->vaddr : hi_tgt->vaddr + img_base; hi_P = hi->write_vaddr + img_base; return (i64)hi_S + hi->addend - (i64)hi_P; } @@ -340,8 +320,7 @@ static void apply_all_relocs(LinkImage* img, u64 img_base) { * writes the full 32- or 64-bit signed value. */ i64 off = (i64)(tgt->vaddr - img->tls_vaddr) - (i64)img->tls_memsz; S = (u64)off; - } else if (r->kind == R_RV_PCREL_LO12_I || - r->kind == R_RV_PCREL_LO12_S) { + } else if (r->kind == R_RV_PCREL_LO12_I || r->kind == R_RV_PCREL_LO12_S) { /* PCREL_LO12: rewrite S so that link_reloc_apply's existing * LO12_I/LO12_S encoder produces the right low 12 bits of the * paired AUIPC's PC-relative displacement. The reloc's own @@ -351,8 +330,8 @@ static void apply_all_relocs(LinkImage* img, u64 img_base) { (size_t)(r->write_file_offset - seg->file_offset); { i64 disp = rv_pcrel_lo12_disp(img, tgt->vaddr + img_base, img_base); - RelocKind alias = (r->kind == R_RV_PCREL_LO12_I) ? R_RV_LO12_I - : R_RV_LO12_S; + RelocKind alias = + (r->kind == R_RV_PCREL_LO12_I) ? 
R_RV_LO12_I : R_RV_LO12_S; link_reloc_apply(img->c, alias, P_bytes, (u64)disp, 0, P); } continue; @@ -645,23 +624,8 @@ static u64 sec_flags_to_shf(u32 flags) { void link_emit_elf(LinkImage* img, Writer* w) { Heap* heap = img->heap; Compiler* c = img->c; - - u32 e_machine; - switch (c->target.arch) { - case CFREE_ARCH_ARM_64: - e_machine = EM_AARCH64; - break; - case CFREE_ARCH_X86_64: - e_machine = EM_X86_64; - break; - case CFREE_ARCH_RV64: - e_machine = EM_RISCV; - break; - default: - compiler_panic(c, no_loc(), - "link_emit_elf: unsupported target arch %u", - (u32)c->target.arch); - } + const LinkArchDesc* arch = elf_arch_or_panic(c, "link_emit_elf"); + u32 e_machine = arch->e_machine; if (img->entry_sym == LINK_SYM_NONE) compiler_panic(c, no_loc(), "link_emit_elf: no resolved entry symbol"); /* IFUNC trampolines: layout_iplt builds the .iplt stubs + .igot.plt @@ -696,11 +660,11 @@ void link_emit_elf(LinkImage* img, Writer* w) { u32 nphdr_extra_dyn = pie ? 4u : 0u; u32 nphdr_headers = scripted ? 0u : 1u; u32 nphdr_buildid = scripted ? 0u : 1u; - u32 nphdr_total = - nphdr_headers + img->nsegments + nphdr_buildid + has_tls + nphdr_extra_dyn; + u32 nphdr_total = nphdr_headers + img->nsegments + nphdr_buildid + has_tls + + nphdr_extra_dyn; u64 build_id_note_bytes = scripted ? 0ULL : BUILD_ID_NOTE_BYTES; - u64 headers_size = sizeof(Ehdr64) + (u64)nphdr_total * sizeof(Phdr64) + - build_id_note_bytes; + u64 headers_size = + sizeof(Ehdr64) + (u64)nphdr_total * sizeof(Phdr64) + build_id_note_bytes; u64 headers_load = ALIGN_UP(headers_size, (u64)PAGE_SIZE); /* The build-id note lives inside the headers PT_LOAD at this offset. */ diff --git a/src/link/link_internal.h b/src/link/link_internal.h @@ -157,12 +157,12 @@ LinkRelocApply* link_append_reloc_slot(LinkImage* img); /* Emit or upsert a synthetic global boundary symbol (link_layout.c). 
*/ void link_emit_boundary_sym(Linker* l, LinkImage* img, const char* name, - u64 vaddr); + u64 vaddr); /* Detect __start_<X> / __stop_<X> with <X> a valid C identifier. * Defined in link_resolve.c; used by link_reloc_layout.c. */ int link_gc_split_start_stop(const char* s, size_t n, size_t* out_off, - size_t* out_len, int* out_is_start); + size_t* out_len, int* out_is_start); /* GC liveness helpers (link_resolve.c). */ int link_gc_live_get(const GcLive* g, u32 ii, ObjSecId j); @@ -171,7 +171,8 @@ int link_gc_live_get(const GcLive* g, u32 ii, ObjSecId j); u32 link_iplt_alloc_segments(LinkImage* img, u32 nseg); u32 link_iplt_alloc_sections(LinkImage* img, u32 nsec); -/* ---- Public entries (link_resolve.c) -------------------------------------- */ +/* ---- Public entries (link_resolve.c) -------------------------------------- + */ void link_ingest_archives(struct Linker*); void link_resolve_symbols(struct Linker*, LinkImage*); void link_resolve_undefs(struct Linker*, LinkImage*); @@ -180,23 +181,25 @@ void link_gc_live_alloc(GcLive* g, struct Linker* l, Heap* h); void link_gc_live_free(GcLive* g, Heap* h); void link_gc_drop_dead_globals(struct Linker*, LinkImage*, const GcLive*); -/* ---- Public entries (link_layout.c) --------------------------------------- */ +/* ---- Public entries (link_layout.c) --------------------------------------- + */ void link_layout_sections(struct Linker*, LinkImage*, const GcLive*); void link_layout_commons(struct Linker*, LinkImage*); void link_emit_segment_bytes(struct Linker*, LinkImage*); -/* ---- Public entries (link_reloc_layout.c) --------------------------------- */ +/* ---- Public entries (link_reloc_layout.c) --------------------------------- + */ void link_assign_symbol_vaddrs(struct Linker*, LinkImage*); void link_emit_array_boundaries(struct Linker*, LinkImage*); void link_emit_tls_boundaries(struct Linker*, LinkImage*); void link_emit_encoding_section_boundaries(struct Linker*, LinkImage*); void 
link_layout_jit_stubs(struct Linker*, LinkImage*, u32 map_size, - LinkSymId** stub_map_out); + LinkSymId** stub_map_out); void link_layout_got(struct Linker*, LinkImage*, u32 map_size, LinkSymId** got_map_out); void link_layout_iplt(struct Linker*, LinkImage*); void link_emit_relocations(struct Linker*, LinkImage*, const LinkSymId* got_map, - const LinkSymId* stub_map); + const LinkSymId* stub_map); void link_resolve_entry(struct Linker*, LinkImage*); /* Defined in link.c. Walks the Linker's inputs and records each input's @@ -430,9 +433,8 @@ struct LinkImage { void link_reloc_apply(Compiler*, RelocKind, u8* P_bytes, u64 S, i64 A, u64 P); /* Public link_emit_image_writer dispatches by Compiler.target.obj. The - * ELF implementation lives in link_elf.c and dispatches internally on - * Compiler.target.arch for e_machine and reloc translation. The Mach-O - * peer (link_macho.c) and COFF peer arrive in later phases. */ + * ELF and Mach-O writers get architecture identity from LinkArchDesc; + * reloc application remains keyed by RelocKind. COFF arrives later. */ void link_emit_elf(LinkImage*, Writer*); void link_emit_macho(LinkImage*, Writer*); diff --git a/src/link/link_layout.c b/src/link/link_layout.c @@ -160,7 +160,8 @@ void link_layout_sections(Linker* l, LinkImage* img, const GcLive* g) { ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; for (j = 1; j < obj_section_count(ob); ++j) { const Section* s = obj_section_get(ob, j); - if (!s || !link_section_kept(s) || !link_gc_live_get(g, ii, j)) continue; + if (!s || !link_section_kept(s) || !link_gc_live_get(g, ii, j)) + continue; entries[e].input_idx = ii; entries[e].obj_sec_id = j; entries[e].name = s->name; @@ -456,7 +457,7 @@ static Sym boundary_name(Linker* l, const char* name) { /* Upsert a global symbol with the given absolute vaddr. Satisfies any * prior undef ref in place; fans out to per-input duplicate name slots. 
*/ void link_emit_boundary_sym(Linker* l, LinkImage* img, const char* name, - u64 vaddr) { + u64 vaddr) { Sym sym = boundary_name(l, name); LinkSymId id = symhash_get(&img->globals, sym); LinkSymbol rec; @@ -930,16 +931,19 @@ LinkImage* link_resolve(Linker* l) { link_emit_boundary_sym(l, img, "__dso_handle", 0); link_emit_boundary_sym(l, img, "_DYNAMIC", 0); link_emit_boundary_sym(l, img, "_GLOBAL_OFFSET_TABLE_", 0); - if (l->c->target.arch == CFREE_ARCH_RV64) { + { + const LinkArchDesc* arch = link_arch_desc_for(l->c); u32 si; u64 gp_vaddr = 0; - for (si = 0; si < img->nsegments; ++si) { - if (img->segments[si].flags & SF_WRITE) { - gp_vaddr = img->segments[si].vaddr + 0x800u; - break; + if (arch && arch->global_pointer_symbol) { + for (si = 0; si < img->nsegments; ++si) { + if (img->segments[si].flags & SF_WRITE) { + gp_vaddr = img->segments[si].vaddr + arch->global_pointer_rw_offset; + break; + } } + link_emit_boundary_sym(l, img, arch->global_pointer_symbol, gp_vaddr); } - link_emit_boundary_sym(l, img, "__global_pointer$", gp_vaddr); } link_resolve_undefs(l, img); link_gc_drop_dead_globals(l, img, &g); diff --git a/src/link/link_macho.c b/src/link/link_macho.c @@ -39,7 +39,6 @@ #include <string.h> -#include "arch/aa64_isa.h" #include "core/bytes.h" #include "core/heap.h" #include "core/pool.h" @@ -47,6 +46,7 @@ #include "core/util.h" #include "core/vec.h" #include "link/link.h" +#include "link/link_arch.h" #include "link/link_internal.h" #include "obj/macho.h" @@ -58,7 +58,6 @@ static SrcLoc no_loc(void) { /* ---- constants ---- */ #define MZ_PAGEZERO 0x100000000ULL #define MZ_PAGE 0x4000ULL -#define MZ_STUB_SIZE 12u #define MZ_GOT_SIZE 8u /* __DATA,__thread_ptrs slot size — one pointer per unique TLV referenced * via TLVP_LOAD_PAGE21/PAGEOFF12. 
Each slot holds the address of the @@ -261,6 +260,7 @@ typedef struct MCtx { Heap* h; Writer* w; Linker* linker; + const LinkArchDesc* arch; /* imports */ MachImp* imports; @@ -354,33 +354,6 @@ static LinkSymbol* sym_at(LinkImage* img, LinkSymId id) { return LinkSyms_at(&img->syms, id - 1); } -/* ---- pass: build __stubs and __got bytes ---- */ - -static void write_u32(u8* p, u32 v) { wr_u32_le(p, v); } - -/* Mach-O __stubs scratch register: x16 (matches the AArch64 PLT ABI's - * IP0 scratch convention used for both ADRP base and BR target). */ -#define MZ_STUB_SCRATCH_X16 16u - -/* Encode a 12-byte arm64 stub: - * ADRP x16, __got_slot@PAGE - * LDR x16, [x16, #__got_slot@PAGEOFF] - * BR x16 - */ -static void encode_stub(u8* out, u64 stub_vaddr, u64 got_slot_vaddr) { - i64 page_s = ((i64)got_slot_vaddr) & ~(i64)0xfff; - i64 page_p = ((i64)stub_vaddr) & ~(i64)0xfff; - i64 imm21 = (page_s - page_p) >> 12; - u32 immlo = (u32)(imm21 & 0x3u); - u32 immhi = (u32)((imm21 >> 2) & 0x7ffffu); - u32 lo12 = (u32)(got_slot_vaddr & 0xfffu); - u32 imm12_ldr = (lo12 >> 3) & 0xfffu; /* slot 8-byte aligned */ - write_u32(out + 0, aa64_adrp(MZ_STUB_SCRATCH_X16, immlo, immhi)); - write_u32(out + 4, aa64_ldr64_uimm12(MZ_STUB_SCRATCH_X16, - MZ_STUB_SCRATCH_X16, imm12_ldr)); - write_u32(out + 8, aa64_br(MZ_STUB_SCRATCH_X16)); -} - /* ---- pass: collect imports ---- */ static u32 dylib_ordinal_of(MCtx* x, Sym install) { @@ -421,7 +394,8 @@ static void collect_imports(MCtx* x) { /* Back-classify: any CALL26/JUMP26 reloc target -> function. 
*/ for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) { LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); - if (r->kind != R_AARCH64_CALL26 && r->kind != R_AARCH64_JUMP26) continue; + if (!x->arch->is_branch_reloc || !x->arch->is_branch_reloc(r->kind)) + continue; if (r->target == LINK_SYM_NONE || r->target >= x->sym_to_imp_size) continue; u32 idx = x->sym_to_imp[r->target]; if (!idx) { @@ -488,8 +462,7 @@ static void collect_imports(MCtx* x) { * post-ASLR. */ for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) { LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); - if (r->kind != R_AARCH64_ADR_GOT_PAGE && - r->kind != R_AARCH64_LD64_GOT_LO12_NC) + if (!x->arch->is_got_load_reloc || !x->arch->is_got_load_reloc(r->kind)) continue; if (r->target == LINK_SYM_NONE || r->target >= x->sym_to_imp_size) continue; if (x->sym_to_imp[r->target]) continue; @@ -562,9 +535,7 @@ static void collect_tlv(MCtx* x) { u32 cap = 0; for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) { LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); - if (r->kind != R_AARCH64_TLVP_LOAD_PAGE21 && - r->kind != R_AARCH64_TLVP_LOAD_PAGEOFF12) - continue; + if (!x->arch->is_tlvp_reloc || !x->arch->is_tlvp_reloc(r->kind)) continue; if (r->target == LINK_SYM_NONE || r->target >= x->sym_to_tlv_size) continue; /* Resolve through canonical so multiple per-input duplicate undefs * collapse onto one __thread_ptrs slot. 
*/ @@ -745,7 +716,7 @@ static void plan_layout(MCtx* x) { /* __stubs synthetic */ if (x->nimport_funcs) { - x->stubs_size = x->nimport_funcs * MZ_STUB_SIZE; + x->stubs_size = x->nimport_funcs * x->arch->macho_stub_size; x->stubs_bytes = (u8*)h->alloc(h, x->stubs_size, 4); if (!x->stubs_bytes) compiler_panic(x->c, no_loc(), "link_macho: oom on stubs"); @@ -761,7 +732,7 @@ static void plan_layout(MCtx* x) { m->segidx = 1; m->flags = 0x80000000u | 0x00000400u | 0x00000008u /*S_SYMBOL_STUBS*/; m->reserved1 = 0; /* fill in later: indirect-symtab base */ - m->reserved2 = MZ_STUB_SIZE; + m->reserved2 = x->arch->macho_stub_size; } x->segs[1].nsects = x->nsecs - first_text_sec; x->segs[1].first_sec = first_text_sec; @@ -1034,10 +1005,12 @@ static void plan_layout(MCtx* x) { for (u32 i = 0; i < x->nimports; ++i) { MachImp* mi = &x->imports[i]; if (!mi->is_func || !mi->stub_idx) continue; - u64 stub_v = x->stubs_vaddr + (mi->stub_idx - 1u) * MZ_STUB_SIZE; + u64 stub_v = + x->stubs_vaddr + (mi->stub_idx - 1u) * x->arch->macho_stub_size; u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE; - encode_stub(x->stubs_bytes + (mi->stub_idx - 1u) * MZ_STUB_SIZE, stub_v, - got_v); + x->arch->emit_macho_stub( + x->stubs_bytes + (mi->stub_idx - 1u) * x->arch->macho_stub_size, stub_v, + got_v); } /* Phase B: build OutSec[] now that all MSec vaddrs are final. Walk @@ -1329,8 +1302,7 @@ static void apply_relocs(MCtx* x, FixList* fl) { * before the import / internal split because an imported TLV * descriptor doesn't use the __got slot (its address lives in * __thread_ptrs with its own chained bind). */ - if (r->kind == R_AARCH64_TLVP_LOAD_PAGE21 || - r->kind == R_AARCH64_TLVP_LOAD_PAGEOFF12) { + if (x->arch->is_tlvp_reloc && x->arch->is_tlvp_reloc(r->kind)) { u32 tlv_idx = (r->target < x->sym_to_tlv_size) ? x->sym_to_tlv[r->target] : 0u; if (!tlv_idx) @@ -1343,55 +1315,44 @@ static void apply_relocs(MCtx* x, FixList* fl) { if (is_imp) { MachImp* mi = (imp_idx > 0) ? 
&x->imports[imp_idx - 1] : NULL; - switch (r->kind) { - case R_AARCH64_CALL26: - case R_AARCH64_JUMP26: { - if (!mi || !mi->stub_idx) - compiler_panic(x->c, no_loc(), - "link_macho: import has no stub for branch"); - u64 stub_v = x->stubs_vaddr + (mi->stub_idx - 1u) * MZ_STUB_SIZE; - link_reloc_apply(x->c, r->kind, P_bytes, stub_v, r->addend, P); - continue; - } - case R_AARCH64_ADR_GOT_PAGE: - case R_AARCH64_LD64_GOT_LO12_NC: { - if (!mi) - compiler_panic(x->c, no_loc(), - "link_macho: GOT reloc for unknown import"); - u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE; - link_reloc_apply(x->c, r->kind, P_bytes, got_v, r->addend, P); - continue; - } - case R_AARCH64_ADR_PREL_PG_HI21: - case R_AARCH64_ADR_PREL_PG_HI21_NC: - case R_AARCH64_ADD_ABS_LO12_NC: - case R_AARCH64_LDST8_ABS_LO12_NC: - case R_AARCH64_LDST16_ABS_LO12_NC: - case R_AARCH64_LDST32_ABS_LO12_NC: - case R_AARCH64_LDST64_ABS_LO12_NC: - case R_AARCH64_LDST128_ABS_LO12_NC: { - /* Direct page/lo12 against an import: route through __got - * by treating the reloc as the GOT-load form. */ - if (!mi) - compiler_panic(x->c, no_loc(), - "link_macho: PAGE/LO12 against unknown import"); - u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE; - link_reloc_apply(x->c, r->kind, P_bytes, got_v, r->addend, P); - continue; - } - case R_ABS64: { - /* Direct 8-byte absolute against an import: bind the slot. 
*/ - wr_u64_le(P_bytes, 0); - FixSite fs = {(u8)msec->segidx, 1, {0}, (u32)imp_idx, P, 0}; - fix_push(fl, &fs); - continue; - } - default: + if (x->arch->is_branch_reloc && x->arch->is_branch_reloc(r->kind)) { + if (!mi || !mi->stub_idx) + compiler_panic(x->c, no_loc(), + "link_macho: import has no stub for branch"); + u64 stub_v = + x->stubs_vaddr + (mi->stub_idx - 1u) * x->arch->macho_stub_size; + link_reloc_apply(x->c, r->kind, P_bytes, stub_v, r->addend, P); + continue; + } + if (x->arch->is_got_load_reloc && x->arch->is_got_load_reloc(r->kind)) { + if (!mi) compiler_panic(x->c, no_loc(), - "link_macho: unhandled reloc kind %u against " - "imported symbol", - (u32)r->kind); + "link_macho: GOT reloc for unknown import"); + u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE; + link_reloc_apply(x->c, r->kind, P_bytes, got_v, r->addend, P); + continue; + } + if (x->arch->is_direct_page_reloc && + x->arch->is_direct_page_reloc(r->kind)) { + /* Direct page/lo12 against an import: route through __got. */ + if (!mi) + compiler_panic(x->c, no_loc(), + "link_macho: PAGE/LO12 against unknown import"); + u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE; + link_reloc_apply(x->c, r->kind, P_bytes, got_v, r->addend, P); + continue; + } + if (r->kind == R_ABS64) { + /* Direct 8-byte absolute against an import: bind the slot. */ + wr_u64_le(P_bytes, 0); + FixSite fs = {(u8)msec->segidx, 1, {0}, (u32)imp_idx, P, 0}; + fix_push(fl, &fs); + continue; } + compiler_panic(x->c, no_loc(), + "link_macho: unhandled reloc kind %u against imported " + "symbol", + (u32)r->kind); } /* Internal relocs. */ @@ -1420,8 +1381,8 @@ static void apply_relocs(MCtx* x, FixList* fl) { * for any extern global, even if the def is in-image). imp_idx * was populated by collect_imports' internal-GOT pass; redirect * the page/lo12 reloc to the GOT slot's vaddr. 
*/ - if (imp_idx > 0 && (r->kind == R_AARCH64_ADR_GOT_PAGE || - r->kind == R_AARCH64_LD64_GOT_LO12_NC)) { + if (imp_idx > 0 && x->arch->is_got_load_reloc && + x->arch->is_got_load_reloc(r->kind)) { MachImp* mi = &x->imports[imp_idx - 1]; u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE; link_reloc_apply(x->c, r->kind, P_bytes, got_v, r->addend, P); @@ -2237,11 +2198,12 @@ void link_emit_macho(LinkImage* img, Writer* w) { x.h = img->heap; x.w = w; x.linker = img->linker; + x.arch = link_arch_desc_for(img->c); - if (x.c->target.arch != CFREE_ARCH_ARM_64) + if (!x.arch || !x.arch->macho_cputype || !x.arch->emit_macho_stub || + !x.arch->macho_stub_size) compiler_panic(x.c, no_loc(), - "link_emit_macho: only arm64 supported (arch=%u)", - (u32)x.c->target.arch); + "link_emit_macho: no Mach-O descriptor for target"); if (img->entry_sym == LINK_SYM_NONE) compiler_panic(x.c, no_loc(), "link_emit_macho: no resolved entry"); @@ -2445,8 +2407,8 @@ void link_emit_macho(LinkImage* img, Writer* w) { * main, fn_starts, data_in_code, codesig) = 11 */ mbuf_u32(&file, MH_MAGIC_64); - mbuf_u32(&file, CPU_TYPE_ARM64); - mbuf_u32(&file, CPU_SUBTYPE_ARM64_ALL); + mbuf_u32(&file, x.arch->macho_cputype); + mbuf_u32(&file, x.arch->macho_cpusubtype); mbuf_u32(&file, MH_EXECUTE); mbuf_u32(&file, ncmds); mbuf_u32(&file, lc.len); diff --git a/src/link/link_reloc_layout.c b/src/link/link_reloc_layout.c @@ -210,7 +210,8 @@ void link_emit_encoding_section_boundaries(Linker* l, LinkImage* img) { if (sym->defined) continue; if (sym->name == 0) continue; nm = pool_str(l->c->global, sym->name, &namelen); - if (!link_gc_split_start_stop(nm, namelen, &off, &ilen, &is_start)) continue; + if (!link_gc_split_start_stop(nm, namelen, &off, &ilen, &is_start)) + continue; secname = pool_intern(l->c->global, nm + off, ilen); for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; @@ -382,7 +383,7 @@ u32 link_iplt_alloc_sections(LinkImage* img, u32 
nsec) { /* ---- pass: JIT call stubs ---- */ void link_layout_jit_stubs(Linker* l, LinkImage* img, u32 map_size, - LinkSymId** stub_map_out) { + LinkSymId** stub_map_out) { Heap* h = img->heap; const LinkArchDesc* arch; LinkSymId* stub_map; @@ -402,11 +403,9 @@ void link_layout_jit_stubs(Linker* l, LinkImage* img, u32 map_size, u8* stubs_bytes; *stub_map_out = NULL; - if (l->emit_static_exe) return; - if (l->c->target.arch != CFREE_ARCH_ARM_64) return; - arch = link_arch_desc_for(l->c); - if (!arch) return; + if (l->emit_static_exe) return; + if (!arch || !arch->needs_jit_call_stub) return; stub_map = (LinkSymId*)h->alloc(h, sizeof(*stub_map) * map_size, _Alignof(LinkSymId)); @@ -425,7 +424,7 @@ void link_layout_jit_stubs(Linker* l, LinkImage* img, u32 map_size, const LinkSymbol* tgt; if (!s || !link_section_kept(s)) continue; if (m->section[r->section_id] == LINK_SEC_NONE) continue; - if (r->kind != R_AARCH64_CALL26 && r->kind != R_AARCH64_JUMP26) continue; + if (!arch->needs_jit_call_stub(r->kind)) continue; if (r->sym == OBJ_SYM_NONE || r->sym >= m->nsym) continue; target = m->sym[r->sym]; if (target == LINK_SYM_NONE) continue; @@ -575,8 +574,8 @@ void link_layout_jit_stubs(Linker* l, LinkImage* img, u32 map_size, rrec.input_id = LINK_INPUT_NONE; rrec.section_id = OBJ_SEC_NONE; rrec.link_section_id = stubs_sec->id; - rrec.offset = (u32)(i * arch->iplt_stub_size) + - stub_relocs[ri].offset_in_stub; + rrec.offset = + (u32)(i * arch->iplt_stub_size) + stub_relocs[ri].offset_in_stub; rrec.width = stub_relocs[ri].width; rrec.write_vaddr = stub_vaddr + stub_relocs[ri].offset_in_stub; rrec.write_file_offset = rrec.write_vaddr; @@ -624,8 +623,8 @@ void link_layout_got(Linker* l, LinkImage* img, u32 map_size, *got_map_out = NULL; - got_map = (LinkSymId*)h->alloc(h, sizeof(*got_map) * map_size, - _Alignof(LinkSymId)); + got_map = + (LinkSymId*)h->alloc(h, sizeof(*got_map) * map_size, _Alignof(LinkSymId)); if (!got_map) compiler_panic(img->c, no_loc(), "link: oom on got 
map"); memset(got_map, 0, sizeof(*got_map) * map_size); @@ -801,9 +800,7 @@ void link_layout_iplt(Linker* l, LinkImage* img) { Sym init_section_name; const LinkArchDesc* arch = link_arch_desc_for(l->c); if (!arch) - compiler_panic(img->c, no_loc(), - "link: layout_iplt: no arch descriptor for arch %u", - (u32)l->c->target.arch); + compiler_panic(img->c, no_loc(), "link: layout_iplt: no arch descriptor"); for (i = 0; i < LinkSyms_count(&img->syms); ++i) { const LinkSymbol* s = LinkSyms_at(&img->syms, i); @@ -1177,9 +1174,9 @@ void link_resolve_entry(Linker* l, LinkImage* img) { /* ---- pass 4: emit reloc records ---- */ -void link_emit_relocations(Linker* l, LinkImage* img, - const LinkSymId* got_map, - const LinkSymId* stub_map) { +void link_emit_relocations(Linker* l, LinkImage* img, const LinkSymId* got_map, + const LinkSymId* stub_map) { + const LinkArchDesc* arch = link_arch_desc_for(l->c); u32 ii; for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) { ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj; @@ -1210,8 +1207,8 @@ void link_emit_relocations(Linker* l, LinkImage* img, compiler_panic(l->c, no_loc(), "link: GOT slot missing for symbol"); target = slot; } - if (stub_map && (r->kind == R_AARCH64_CALL26 || - r->kind == R_AARCH64_JUMP26)) { + if (stub_map && arch && arch->needs_jit_call_stub && + arch->needs_jit_call_stub(r->kind)) { LinkSymId stub = stub_map[target]; if (stub != LINK_SYM_NONE) target = stub; }