kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit d0785fcb8283dadb957a8afb00dd246b4f59e461
parent b56c3ece7c3472325f2596146c84be3b61cf2079
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 25 May 2026 03:45:32 -0700

Refactor object formats into gated directories

Diffstat:
MMakefile | 18+++++++++++++++++-
Mdoc/REGISTRY.md | 118+++++++++++++++++++++++++++++++++++++++----------------------------------------
Msrc/api/stubs.c | 2+-
Msrc/arch/aa64/arch.c | 35++++-------------------------------
Msrc/arch/aa64/link.c | 22++--------------------
Msrc/arch/arch.h | 67+++++++++++++++++++------------------------------------------------
Msrc/arch/registry.c | 37++++---------------------------------
Msrc/arch/rv64/arch.c | 13++-----------
Msrc/arch/rv64/link.c | 8--------
Msrc/arch/x64/arch.c | 31++-----------------------------
Msrc/arch/x64/link.c | 38++++++++++++--------------------------
Msrc/emu/elf_load.c | 26+++++++++++++-------------
Msrc/link/link.c | 105+++++++++++++++++++++++++++++++++++++++++--------------------------------------
Msrc/link/link_arch.h | 40----------------------------------------
Dsrc/link/link_coff.c | 1748-------------------------------------------------------------------------------
Dsrc/link/link_dyn.c | 982-------------------------------------------------------------------------------
Dsrc/link/link_elf.c | 1417-------------------------------------------------------------------------------
Msrc/link/link_internal.h | 8--------
Msrc/link/link_jit.c | 21+++++++++++----------
Msrc/link/link_layout.c | 20+++++++++-----------
Dsrc/link/link_macho.c | 2603-------------------------------------------------------------------------------
Rsrc/obj/coff_archive.c -> src/obj/coff/archive.c | 0
Rsrc/obj/coff.h -> src/obj/coff/coff.h | 0
Asrc/obj/coff/emit.c | 732+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/obj/coff/link.c | 1731+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/obj/coff/read.c | 739+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/obj/coff/read_dso.c | 237+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/obj/coff/reloc_aarch64.c | 96+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/obj/coff/reloc_x86_64.c | 76++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dsrc/obj/coff_emit.c | 732-------------------------------------------------------------------------------
Dsrc/obj/coff_read.c | 722-------------------------------------------------------------------------------
Dsrc/obj/coff_read_dso.c | 236-------------------------------------------------------------------------------
Dsrc/obj/coff_reloc_aarch64.c | 96-------------------------------------------------------------------------------
Dsrc/obj/coff_reloc_x86_64.c | 76----------------------------------------------------------------------------
Rsrc/obj/elf.h -> src/obj/elf/elf.h | 0
Asrc/obj/elf/emit.c | 752+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/obj/elf/link.c | 1421+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/obj/elf/link_dyn.c | 992+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/obj/elf/read.c | 694+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/obj/elf/reloc_aarch64.c | 182+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/obj/elf/reloc_riscv64.c | 182+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/obj/elf/reloc_x86_64.c | 134+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dsrc/obj/elf_emit.c | 751-------------------------------------------------------------------------------
Dsrc/obj/elf_read.c | 684-------------------------------------------------------------------------------
Dsrc/obj/elf_reloc_aarch64.c | 182-------------------------------------------------------------------------------
Dsrc/obj/elf_reloc_riscv64.c | 182-------------------------------------------------------------------------------
Dsrc/obj/elf_reloc_x86_64.c | 134-------------------------------------------------------------------------------
Msrc/obj/format.h | 52++++++++++++++++++++++++++++++++++++++++++++++++++--
Asrc/obj/macho/emit.c | 797+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/obj/macho/link.c | 2613+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Rsrc/obj/macho.h -> src/obj/macho/macho.h | 0
Asrc/obj/macho/read.c | 651+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/obj/macho/reloc_aarch64.c | 113+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/obj/macho/reloc_x86_64.c | 84+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Rsrc/obj/tbd_read.c -> src/obj/macho/tbd_read.c | 0
Dsrc/obj/macho_emit.c | 791-------------------------------------------------------------------------------
Dsrc/obj/macho_read.c | 635-------------------------------------------------------------------------------
Dsrc/obj/macho_reloc_aarch64.c | 113-------------------------------------------------------------------------------
Dsrc/obj/macho_reloc_x86_64.c | 84-------------------------------------------------------------------------------
Msrc/obj/registry.c | 199++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
Mtest/coff/README.md | 2+-
Mtest/coff/cfree-roundtrip-coff.c | 94+++++++++++++++++++++++++++++++++++++++----------------------------------------
Mtest/coff/pe-dso-forwarder.c | 116++++++++++++++++++++++++++++++++++++++++----------------------------------------
Mtest/elf/unit/smoke.c | 2+-
Mtest/emu/rv64_extras_test.c | 125+++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------
Mtest/emu/rv64_smoke_test.c | 51+++++++++++++++++++++++++--------------------------
66 files changed, 12893 insertions(+), 12751 deletions(-)

diff --git a/Makefile b/Makefile @@ -33,13 +33,20 @@ LIB_SRCS_NONARCH = $(shell find src -name '*.c' \ -not -path 'src/arch/aa64/*' \ -not -path 'src/arch/x64/*' \ -not -path 'src/arch/rv64/*' \ - -not -path 'src/arch/c_target/*') + -not -path 'src/arch/c_target/*' \ + -not -path 'src/obj/elf/*' \ + -not -path 'src/obj/macho/*' \ + -not -path 'src/obj/coff/*') LIB_SRCS_ARCH_AA64 = $(shell find src/arch/aa64 -name '*.c' 2>/dev/null) LIB_SRCS_ARCH_X64 = $(shell find src/arch/x64 -name '*.c' 2>/dev/null) LIB_SRCS_ARCH_RV64 = $(shell find src/arch/rv64 -name '*.c' 2>/dev/null) LIB_SRCS_ARCH_C_TARGET = $(shell find src/arch/c_target -name '*.c' 2>/dev/null) +LIB_SRCS_OBJ_ELF = $(shell find src/obj/elf -name '*.c' 2>/dev/null) +LIB_SRCS_OBJ_MACHO = $(shell find src/obj/macho -name '*.c' 2>/dev/null) +LIB_SRCS_OBJ_COFF = $(shell find src/obj/coff -name '*.c' 2>/dev/null) + LIB_SRCS = $(LIB_SRCS_NONARCH) ifeq ($(CFREE_ARCH_AA64_ENABLED),1) LIB_SRCS += $(LIB_SRCS_ARCH_AA64) @@ -53,6 +60,15 @@ endif ifeq ($(CFREE_ARCH_C_TARGET_ENABLED),1) LIB_SRCS += $(LIB_SRCS_ARCH_C_TARGET) endif +ifeq ($(CFREE_OBJ_ELF_ENABLED),1) +LIB_SRCS += $(LIB_SRCS_OBJ_ELF) +endif +ifeq ($(CFREE_OBJ_MACHO_ENABLED),1) +LIB_SRCS += $(LIB_SRCS_OBJ_MACHO) +endif +ifeq ($(CFREE_OBJ_COFF_ENABLED),1) +LIB_SRCS += $(LIB_SRCS_OBJ_COFF) +endif # Per-frontend source sets. Each is gated by its CFREE_LANG_*_ENABLED flag # from mk/config.mk so the matching `#if` in src/api/lang_registry.c and diff --git a/doc/REGISTRY.md b/doc/REGISTRY.md @@ -33,59 +33,49 @@ Hand-edited today; a future `configure` script can generate it. #define CFREE_CONFIG_H /* Backend architectures. */ -#define CFREE_ARCH_AA64 1 -#define CFREE_ARCH_X64 1 -#define CFREE_ARCH_RV64 1 -#define CFREE_ARCH_C_TARGET 0 +#define CFREE_ARCH_AA64_ENABLED 1 +#define CFREE_ARCH_X64_ENABLED 1 +#define CFREE_ARCH_RV64_ENABLED 1 +#define CFREE_ARCH_C_TARGET_ENABLED 1 -/* Object/image formats. 
Each gates emit + read + link-image paths and - * the matching arch-side reloc tables. */ -#define CFREE_OBJ_ELF 1 -#define CFREE_OBJ_MACHO 1 -#define CFREE_OBJ_COFF 1 +/* Object/image formats. */ +#define CFREE_OBJ_ELF_ENABLED 1 +#define CFREE_OBJ_MACHO_ENABLED 1 +#define CFREE_OBJ_COFF_ENABLED 1 /* Language frontends. CFREE_LANG_ASM is unconditional: the assembler * lives inside libcfree as part of the codegen substrate. */ -#define CFREE_LANG_C_ENABLED 1 -#define CFREE_LANG_TOY_ENABLED 1 -#define CFREE_LANG_WASM_ENABLED 1 +#define CFREE_LANG_CPP_ENABLED 1 +#define CFREE_LANG_C_ENABLED 1 +#define CFREE_LANG_TOY_ENABLED 1 +#define CFREE_LANG_WASM_ENABLED 1 #endif ``` -A `config.c` adds `_Static_assert` checks that at least one arch and at -least one obj format are enabled. +`src/core/config_assert.c` adds `_Static_assert` checks that at least +one arch and at least one obj format are enabled. ## Axis 1: Backend architectures -**Status: vtable and registry already exist; only gating is new.** +**Status: vtable, registry, and source gating are done.** -- **Vtable**: `ArchImpl` (`src/arch/arch.h:1098`). Carries sub-pointers - for the format-specific reloc tables (`elf`, `macho`, `coff`), - `dwarf`/`dbg` hooks, the `link` arch descriptor, the per-OS - `abi_vtable` dispatcher, and the codegen/assembler/disassembler - factories. +- **Vtable**: `ArchImpl` (`src/arch/arch.h`). Carries `dwarf`/`dbg` + hooks, the `link` arch descriptor, the per-OS `abi_vtable` + dispatcher, and the codegen/assembler/disassembler factories. - **Registry**: `src/arch/registry.c` already holds a static - `arch_impls[]` array and exposes `arch_lookup`, - `arch_lookup_elf_machine`, `arch_lookup_macho_cputype`, - `arch_lookup_coff_machine`. -- **Change**: wrap each entry (and its `extern const ArchImpl` - declaration) with `#if CFREE_ARCH_<NAME>`. 
The `Makefile` switches - from globbed arch sources to per-arch object groups gated by the same - flags, so disabled archs are neither compiled nor linked. - -No new vtable or registry code; this axis is the cheapest of the four. + `arch_impls[]` array and exposes `arch_lookup`. +- **Build**: each `src/arch/<name>/` source group is gated by the + matching `CFREE_ARCH_*_ENABLED` flag. ## Axis 2: Object/image formats -**Status: vtable and registry exist; source gating remains.** +**Status: vtable, registry, format-arch ops, and directory-based source +gating are done.** -`emit_elf` / `emit_macho` / `emit_coff` are still implemented as -format-specific functions (`src/obj/{elf,macho,coff}_emit.c`), as are -the read paths (`*_read.c`), DSO readers (`*_read_dso.c`), and -link-image emitters (`src/link/link_{elf,macho,coff}.c`). Generic call -sites now reach them through `ObjFormatImpl` in `src/obj/format.h` and -`src/obj/registry.c`. +`emit_elf` / `emit_macho` / `emit_coff` and their read/link paths live +under `src/obj/{elf,macho,coff}/`. Generic call sites reach them through +`ObjFormatImpl` in `src/obj/format.h` and `src/obj/registry.c`. **Vtable** (`src/obj/format.h`): @@ -108,6 +98,16 @@ typedef struct ObjFormatImpl { /* Link-image emit (executable / shared object). */ void (*link_emit)(LinkImage*, Writer*); + void (*layout_dyn)(Linker*, LinkImage*); + void (*free_dyn)(LinkImage*); + + /* Format-owned arch mappings and relocation wire encoders. */ + const ObjElfArchOps* (*elf_arch)(CfreeArchKind); + const ObjElfArchOps* (*elf_machine)(u32 e_machine); + const ObjMachoArchOps* (*macho_arch)(CfreeArchKind); + const ObjMachoArchOps* (*macho_cputype)(u32 cputype); + const ObjCoffArchOps* (*coff_arch)(CfreeArchKind); + const ObjCoffArchOps* (*coff_machine)(u16 machine); /* Optional format-specific linker input policy. 
*/ int (*classify_obj_input)(Compiler*, ObjBuilder*, Sym* soname_out); @@ -142,13 +142,14 @@ static const ObjFormatImpl* const obj_format_impls[] = { **Call-site changes**: the switch in `src/link/link.c` and the obj emit/read entry points have collapsed to `obj_format_lookup(target.obj_format)->fn(...)`. COFF short-import and -long-form import-archive handling now live behind object-format hooks, -so linker input ingestion stays generic over the object format. +long-form import-archive handling, ELF dynamic layout, and +(arch × format) relocation wire metadata now live behind object-format +hooks, so the linker and object entry points stay generic over the +object format implementation. -The arch-side format reloc tables (`ArchElfOps`, `ArchMachoOps`, -`ArchCoffOps` on `ArchImpl`) stay where they are — they're the -(arch × format) intersection and naturally drop out when either side -isn't compiled in. +**Build**: `Makefile` excludes `src/obj/elf/`, `src/obj/macho/`, and +`src/obj/coff/` from the shared source glob, then adds each directory +back only when the matching `CFREE_OBJ_*_ENABLED` flag is enabled. ## Axis 3: ABIs (derived) @@ -209,27 +210,24 @@ isn't compiled in. 
## Summary -| Axis | Vtable | Registry | Net new work | -|-------------|-------------------------------------------------|----------------------------------------------|-----------------------------------------------------------| -| Arch | `ArchImpl` (exists) | `src/arch/registry.c` (exists) | `#if CFREE_ARCH_*` gates | -| Obj format | `ObjFormatImpl` (new) | `src/obj/registry.c` (new) | Extract `emit_*` / `read_*` / `link_emit_*` behind vtable | -| ABI | `ABIVtable` (exists) | per-arch `abi_dispatch` (exists) | Gate per-OS dispatch entries by obj-format flag | -| Frontend | `CfreeFrontendVTable` (exists) | `src/api/lang_registry.c` (new) | Per-frontend vtable extern + folded into `libcfree.a` | +| Axis | Vtable | Registry | Remaining work | +|------------|-----------------------|---------------------------|-------------------------------------------------| +| Arch | `ArchImpl` | `src/arch/registry.c` | none for registry/source gating | +| Obj format | `ObjFormatImpl` | `src/obj/registry.c` | move small policy checks into vtable as needed | +| ABI | `ABIVtable` | per-arch `abi_dispatch` | gate per-OS dispatch entries by obj-format flag | +| Frontend | `CfreeFrontendVTable` | `src/api/lang_registry.c` | none for registry/source gating | ## Implementation order -1. Land `include/cfree/config.h` with all flags set to `1` (no behavior - change). Add `_Static_assert` minimums in `config.c`. -2. Gate arch registry entries and arch Makefile sources. Verify with a - build that flips one arch off. -3. Extract `ObjFormatImpl` and the obj registry. Replace the switches - in `link.c` and the obj entry points. Verify a build with one obj - format off. -4. Gate per-ABI sources and per-OS dispatch entries. -5. Add `src/api/lang_registry.c`, expose `cfree_<lang>_frontend_vtable` - externs, fold `lang/<name>/*.c` into `libcfree.a` gated by - `CFREE_LANG_<NAME>_ENABLED`, and drop host-side registration calls - from `driver/env.c`. +1. 
Done: land `include/cfree/config.h` and `mk/config.mk` with all flags + enabled by default. +2. Done: gate arch registry entries and arch Makefile sources. +3. Done: extract `ObjFormatImpl`, move format code under + `src/obj/{elf,macho,coff}/`, and gate those directories. +4. Remaining: gate per-ABI sources and per-OS dispatch entries. +5. Done: add `src/api/lang_registry.c`, expose + `cfree_<lang>_frontend_vtable` externs, and fold frontends into + `libcfree.a`. Each step is independently testable and leaves the build green with the default all-on configuration. diff --git a/src/api/stubs.c b/src/api/stubs.c @@ -11,7 +11,7 @@ static _Noreturn void unimplemented(Compiler* c, const char* what) { } /* WASM emit/read remain stubs until those writers/readers land. - * COFF emit/read are implemented in src/obj/coff_emit.c and coff_read.c. */ + * COFF emit/read are implemented under src/obj/coff/. */ void emit_wasm(Compiler* c, ObjBuilder* o, Writer* w) { (void)o; diff --git a/src/arch/aa64/arch.c b/src/arch/aa64/arch.c @@ -8,9 +8,6 @@ #include "arch/aa64/regs.h" #include "core/bytes.h" #include "link/link_arch.h" -#include "obj/coff.h" -#include "obj/elf.h" -#include "obj/macho.h" #include "obj/obj.h" extern const LinkArchDesc link_arch_aa64; @@ -45,33 +42,11 @@ static void aa64_wr_u64_target(Compiler* c, u8* p, u64 v) { } } -static const ArchElfOps aa64_elf_ops = { - .e_machine = EM_AARCH64, - .e_flags = 0, - .reloc_to = elf_aarch64_reloc_to, - .reloc_from = elf_aarch64_reloc_from, -}; - -static const ArchMachoOps aa64_macho_ops = { - .cputype = CPU_TYPE_ARM64, - .cpusubtype = CPU_SUBTYPE_ARM64_ALL, - .reloc_to = macho_aarch64_reloc_to, - .reloc_pcrel = macho_aarch64_reloc_pcrel, - .reloc_length = macho_aarch64_reloc_length, - .reloc_from = macho_aarch64_reloc_from, -}; - static const ArchDwarfOps aa64_dwarf_ops = { .min_inst_len = 4u, .max_ops_per_inst = 1u, }; -static const ArchCoffOps aa64_coff_ops = { - .machine = IMAGE_FILE_MACHINE_ARM64, - .reloc_to = 
coff_aarch64_reloc_to, - .reloc_from = coff_aarch64_reloc_from, -}; - static int aa64_apply_label_fixup(Compiler* c, const ArchLabelFixup* fx) { const Section* s; u8 cur[4]; @@ -110,8 +85,8 @@ static int aa64_apply_label_fixup(Compiler* c, const ArchLabelFixup* fx) { u32 imm21 = (u32)(fx->disp & 0x1fffffu); u32 immlo = imm21 & 0x3u; u32 immhi = (imm21 >> 2) & 0x7ffffu; - word = (word & ~((0x3u << 29) | (0x7ffffu << 5))) | - (immlo << 29) | (immhi << 5); + word = (word & ~((0x3u << 29) | (0x7ffffu << 5))) | (immlo << 29) | + (immhi << 5); break; } case R_AARCH64_INTRA_LABEL_ADDR: { @@ -169,7 +144,8 @@ static const CfreePredefinedMacro aa64_predefined_macros[] = { {CFREE_SLICE_LIT("__LP64__"), CFREE_SLICE_LIT("1")}, {CFREE_SLICE_LIT("__ORDER_LITTLE_ENDIAN__"), CFREE_SLICE_LIT("1234")}, {CFREE_SLICE_LIT("__ORDER_BIG_ENDIAN__"), CFREE_SLICE_LIT("4321")}, - {CFREE_SLICE_LIT("__BYTE_ORDER__"), CFREE_SLICE_LIT("__ORDER_LITTLE_ENDIAN__")}, + {CFREE_SLICE_LIT("__BYTE_ORDER__"), + CFREE_SLICE_LIT("__ORDER_LITTLE_ENDIAN__")}, {CFREE_SLICE_LIT("__LITTLE_ENDIAN__"), CFREE_SLICE_LIT("1")}, }; @@ -183,9 +159,6 @@ const ArchImpl arch_impl_aa64 = { .disasm_new = aa64_disasm_new, .apply_label_fixup = aa64_apply_label_fixup, .link = &link_arch_aa64, - .elf = &aa64_elf_ops, - .macho = &aa64_macho_ops, - .coff = &aa64_coff_ops, .dwarf = &aa64_dwarf_ops, .dbg = &aa64_dbg_ops, .predefined_macros = aa64_predefined_macros, diff --git a/src/arch/aa64/link.c b/src/arch/aa64/link.c @@ -15,8 +15,6 @@ #include "core/bytes.h" #include "core/core.h" #include "link/link_arch.h" -#include "obj/elf.h" -#include "obj/macho.h" #include "obj/obj.h" /* Fixed register assignments mandated by the AArch64 PLT ABI. */ @@ -145,8 +143,7 @@ static u32 aa64_emit_iplt_stub(u8* dst, u64 stub_vaddr, u64 slot_vaddr, * sees an unperturbed x30 / argument registers. Page+offset are * baked from the post-shift IAT slot vaddr; no apply-time reloc * needed because both ends move together under image-base shift. 
*/ -static void aa64_emit_coff_iat_stub(u8* dst, u64 stub_vaddr, - u64 iat_slot_vaddr) { +void aa64_emit_coff_iat_stub(u8* dst, u64 stub_vaddr, u64 iat_slot_vaddr) { u32 immlo, immhi; aa64_adrp_imm_halves(stub_vaddr, iat_slot_vaddr, &immlo, &immhi); u32 lo12 = (u32)(iat_slot_vaddr & AA64_PAGE_MASK); @@ -160,7 +157,7 @@ static void aa64_emit_coff_iat_stub(u8* dst, u64 stub_vaddr, wr_u32_le(dst + 8, aa64_br(AA64_PLT_SCRATCH_X16)); } -static void aa64_emit_macho_stub(u8* out, u64 stub_vaddr, u64 got_slot_vaddr) { +void aa64_emit_macho_stub(u8* out, u64 stub_vaddr, u64 got_slot_vaddr) { i64 page_s = ((i64)got_slot_vaddr) & ~(i64)0xfff; i64 page_p = ((i64)stub_vaddr) & ~(i64)0xfff; i64 imm21 = (page_s - page_p) >> 12; @@ -205,16 +202,6 @@ static int aa64_is_direct_page_reloc(RelocKind kind) { } const LinkArchDesc link_arch_aa64 = { - .e_machine = EM_AARCH64, - .default_musl_interp = "/lib/ld-musl-aarch64.so.1", - - .elf_r_relative = ELF_R_AARCH64_RELATIVE, - .elf_r_glob_dat = ELF_R_AARCH64_GLOB_DAT, - .elf_r_jump_slot = ELF_R_AARCH64_JUMP_SLOT, - - .macho_cputype = CPU_TYPE_ARM64, - .macho_cpusubtype = CPU_SUBTYPE_ARM64_ALL, - .plt0_size = AA64_PLT0_SIZE, .plt_entry_size = AA64_PLT_ENTRY_SIZE, .iplt_stub_size = AA64_IPLT_STUB_SIZE, @@ -222,11 +209,6 @@ const LinkArchDesc link_arch_aa64 = { .emit_plt0 = aa64_emit_plt0, .emit_plt_entry = aa64_emit_plt_entry, .emit_iplt_stub = aa64_emit_iplt_stub, - .macho_stub_size = AA64_IPLT_STUB_SIZE, - .emit_macho_stub = aa64_emit_macho_stub, - - .coff_stub_size = AA64_IPLT_STUB_SIZE, - .emit_coff_iat_stub = aa64_emit_coff_iat_stub, .is_branch_reloc = aa64_is_branch_reloc, .is_got_load_reloc = aa64_is_got_load_reloc, diff --git a/src/arch/arch.h b/src/arch/arch.h @@ -256,9 +256,9 @@ typedef struct AliasRoot { typedef struct MemAccess { CfreeCgTypeId type; /* codegen object type accessed */ - u32 size; /* ABI byte size of this access */ - u32 align; /* known byte alignment; 0 means unknown */ - u16 flags; /* MemFlag */ + u32 size; /* 
ABI byte size of this access */ + u32 align; /* known byte alignment; 0 means unknown */ + u16 flags; /* MemFlag */ u16 addr_space; AliasRoot alias; } MemAccess; @@ -501,22 +501,22 @@ typedef struct CGScopeDesc { u8 pad[3]; Label break_label; /* explicit target for break; LABEL_NONE => target creates one */ - Label continue_label; /* explicit target for continue; LABEL_NONE for - non-loops */ - Operand cond; /* SCOPE_IF condition; ignored otherwise */ + Label continue_label; /* explicit target for continue; LABEL_NONE for + non-loops */ + Operand cond; /* SCOPE_IF condition; ignored otherwise */ CfreeCgTypeId result_type; /* reserved for structured expression results */ } CGScopeDesc; typedef enum AsmDir { ASM_IN, ASM_OUT, ASM_INOUT } AsmDir; typedef struct AsmConstraint { - const char* str; /* GCC-style: "r", "=&r", "+m", "i", "0" ... */ - Sym name; /* GCC `[name]` symbolic operand; 0 if absent */ + const char* str; /* GCC-style: "r", "=&r", "+m", "i", "0" ... */ + Sym name; /* GCC `[name]` symbolic operand; 0 if absent */ CfreeCgTypeId type; /* codegen type of the bound expression (output lvalue or input rvalue). Drives RegClass + width for the binder. NULL only for hand-built test constraints (binder falls back to a 64-bit int default). 
*/ - u8 dir; /* AsmDir */ + u8 dir; /* AsmDir */ u8 pad[3]; } AsmConstraint; @@ -633,13 +633,13 @@ typedef struct CGSwitchCase { } CGSwitchCase; typedef struct CGSwitchDesc { - Operand selector; /* OPK_REG or OPK_IMM */ + Operand selector; /* OPK_REG or OPK_IMM */ CfreeCgTypeId selector_type; - Label default_label; /* LABEL_NONE means "fall through past the switch" */ + Label default_label; /* LABEL_NONE means "fall through past the switch" */ const CGSwitchCase* cases; u32 ncases; - u8 hint; /* CfreeCgSwitchHint */ - u8 opt_level; /* 0/1/2; reads policy in cg_lower_switch_default */ + u8 hint; /* CfreeCgSwitchHint */ + u8 opt_level; /* 0/1/2; reads policy in cg_lower_switch_default */ u8 pad[2]; } CGSwitchDesc; @@ -869,11 +869,11 @@ struct CGTarget { * value stack through load_const into OPK_REG. cg and opt's machinize/emit * both rely on this contract to pass small constants through without * burning a value-stack register on materialization. */ - void (*binop)(CGTarget*, BinOp, Operand dst /*REG*/, - Operand a /*REG|IMM*/, Operand b /*REG|IMM*/); + void (*binop)(CGTarget*, BinOp, Operand dst /*REG*/, Operand a /*REG|IMM*/, + Operand b /*REG|IMM*/); void (*unop)(CGTarget*, UnOp, Operand dst /*REG*/, Operand a /*REG|IMM*/); - void (*cmp)(CGTarget*, CmpOp, Operand dst /*REG*/, - Operand a /*REG|IMM*/, Operand b /*REG|IMM*/); /* materialize 0/1 */ + void (*cmp)(CGTarget*, CmpOp, Operand dst /*REG*/, Operand a /*REG|IMM*/, + Operand b /*REG|IMM*/); /* materialize 0/1 */ void (*convert)(CGTarget*, ConvKind, Operand dst, Operand src); /* ---- calls / return ---- @@ -1047,28 +1047,6 @@ struct ArchDisasm { typedef struct LinkArchDesc LinkArchDesc; typedef struct ABIVtable ABIVtable; -typedef struct ArchElfOps { - u32 e_machine; - u32 e_flags; - u32 (*reloc_to)(u32 kind); - u32 (*reloc_from)(u32 wire_type); -} ArchElfOps; - -typedef struct ArchMachoOps { - u32 cputype; - u32 cpusubtype; - u32 (*reloc_to)(u32 kind); - u32 (*reloc_pcrel)(u32 kind); - u32 (*reloc_length)(u32 
kind); - u32 (*reloc_from)(u32 wire_type); -} ArchMachoOps; - -typedef struct ArchCoffOps { - u16 machine; /* IMAGE_FILE_MACHINE_* */ - u32 (*reloc_to)(u32 kind); - u32 (*reloc_from)(u32 wire_type); -} ArchCoffOps; - typedef struct ArchDwarfOps { /* DWARF .debug_line minimum instruction length and maximum operations per * instruction. Fixed-width ISAs normally use their instruction width; x86_64 @@ -1097,9 +1075,8 @@ typedef struct ArchDbgOps { CfreeStatus (*decode_insn)(const u8* bytes, u32 len, u64 pc, ArchDbgInsn* out); CfreeStatus (*build_displaced_shim)(const ArchDbgInsn* insn, - void* scratch_write, - u64 scratch_runtime, u32 scratch_cap, - u32* sentinel_off, + void* scratch_write, u64 scratch_runtime, + u32 scratch_cap, u32* sentinel_off, u64* fallthrough_pc); int (*is_call)(const ArchDbgInsn* insn); } ArchDbgOps; @@ -1134,9 +1111,6 @@ typedef struct ArchImpl { int (*apply_label_fixup)(Compiler*, const ArchLabelFixup*); const LinkArchDesc* link; - const ArchElfOps* elf; - const ArchMachoOps* macho; - const ArchCoffOps* coff; const ArchDwarfOps* dwarf; const ArchDbgOps* dbg; @@ -1161,9 +1135,6 @@ typedef struct ArchImpl { const ArchImpl* arch_lookup(CfreeArchKind); const ArchImpl* arch_for_compiler(const Compiler*); -const ArchImpl* arch_lookup_elf_machine(u32 e_machine); -const ArchImpl* arch_lookup_macho_cputype(u32 cputype); -const ArchImpl* arch_lookup_coff_machine(u16 machine); /* Pick the right CGBackend for a session given the compiler's target arch * and the per-emit CodeOptions. Returns &arch_for_compiler(c)->backend for diff --git a/src/arch/registry.c b/src/arch/registry.c @@ -3,8 +3,8 @@ * This file is the *only* place in the codebase that checks * CFREE_ARCH_*_ENABLED. Everything downstream operates on the registry's * outputs — `const CGBackend*` for session-level code emission, and - * `const ArchImpl*` for arch-specific metadata (ELF/Mach-O/COFF reloc - * tables, ABI selection, DWARF, debugger hooks, register file, etc.). 
+ * `const ArchImpl*` for arch-specific metadata (ABI selection, DWARF, + * debugger hooks, register file, etc.). * * Conceptually: * - A CGBackend is "something that can build a CGTarget from a Compiler + @@ -32,8 +32,8 @@ extern const CGBackend cg_backend_c_target; #endif /* Arch-metadata roster. The arch_lookup_* helpers iterate this list when a - * caller needs ELF/Mach-O/COFF/ABI/etc. metadata — answers only come from - * backends that have an ArchImpl, so c_target is intentionally absent. + * caller needs ABI/debug/etc. metadata — answers only come from backends + * that have an ArchImpl, so c_target is intentionally absent. * cg_backend_for_session() picks the CGBackend (which is &impl->backend or * &cg_backend_c_target) without consulting this list. */ static const ArchImpl* const arch_impls[] = { @@ -64,35 +64,6 @@ const ArchImpl* arch_for_compiler(const Compiler* c) { return arch_lookup(c->target.arch); } -const ArchImpl* arch_lookup_elf_machine(u32 e_machine) { - for (u32 i = 0; i < arch_impls_count(); ++i) { - const ArchImpl* impl = arch_impls[i]; - if (impl->elf && impl->elf->e_machine == e_machine) return impl; - } - return NULL; -} - -const ArchImpl* arch_lookup_macho_cputype(u32 cputype) { - for (u32 i = 0; i < arch_impls_count(); ++i) { - const ArchImpl* impl = arch_impls[i]; - if (impl->macho && impl->macho->cputype == cputype) return impl; - } - return NULL; -} - -const ArchImpl* arch_lookup_coff_machine(u16 machine) { - /* IMAGE_FILE_MACHINE_ARM64EC (0xA641) aliases to AArch64 — the - * instruction encoding is identical; only the ABI differs, and the - * linker treats both as a single image's worth of code on Windows - * targets. 
*/ - if (machine == 0xA641u) machine = 0xAA64u; - for (u32 i = 0; i < arch_impls_count(); ++i) { - const ArchImpl* impl = arch_impls[i]; - if (impl->coff && impl->coff->machine == machine) return impl; - } - return NULL; -} - const CGBackend* cg_backend_for_session(const Compiler* c, const CfreeCodeOptions* opts) { if (opts && opts->emit_c_source) { diff --git a/src/arch/rv64/arch.c b/src/arch/rv64/arch.c @@ -7,7 +7,6 @@ #include "arch/rv64/rv64.h" #include "core/bytes.h" #include "link/link_arch.h" -#include "obj/elf.h" #include "obj/obj.h" extern const LinkArchDesc link_arch_rv64; @@ -19,13 +18,6 @@ static const ABIVtable* rv64_abi_vtable(Compiler* c, CfreeOSKind os) { return &rv64_vtable; } -static const ArchElfOps rv64_elf_ops = { - .e_machine = EM_RISCV, - .e_flags = EF_RISCV_RVC | EF_RISCV_FLOAT_ABI_DOUBLE, - .reloc_to = elf_riscv64_reloc_to, - .reloc_from = elf_riscv64_reloc_from, -}; - static const ArchDwarfOps rv64_dwarf_ops = { .min_inst_len = 4u, .max_ops_per_inst = 1u, @@ -134,7 +126,8 @@ static const CfreePredefinedMacro rv64_predefined_macros[] = { {CFREE_SLICE_LIT("_LP64"), CFREE_SLICE_LIT("1")}, {CFREE_SLICE_LIT("__ORDER_LITTLE_ENDIAN__"), CFREE_SLICE_LIT("1234")}, {CFREE_SLICE_LIT("__ORDER_BIG_ENDIAN__"), CFREE_SLICE_LIT("4321")}, - {CFREE_SLICE_LIT("__BYTE_ORDER__"), CFREE_SLICE_LIT("__ORDER_LITTLE_ENDIAN__")}, + {CFREE_SLICE_LIT("__BYTE_ORDER__"), + CFREE_SLICE_LIT("__ORDER_LITTLE_ENDIAN__")}, {CFREE_SLICE_LIT("__LITTLE_ENDIAN__"), CFREE_SLICE_LIT("1")}, }; @@ -160,8 +153,6 @@ const ArchImpl arch_impl_rv64 = { .disasm_new = rv64_disasm_new, .apply_label_fixup = rv64_apply_label_fixup, .link = &link_arch_rv64, - .elf = &rv64_elf_ops, - .macho = NULL, .dwarf = &rv64_dwarf_ops, .dbg = &rv64_dbg_ops, .predefined_macros = rv64_predefined_macros, diff --git a/src/arch/rv64/link.c b/src/arch/rv64/link.c @@ -9,7 +9,6 @@ #include "core/bytes.h" #include "core/core.h" #include "link/link_arch.h" -#include "obj/elf.h" /* PLT0 is 8 canonical NOPs (32 bytes); 
each PLT entry and IPLT stub is * 4 instructions (16 bytes) / 3 instructions (12 bytes) respectively. @@ -77,13 +76,6 @@ static u32 rv64_emit_iplt_stub(u8* dst, u64 stub_vaddr, u64 slot_vaddr, } const LinkArchDesc link_arch_rv64 = { - .e_machine = EM_RISCV, - .default_musl_interp = "/lib/ld-musl-riscv64.so.1", - /* RISC-V psABI has no dedicated GLOB_DAT — GOT-slot data imports - * use the generic absolute-64 reloc instead. */ - .elf_r_relative = ELF_R_RISCV_RELATIVE, - .elf_r_glob_dat = ELF_R_RISCV_64, - .elf_r_jump_slot = ELF_R_RISCV_JUMP_SLOT, .plt0_size = RV64_PLT0_SIZE, .plt_entry_size = RV64_PLT_ENTRY_SIZE, .iplt_stub_size = RV64_IPLT_STUB_SIZE, diff --git a/src/arch/x64/arch.c b/src/arch/x64/arch.c @@ -7,9 +7,6 @@ #include "arch/x64/x64.h" #include "core/bytes.h" #include "link/link_arch.h" -#include "obj/coff.h" -#include "obj/elf.h" -#include "obj/macho.h" #include "obj/obj.h" extern const LinkArchDesc link_arch_x64; @@ -27,33 +24,11 @@ static const ABIVtable* x64_abi_vtable(Compiler* c, CfreeOSKind os) { } } -static const ArchElfOps x64_elf_ops = { - .e_machine = EM_X86_64, - .e_flags = 0, - .reloc_to = elf_x86_64_reloc_to, - .reloc_from = elf_x86_64_reloc_from, -}; - -static const ArchMachoOps x64_macho_ops = { - .cputype = CPU_TYPE_X86_64, - .cpusubtype = CPU_SUBTYPE_X86_64_ALL, - .reloc_to = macho_x86_64_reloc_to, - .reloc_pcrel = macho_x86_64_reloc_pcrel, - .reloc_length = macho_x86_64_reloc_length, - .reloc_from = macho_x86_64_reloc_from, -}; - static const ArchDwarfOps x64_dwarf_ops = { .min_inst_len = 1u, .max_ops_per_inst = 1u, }; -static const ArchCoffOps x64_coff_ops = { - .machine = IMAGE_FILE_MACHINE_AMD64, - .reloc_to = coff_x86_64_reloc_to, - .reloc_from = coff_x86_64_reloc_from, -}; - static int x64_apply_label_fixup(Compiler* c, const ArchLabelFixup* fx) { (void)c; if (!fx || fx->kind != R_PC32 || fx->width != 4) return 1; @@ -73,7 +48,8 @@ static const CfreePredefinedMacro x64_predefined_macros[] = { {CFREE_SLICE_LIT("__LP64__"), 
CFREE_SLICE_LIT("1")}, {CFREE_SLICE_LIT("__ORDER_LITTLE_ENDIAN__"), CFREE_SLICE_LIT("1234")}, {CFREE_SLICE_LIT("__ORDER_BIG_ENDIAN__"), CFREE_SLICE_LIT("4321")}, - {CFREE_SLICE_LIT("__BYTE_ORDER__"), CFREE_SLICE_LIT("__ORDER_LITTLE_ENDIAN__")}, + {CFREE_SLICE_LIT("__BYTE_ORDER__"), + CFREE_SLICE_LIT("__ORDER_LITTLE_ENDIAN__")}, {CFREE_SLICE_LIT("__LITTLE_ENDIAN__"), CFREE_SLICE_LIT("1")}, }; @@ -108,9 +84,6 @@ const ArchImpl arch_impl_x64 = { .disasm_new = x64_disasm_new, .apply_label_fixup = x64_apply_label_fixup, .link = &link_arch_x64, - .elf = &x64_elf_ops, - .macho = &x64_macho_ops, - .coff = &x64_coff_ops, .dwarf = &x64_dwarf_ops, .dbg = &x64_dbg_ops, .predefined_macros = x64_predefined_macros, diff --git a/src/arch/x64/link.c b/src/arch/x64/link.c @@ -7,12 +7,10 @@ * descriptor switchover is a pure refactor. All raw byte values come * from named constants / inline writers in arch/x64/isa.h. */ -#include "link/link_arch.h" - #include "arch/x64/isa.h" #include "core/bytes.h" #include "core/core.h" -#include "obj/elf.h" +#include "link/link_arch.h" /* PLT0 layout under DF_1_NOW: never executed (loader pre-binds every * slot via .rela.plt before user code runs), so we just emit 32 bytes @@ -36,8 +34,7 @@ static void x64_emit_plt_entry(u8* dst, u64 entry_vaddr, u64 slot_vaddr) { i64 disp = (i64)slot_vaddr - (i64)(entry_vaddr + X64_JMP_RIPREL_SIZE); i32 disp32 = (i32)(u32)((u64)disp & 0xffffffffu); x64_write_jmp_riprel(dst, disp32); - x64_write_nop_pad(dst + X64_JMP_RIPREL_SIZE, - 16u - X64_JMP_RIPREL_SIZE); + x64_write_nop_pad(dst + X64_JMP_RIPREL_SIZE, 16u - X64_JMP_RIPREL_SIZE); } /* IPLT (ifunc) trampoline stub (12 B): @@ -77,33 +74,22 @@ static int x64_is_got_load_reloc(RelocKind kind) { * head, minus the trailing NOP pad — Win64 calls don't need a stub * aligned to a fixed entry stride because there's no PLT0 to share * the address space with. 
*/ -static void x64_emit_coff_iat_stub(u8* dst, u64 stub_vaddr, - u64 iat_slot_vaddr) { +void x64_emit_coff_iat_stub(u8* dst, u64 stub_vaddr, u64 iat_slot_vaddr) { i64 disp = (i64)iat_slot_vaddr - (i64)(stub_vaddr + X64_JMP_RIPREL_SIZE); i32 disp32 = (i32)(u32)((u64)disp & 0xffffffffu); x64_write_jmp_riprel(dst, disp32); } const LinkArchDesc link_arch_x64 = { - .e_machine = EM_X86_64, - .default_musl_interp = "/lib/ld-musl-x86_64.so.1", - - .elf_r_relative = ELF_R_X86_64_RELATIVE, - .elf_r_glob_dat = ELF_R_X86_64_GLOB_DAT, - .elf_r_jump_slot = ELF_R_X86_64_JUMP_SLOT, - - .plt0_size = 32u, - .plt_entry_size = 16u, - .iplt_stub_size = 12u, - - .emit_plt0 = x64_emit_plt0, - .emit_plt_entry = x64_emit_plt_entry, - .emit_iplt_stub = x64_emit_iplt_stub, + .plt0_size = 32u, + .plt_entry_size = 16u, + .iplt_stub_size = 12u, - .is_branch_reloc = x64_is_branch_reloc, - .is_got_load_reloc = x64_is_got_load_reloc, - .needs_jit_call_stub = x64_is_branch_reloc, + .emit_plt0 = x64_emit_plt0, + .emit_plt_entry = x64_emit_plt_entry, + .emit_iplt_stub = x64_emit_iplt_stub, - .coff_stub_size = X64_JMP_RIPREL_SIZE, - .emit_coff_iat_stub = x64_emit_coff_iat_stub, + .is_branch_reloc = x64_is_branch_reloc, + .is_got_load_reloc = x64_is_got_load_reloc, + .needs_jit_call_stub = x64_is_branch_reloc, }; diff --git a/src/emu/elf_load.c b/src/emu/elf_load.c @@ -29,7 +29,7 @@ #include "core/slice.h" #include "emu/emu.h" #include "emu/rv64_ops.h" -#include "obj/elf.h" +#include "obj/elf/elf.h" /* ---- Layout knobs ---- */ /* Stack size — large enough for typical libc init in the smoke tests @@ -50,9 +50,7 @@ static u16 rd16(const u8* p) { return (u16)p[0] | ((u16)p[1] << 8); } static u32 rd32(const u8* p) { return (u32)p[0] | ((u32)p[1] << 8) | ((u32)p[2] << 16) | ((u32)p[3] << 24); } -static u64 rd64(const u8* p) { - return (u64)rd32(p) | ((u64)rd32(p + 4) << 32); -} +static u64 rd64(const u8* p) { return (u64)rd32(p) | ((u64)rd32(p + 4) << 32); } static void wr64(u8* p, u64 v) { u32 i; @@ 
-238,7 +236,10 @@ int emu_load_elf(Compiler* c, CfreeEmuArch arch, const u8* bytes, size_t len, for (i = 0; i < e_phnum; ++i) { const u8* ph = bytes + e_phoff + (u64)i * e_phentsize; u32 p_type = rd32(ph + 0); - if (p_type == PT_INTERP) { have_interp = 1; break; } + if (p_type == PT_INTERP) { + have_interp = 1; + break; + } } if (have_interp) { if (!g_pending_interp.bytes || g_pending_interp.len == 0) { @@ -357,8 +358,7 @@ int emu_load_elf(Compiler* c, CfreeEmuArch arch, const u8* bytes, size_t len, if (envc > 0) { envp_addrs = (u64*)heap->alloc(heap, sizeof(u64) * (size_t)envc, 8u); if (!envp_addrs) { - if (argv_addrs) - heap->free(heap, argv_addrs, sizeof(u64) * (size_t)argc); + if (argv_addrs) heap->free(heap, argv_addrs, sizeof(u64) * (size_t)argc); heap->free(heap, guest_base, (size_t)guest_size); return 14; } @@ -392,10 +392,10 @@ int emu_load_elf(Compiler* c, CfreeEmuArch arch, const u8* bytes, size_t len, /* Table size: argc(8) + (argc+1)*8 + (envc+1)*8 + auxv (6 pairs * * 16). Place the table so that final sp is 16-byte aligned. */ - u64 table_bytes = 8u /* argc */ - + (u64)(argc + 1) * 8u /* argv + NULL */ - + (u64)(envc + 1) * 8u /* envp + NULL */ - + 6u * 16u; /* auxv pairs incl. AT_NULL */ + u64 table_bytes = 8u /* argc */ + + (u64)(argc + 1) * 8u /* argv + NULL */ + + (u64)(envc + 1) * 8u /* envp + NULL */ + + 6u * 16u; /* auxv pairs incl. AT_NULL */ /* Round table_bytes up to 16 so sp lands aligned. */ u64 sp_table = (cursor - table_bytes) & ~(u64)0xfu; sp = sp_table; @@ -447,8 +447,8 @@ int emu_load_elf(Compiler* c, CfreeEmuArch arch, const u8* bytes, size_t len, }; u32 aux_count = sizeof(aux) / sizeof(aux[0]); /* If the table_bytes budget was undersized, recompute and shift sp. */ - u64 needed = 8u + (u64)(argc + 1) * 8u + (u64)(envc + 1) * 8u + - (u64)aux_count * 16u; + u64 needed = + 8u + (u64)(argc + 1) * 8u + (u64)(envc + 1) * 8u + (u64)aux_count * 16u; if (needed > table_bytes) { /* Re-place table_bytes := needed, re-align sp_table. 
*/ sp_table = (cursor - needed) & ~(u64)0xfu; diff --git a/src/link/link.c b/src/link/link.c @@ -80,8 +80,8 @@ Linker* link_new(Compiler* c) { * LC_MAIN names main directly (dyld owns the C runtime startup), * so the on-disk symbol is `_main` (the mangled form of `main`). * Format choice lives in obj_format_default_entry_name. */ - l->entry_name = - pool_intern_slice(c->global, slice_from_cstr(obj_format_default_entry_name(c))); + l->entry_name = pool_intern_slice( + c->global, slice_from_cstr(obj_format_default_entry_name(c))); /* Match the rest of libcfree's lifetime story: the new'd Linker is * registered for cleanup in case a panic fires before link_free. */ l->deferred = compiler_defer(c, linker_cleanup, l); @@ -132,20 +132,19 @@ LinkInputId link_add_obj_bytes(Linker* l, const char* name, const u8* data, fmt = cfree_detect_fmt(data, len); impl = obj_format_lookup_bin(fmt); if (!impl || !impl->read) - compiler_panic(l->c, no_loc(), - "link_add_obj_bytes: unsupported object format " - "(fmt=%u) for '%.*s'", - (u32)fmt, - SLICE_ARG(name ? slice_from_cstr(name) - : SLICE_LIT("(unnamed)"))); + compiler_panic( + l->c, no_loc(), + "link_add_obj_bytes: unsupported object format " + "(fmt=%u) for '%.*s'", + (u32)fmt, + SLICE_ARG(name ? slice_from_cstr(name) : SLICE_LIT("(unnamed)"))); reader_name = impl->read_name; ob = impl->read(l->c, name, data, len); if (!ob) - compiler_panic(l->c, no_loc(), - "link_add_obj_bytes: %.*s returned NULL for '%.*s'", - SLICE_ARG(slice_from_cstr(reader_name)), - SLICE_ARG(name ? slice_from_cstr(name) - : SLICE_LIT("(unnamed)"))); + compiler_panic( + l->c, no_loc(), "link_add_obj_bytes: %.*s returned NULL for '%.*s'", + SLICE_ARG(slice_from_cstr(reader_name)), + SLICE_ARG(name ? 
slice_from_cstr(name) : SLICE_LIT("(unnamed)"))); in = inputs_push(l, &id); in->order = l->next_input_order++; in->obj = ob; /* re-uses the ObjBuilder slot for ownership */ @@ -174,20 +173,19 @@ LinkInputId link_add_dso_bytes(Linker* l, const char* name, const u8* data, const char* reader_name; if (!l || !data || !len) return LINK_INPUT_NONE; if (!obj_format_dso_reader_for_bytes(data, len, &fmt, &reader)) - compiler_panic(l->c, no_loc(), - "link_add_dso_bytes: unsupported DSO format " - "(fmt=%u) for '%.*s'", - (u32)fmt, - SLICE_ARG(name ? slice_from_cstr(name) - : SLICE_LIT("(unnamed)"))); + compiler_panic( + l->c, no_loc(), + "link_add_dso_bytes: unsupported DSO format " + "(fmt=%u) for '%.*s'", + (u32)fmt, + SLICE_ARG(name ? slice_from_cstr(name) : SLICE_LIT("(unnamed)"))); reader_name = reader.name; ob = reader.read(l->c, name, data, len, &soname); if (!ob) - compiler_panic(l->c, no_loc(), - "link_add_dso_bytes: %.*s returned NULL for '%.*s'", - SLICE_ARG(slice_from_cstr(reader_name)), - SLICE_ARG(name ? slice_from_cstr(name) - : SLICE_LIT("(unnamed)"))); + compiler_panic( + l->c, no_loc(), "link_add_dso_bytes: %.*s returned NULL for '%.*s'", + SLICE_ARG(slice_from_cstr(reader_name)), + SLICE_ARG(name ? slice_from_cstr(name) : SLICE_LIT("(unnamed)"))); in = inputs_push(l, &id); in->kind = LINK_INPUT_DSO_BYTES; in->order = l->next_input_order++; @@ -228,11 +226,13 @@ LinkInputId link_add_archive_bytes(Linker* l, const char* name, const u8* data, in_arc.data = data; in_arc.len = len; - if (cfree_ar_iter_new(cfree_compiler_context(l->c), &in_arc, &it) != CFREE_OK || !it) - compiler_panic(l->c, no_loc(), - "link_add_archive_bytes: '%.*s' is not a valid ar archive", - SLICE_ARG(name ? slice_from_cstr(name) - : SLICE_LIT("(unnamed)"))); + if (cfree_ar_iter_new(cfree_compiler_context(l->c), &in_arc, &it) != + CFREE_OK || + !it) + compiler_panic( + l->c, no_loc(), + "link_add_archive_bytes: '%.*s' is not a valid ar archive", + SLICE_ARG(name ? 
slice_from_cstr(name) : SLICE_LIT("(unnamed)"))); /* Two-pass: count members so we allocate the member array exactly * once. The linker_release path frees by nmembers, so we need @@ -264,12 +264,14 @@ LinkInputId link_add_archive_bytes(Linker* l, const char* name, const u8* data, * for us, so every member returned here is a real object file. * Format is detected per-member so a single archive could in * principle hold mixed formats (in practice it never does). */ - if (cfree_ar_iter_new(cfree_compiler_context(l->c), &in_arc, &it) != CFREE_OK || !it) - compiler_panic(l->c, no_loc(), - "link_add_archive_bytes: ar_iter_init failed on '%.*s' " - "second pass", - SLICE_ARG(name ? slice_from_cstr(name) - : SLICE_LIT("(unnamed)"))); + if (cfree_ar_iter_new(cfree_compiler_context(l->c), &in_arc, &it) != + CFREE_OK || + !it) + compiler_panic( + l->c, no_loc(), + "link_add_archive_bytes: ar_iter_init failed on '%.*s' " + "second pass", + SLICE_ARG(name ? slice_from_cstr(name) : SLICE_LIT("(unnamed)"))); n = 0; while (cfree_ar_iter_next(it, &mem) == CFREE_ITER_ITEM && n < ar->nmembers) { ObjBuilder* ob = NULL; @@ -295,23 +297,21 @@ LinkInputId link_add_archive_bytes(Linker* l, const char* name, const u8* data, } } if (!member_impl || !member_impl->read) - compiler_panic(l->c, no_loc(), - "link_add_archive_bytes: unsupported member " - "format (fmt=%u) for '%.*s' in archive '%.*s'", - (u32)mfmt, - SLICE_ARG(mem.name.len ? mem.name - : SLICE_LIT("(unnamed)")), - SLICE_ARG(name ? slice_from_cstr(name) - : SLICE_LIT("(unnamed)"))); + compiler_panic( + l->c, no_loc(), + "link_add_archive_bytes: unsupported member " + "format (fmt=%u) for '%.*s' in archive '%.*s'", + (u32)mfmt, + SLICE_ARG(mem.name.len ? mem.name : SLICE_LIT("(unnamed)")), + SLICE_ARG(name ? 
slice_from_cstr(name) : SLICE_LIT("(unnamed)"))); ob = member_impl->read(l->c, mem.name.s, mem.data, mem.size); if (!ob) - compiler_panic(l->c, no_loc(), - "link_add_archive_bytes: object read failed for " - "member '%.*s' of archive '%.*s'", - SLICE_ARG(mem.name.len ? mem.name - : SLICE_LIT("(unnamed)")), - SLICE_ARG(name ? slice_from_cstr(name) - : SLICE_LIT("(unnamed)"))); + compiler_panic( + l->c, no_loc(), + "link_add_archive_bytes: object read failed for " + "member '%.*s' of archive '%.*s'", + SLICE_ARG(mem.name.len ? mem.name : SLICE_LIT("(unnamed)")), + SLICE_ARG(name ? slice_from_cstr(name) : SLICE_LIT("(unnamed)"))); ar->members[n].name = mem.name.len ? pool_intern_slice(l->c->global, mem.name) : 0; ar->members[n].obj = ob; @@ -546,7 +546,10 @@ static void link_image_release(LinkImage* img) { sizeof(*img->dbg_objs_owned) * img->dbg_objs_n); } symhash_fini(&img->globals); - if (img->dyn) link_dyn_state_free(img); + if (img->dyn) { + const ObjFormatImpl* fmt = obj_format_lookup(img->c->target.obj); + if (fmt && fmt->free_dyn) fmt->free_dyn(img); + } img->heap->free(img->heap, img, sizeof(*img)); } diff --git a/src/link/link_arch.h b/src/link/link_arch.h @@ -32,28 +32,6 @@ typedef struct LinkArchIPltReloc { } LinkArchIPltReloc; typedef struct LinkArchDesc { - /* ---- ELF identity ---- */ - u32 e_machine; /* EM_AARCH64 / EM_X86_64 / EM_RISCV */ - - /* Default PT_INTERP (canonical musl loader for this arch). Drivers - * should override via link_set_interp_path; the default fires only - * when the caller leaves it unset and -static isn't in effect. */ - const char* default_musl_interp; - - /* ---- Dynamic-reloc type numbers (ELF) ---- - * Used by .rela.dyn / .rela.plt emission. Reloc-type numbers are - * arch-specific: aarch64 starts at 1024, x86_64 in the low single - * digits, RISC-V uses its own encoding (and maps GLOB_DAT onto - * R_RISCV_64 since the psABI has no dedicated GLOB_DAT). 
*/ - u32 elf_r_relative; - u32 elf_r_glob_dat; - u32 elf_r_jump_slot; - - /* ---- Mach-O identity ---- - * Zero means the target has no Mach-O executable writer yet. */ - u32 macho_cputype; - u32 macho_cpusubtype; - /* ---- PLT geometry ---- * All three arches today use a 32-byte PLT0 + 16-byte per-import * entry, but exposing the sizes keeps the linker free of magic @@ -97,24 +75,6 @@ typedef struct LinkArchDesc { u32 (*emit_iplt_stub)(u8* dst, u64 stub_vaddr, u64 slot_vaddr, LinkArchIPltReloc out[2]); - /* Mach-O stubs. Used only when macho_cputype is non-zero. */ - u32 macho_stub_size; - void (*emit_macho_stub)(u8* dst, u64 stub_vaddr, u64 got_slot_vaddr); - - /* PE/COFF IAT stub. Used when target.obj == CFREE_OBJ_COFF and a - * relocation targets an imported function. The stub performs an - * indirect jump through the IAT slot: - * x64: ff 25 disp32 ; jmp [rip + disp_to_iat_slot] (6 B) - * aa64: adrp/ldr/br x16 ; load IAT slot, branch to it (12 B) - * - * The 32-bit displacement on x64 and the page-relative pair on - * aa64 are baked into the stub bytes directly (no apply-time - * relocations needed), so callers do not enqueue extra - * LinkRelocApply records — see how emit_iplt_stub returns 0 for - * arches that can encode the displacement inline. */ - u32 coff_stub_size; - void (*emit_coff_iat_stub)(u8* dst, u64 stub_vaddr, u64 iat_slot_vaddr); - /* Relocation classification used by format-specific linker passes. */ int (*is_branch_reloc)(RelocKind); int (*is_got_load_reloc)(RelocKind); diff --git a/src/link/link_coff.c b/src/link/link_coff.c @@ -1,1748 +0,0 @@ -/* link_emit_coff: write a PE32+ MH_EXECUTABLE-style image to the - * caller-provided Writer. - * - * Phase 3.1 deliverable per doc/WINDOWS.md: skeleton + base-reloc - * handling for the four standard PE sections. 
Import-table synthesis - * (.idata / IAT) lands in Phase 3.2; per-arch IAT stub bytes in 3.3; - * TLS directory in 3.5; debug directory in 3.6 — those code paths - * panic loudly here so the strict-by-default posture surfaces them. - * - * File layout (in write order): - * - * [DOS stub IMAGE_DOS_HEADER] -- 64 bytes; e_lfanew=0x40 - * [PE signature "PE\0\0"] -- 4 bytes - * [IMAGE_FILE_HEADER] -- 20 bytes - * [IMAGE_OPTIONAL_HEADER64] -- 240 bytes (PE32+) - * [IMAGE_SECTION_HEADER * nsec] -- 40 bytes each - * [pad to FileAlignment] - * [.text bytes, padded to FileAlignment] - * [.rdata bytes, padded to FileAlignment] - * [.data bytes, padded to FileAlignment] - * [.reloc bytes, padded to FileAlignment] - * - * .bss is uninitialized — it has a section header (with VirtualSize) - * but no file bytes and PointerToRawData=0. - * - * RVAs follow SectionAlignment (0x1000); FileAlignment is 0x200; the - * first section starts at RVA 0x1000 (right after the headers map). - * ImageBase is the Win64 convention 0x140000000. - * - * Reloc strategy. The link layout pass has already placed every kept - * input section into img->sections / img->segments under the ELF/Mach-O - * coordinate system (image-relative vaddrs, often packed by permission - * bucket). COFF wants a different packing — the four standard - * sections at SectionAlignment-aligned RVAs — so this writer re-derives - * per-input-section vaddrs from scratch and shifts each LinkSection / - * symbol / LinkRelocApply by its section's per-section delta before - * applying relocations. link_emit_macho takes the same tack for its - * __DATA_CONST splits; the ELF writer leaves vaddrs alone because the - * link layout already matches ELF's PT_LOAD shape. 
*/ - -#include <stdlib.h> -#include <string.h> - -#include "arch/arch.h" -#include "core/core.h" -#include "core/heap.h" -#include "core/pool.h" -#include "core/slice.h" -#include "core/util.h" -#include "core/vec.h" -#include "link/link.h" -#include "link/link_arch.h" -#include "link/link_internal.h" -#include "obj/coff.h" - -/* ---- .idata layout constants ---- - * - * Per doc/WINDOWS.md §3.2: the .idata content is a concatenation of an - * IMAGE_IMPORT_DESCRIPTOR table (NULL-terminated), one ILT per DLL - * (each NULL-terminated u64 array), one IAT per DLL (same shape), - * a hint/name table, and a DLL-name string pool. Each block is - * pointer-sized aligned within the section. AArch64 import thunks use - * PAGEOFFSET_12L for 64-bit ILT/IAT slots, so those sub-blocks must be - * 8-byte aligned. */ -#define PE_IDATA_BLOCK_ALIGN 8u -/* Hint field on IMAGE_IMPORT_BY_NAME records. cfree never has a real - * hint (the OS loader doesn't need one to do the bsearch on the DLL's - * export name table), so 0 is the canonical "no hint" value. */ -#define PE_IMPORT_HINT_NONE 0u - -static SrcLoc no_loc(void) { - SrcLoc l = {0, 0, 0}; - return l; -} - -/* ---- PE/Win64 layout constants ---- - * - * Centralised here so the wire-format numbers in this TU stay named - * (and the magic-numbers rule in CLAUDE.md is honoured). Values match - * the PE/COFF spec + Win64 conventions; mingw-w64's ld defaults agree. 
*/ -#define PE_IMAGE_BASE LINK_PE_IMAGE_BASE -#define PE_SECTION_ALIGNMENT 0x1000u -#define PE_FILE_ALIGNMENT 0x200u -#define PE_FIRST_SECTION_RVA 0x1000u -#define PE_DOS_E_LFANEW 0x40u -#define PE_NUM_DATA_DIRS COFF_NUM_DATA_DIRECTORIES -#define PE_OPT_HDR_SIZE COFF_OPT_HDR64_SIZE -#define PE_LINKER_MAJOR 0u -#define PE_LINKER_MINOR 1u -#define PE_OS_MAJOR 6u /* Windows Vista+ — mingw default */ -#define PE_OS_MINOR 0u -#define PE_SUBSYS_MAJOR 6u -#define PE_SUBSYS_MINOR 0u -#define PE_STACK_RESERVE 0x100000ULL -#define PE_STACK_COMMIT 0x1000ULL -#define PE_HEAP_RESERVE 0x100000ULL -#define PE_HEAP_COMMIT 0x1000ULL -#define PE_DLL_CHARS \ - (IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA | \ - IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE | \ - IMAGE_DLLCHARACTERISTICS_NX_COMPAT | \ - IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE) - -/* PE32+ DOS-stub-to-PE-signature offsets (manual, since we marshal - * field-by-field rather than memcpy'ing the packed struct). */ -#define PE_DOS_HDR_SIZE COFF_DOS_HEADER_SIZE -#define PE_SIG_SIZE 4u -#define PE_FILE_HDR_SIZE COFF_FILE_HEADER_SIZE -#define PE_SECTION_HDR_SIZE COFF_SECTION_HEADER_SIZE - -/* Standard PE output buckets, plus .idata (import directory) and - * .reloc — both synthesised here rather than copied from input - * sections. Order matters: it's the on-image RVA order. */ -typedef enum CoffBucket { - COFF_BUCKET_TEXT = 0, - COFF_BUCKET_RDATA = 1, - COFF_BUCKET_IDATA = 2, - COFF_BUCKET_DATA = 3, - COFF_BUCKET_TLS = 4, - COFF_BUCKET_BSS = 5, - COFF_BUCKET_RELOC = 6, - COFF_NBUCKETS = 7, -} CoffBucket; - -/* IMAGE_TLS_DIRECTORY64 wire size: u64*4 + u32*2 = 40 bytes. */ -#define COFF_TLS_DIRECTORY64_SIZE 40u -/* Byte offsets of the four u64 VA fields within IMAGE_TLS_DIRECTORY64 - * — they need base relocations so ASLR can fix them up. 
*/ -#define COFF_TLSDIR_OFF_START_ADDR 0u -#define COFF_TLSDIR_OFF_END_ADDR 8u -#define COFF_TLSDIR_OFF_INDEX_ADDR 16u -#define COFF_TLSDIR_OFF_CALLBACKS 24u - -typedef struct CoffSection { - const char* name; /* short ASCII; <= 8 bytes including NUL pad */ - u32 characteristics; - u8* bytes; /* NULL for .bss / .reloc-before-build */ - u32 size; /* VirtualSize (real bytes; for .bss, mem size) */ - u32 size_raw; /* SizeOfRawData (file size, FileAlignment-padded) */ - u32 rva; /* VirtualAddress in image */ - u32 file_offset; /* PointerToRawData; 0 for .bss */ - u8 in_image; /* 1 if this bucket is emitted as a section */ - u8 has_file_bytes; /* 0 for .bss */ - u8 pad[2]; -} CoffSection; - -/* ---- byte writer helpers ---- */ - -static void coff_write_zeroes(Writer* w, u64 n) { - static const u8 zeroes[256] = {0}; - while (n) { - u64 step = n > sizeof(zeroes) ? sizeof(zeroes) : n; - cfree_writer_write(w, zeroes, (size_t)step); - n -= step; - } -} - -/* Return the COFF bucket for a kept LinkSection. SF_TLS sections route - * into the dedicated .tls bucket so SECREL relocations from TLS access - * code resolve against the merged TLS image, not against .data. - * Everything else partitions on SF_EXEC / SF_WRITE plus the SSEM_NOBITS - * bit for .bss. */ -static CoffBucket coff_bucket_for(const LinkSection* ls) { - if (ls->flags & SF_EXEC) return COFF_BUCKET_TEXT; - if (ls->flags & SF_TLS) return COFF_BUCKET_TLS; - if (ls->sem == SSEM_NOBITS) return COFF_BUCKET_BSS; - if (ls->flags & SF_WRITE) return COFF_BUCKET_DATA; - return COFF_BUCKET_RDATA; -} - -/* True for relocation kinds that need an entry in .reloc so the OS - * loader can patch the site after ASLR picks a runtime ImageBase. - * PC-relative fixups don't need base-relocs — the displacement is - * load-invariant. 
*/ -static int coff_reloc_needs_base_reloc(RelocKind k) { - return k == R_ABS64 || k == R_ABS32; -} - -/* Look up the LinkSection whose [vaddr, vaddr+size] range covers the - * given image-relative address `v`, or return NULL. Used to attribute - * symbol vaddrs to a containing section so we can apply per-section - * vaddr deltas after re-laying out for PE. */ -static const LinkSection* coff_section_at(const LinkImage* img, u64 v) { - u32 i; - for (i = 0; i < img->nsections; ++i) { - const LinkSection* ls = &img->sections[i]; - if (v >= ls->vaddr && v <= ls->vaddr + ls->size) return ls; - } - return NULL; -} - -/* Per-input-section delta map. Indexed by `LinkSection.id - 1`. - * Populated by coff_build_buckets. Consumed by every subsequent pass - * that needs to translate input-coordinate offsets (the world that - * img->sections / img->relocs live in) into PE-coordinate ones (where - * the writer plants bytes). delta is stored explicitly so callers - * avoid recomputing (new_rva + bucket.rva - old_vaddr) for every - * LinkRelocApply whose link_section_id points at the section. */ -typedef struct CoffSecMap { - u32 new_rva; /* image-relative RVA after PE relayout */ - u32 new_file_off; /* file offset of the patched byte */ - i64 delta; /* new_rva - old_vaddr */ - u8 bucket; - u8 pad[3]; -} CoffSecMap; - -/* TLS directory placement state. Populated when at least one SF_TLS - * section survives dead-strip; consumed by the optional-header writer, - * the .reloc builder (base-relocs for the four absolute VA fields), - * and the .rdata emit pass that writes the final 40-byte record. 
*/ -typedef struct CoffTlsLayout { - int present; /* 1 iff at least one TLS section was kept */ - u32 dir_rdata_off; /* byte offset of the IMAGE_TLS_DIRECTORY64 within .rdata */ - u32 tls_size; /* size of the merged .tls bucket */ - LinkSymId tls_index_sym; /* resolved _tls_index LinkSymbol */ - LinkSymId callbacks_sym; /* __xl_a when mingw's TLS callbacks are linked */ - u64 callbacks_addend; /* mingw points past the leading NULL sentinel */ -} CoffTlsLayout; - -static LinkSymId coff_find_sym(LinkImage* img, const char* name) { - Sym sym = pool_intern_slice(img->c->global, slice_from_cstr(name)); - u32 n = LinkSyms_count(&img->syms); - u32 i; - for (i = 0; i < n; ++i) { - const LinkSymbol* s = LinkSyms_at(&img->syms, i); - if (s->name == sym) return (LinkSymId)(i + 1); - } - return LINK_SYM_NONE; -} - -/* Locate _tls_index by name in the resolved symbol table. mingw's - * libmingwex defines it (as part of tlsmcrt); without a CRT the link - * fails here with a clear message rather than producing a TLS - * directory pointing at a stale address. 
*/ -static LinkSymId coff_find_tls_index_sym(LinkImage* img) { - return coff_find_sym(img, "_tls_index"); -} - -static const LinkSection* coff_symbol_section(const LinkImage* img, - const LinkSymbol* s) { - if (s->name) { - Slice nm_s = pool_slice(img->c->global, s->name); - const char* nm = nm_s.s; - size_t n = nm_s.len; - const char* sec_name = NULL; - if (nm && n == 6 && memcmp(nm, "__xd_a", 6) == 0) - sec_name = ".CRT$XDA"; - else if (nm && n == 6 && memcmp(nm, "__xd_z", 6) == 0) - sec_name = ".CRT$XDZ"; - else if (nm && n == 6 && memcmp(nm, "__xl_a", 6) == 0) - sec_name = ".CRT$XLA"; - else if (nm && n == 6 && memcmp(nm, "__xl_c", 6) == 0) - sec_name = ".CRT$XLC"; - else if (nm && n == 6 && memcmp(nm, "__xl_d", 6) == 0) - sec_name = ".CRT$XLD"; - else if (nm && n == 6 && memcmp(nm, "__xl_z", 6) == 0) - sec_name = ".CRT$XLZ"; - if (sec_name) { - u32 i; - for (i = 0; i < img->nsections; ++i) { - const LinkSection* ls = &img->sections[i]; - if (ls->name && - slice_eq_cstr(pool_slice(img->c->global, ls->name), sec_name)) - return ls; - } - } - } - if (s->section_id != LINK_SEC_NONE && s->section_id <= img->nsections) - return &img->sections[s->section_id - 1]; - return coff_section_at(img, s->vaddr); -} - -static u64 coff_symbol_final_va(const LinkImage* img, - const CoffSection out[COFF_NBUCKETS], - const CoffSecMap* map, - LinkSymId id, - const char* what) { - const LinkSymbol* s = LinkSyms_at(&img->syms, id - 1); - if (!s->defined || s->kind == SK_ABS) { - compiler_panic(img->c, no_loc(), - "link_emit_coff: `%.*s` is not a defined section-bound " - "symbol", - SLICE_ARG(slice_from_cstr(what))); - } - const LinkSection* sec = coff_symbol_section(img, s); - if (!sec) { - compiler_panic(img->c, no_loc(), - "link_emit_coff: `%.*s` has no containing section", - SLICE_ARG(slice_from_cstr(what))); - } - u8 b = map[sec->id - 1].bucket; - return PE_IMAGE_BASE + (u64)out[b].rva + - (u64)map[sec->id - 1].new_rva + (s->vaddr - sec->vaddr); -} - -/* Reserve 40 bytes at the 
tail of the .rdata bucket for the - * IMAGE_TLS_DIRECTORY64 record. Records the offset for later emit and - * grows the bucket if needed. The bytes start zeroed; coff_emit_tls_dir - * fills them in once final RVAs are known. */ -static void coff_plan_tls_layout(LinkImage* img, - CoffSection out[COFF_NBUCKETS], - u32* rdata_cap, CoffTlsLayout* tls) { - memset(tls, 0, sizeof(*tls)); - if (out[COFF_BUCKET_TLS].size == 0) return; - tls->present = 1; - tls->tls_size = out[COFF_BUCKET_TLS].size; - tls->tls_index_sym = coff_find_tls_index_sym(img); - if (tls->tls_index_sym == LINK_SYM_NONE) { - compiler_panic(img->c, no_loc(), - "link_emit_coff: .tls section requires `_tls_index` " - "(provided by mingw libmingwex / tlsmcrt.o) — none of " - "the linked inputs define it"); - } - /* IMAGE_TLS_DIRECTORY64 needs 8-byte alignment for its u64 fields; - * round the .rdata size up before reserving the 40-byte record. */ - tls->callbacks_sym = coff_find_sym(img, "__xl_a"); - if (tls->callbacks_sym != LINK_SYM_NONE) { - tls->callbacks_addend = 8; - } else { - tls->callbacks_sym = coff_find_sym(img, "__xl_c"); - tls->callbacks_addend = 0; - } - u32 rdata_size = (u32)ALIGN_UP((u64)out[COFF_BUCKET_RDATA].size, 8ull); - u32 need = rdata_size + COFF_TLS_DIRECTORY64_SIZE; - if (need > *rdata_cap) { - (void)VEC_GROW(img->heap, out[COFF_BUCKET_RDATA].bytes, *rdata_cap, need); - } - /* Zero any padding bytes introduced by the alignment bump and the - * directory slot itself. */ - if (rdata_size > out[COFF_BUCKET_RDATA].size) { - memset(out[COFF_BUCKET_RDATA].bytes + out[COFF_BUCKET_RDATA].size, 0, - rdata_size - out[COFF_BUCKET_RDATA].size); - } - memset(out[COFF_BUCKET_RDATA].bytes + rdata_size, 0, - COFF_TLS_DIRECTORY64_SIZE); - tls->dir_rdata_off = rdata_size; - out[COFF_BUCKET_RDATA].size = need; -} - -/* Write the IMAGE_TLS_DIRECTORY64 bytes once all bucket RVAs are - * final. 
Each u64 VA field gets ImageBase + RVA; the base-reloc pass - * will emit IMAGE_REL_BASED_DIR64 entries so ASLR keeps them valid. */ -static void coff_emit_tls_dir(const LinkImage* img, - const CoffSection out[COFF_NBUCKETS], - const CoffSecMap* map, - const CoffTlsLayout* tls) { - if (!tls->present) return; - u64 tls_start = PE_IMAGE_BASE + (u64)out[COFF_BUCKET_TLS].rva; - u64 tls_end = tls_start + (u64)tls->tls_size; - u64 idx_vaddr = - coff_symbol_final_va(img, out, map, tls->tls_index_sym, "_tls_index"); - const char* callbacks_name = - tls->callbacks_addend ? "__xl_a" : "__xl_c"; - u64 callbacks_vaddr = - tls->callbacks_sym - ? coff_symbol_final_va(img, out, map, tls->callbacks_sym, - callbacks_name) + - tls->callbacks_addend - : 0; - - u8* p = out[COFF_BUCKET_RDATA].bytes + tls->dir_rdata_off; - wr_u64_le(p + COFF_TLSDIR_OFF_START_ADDR, tls_start); - wr_u64_le(p + COFF_TLSDIR_OFF_END_ADDR, tls_end); - wr_u64_le(p + COFF_TLSDIR_OFF_INDEX_ADDR, idx_vaddr); - wr_u64_le(p + COFF_TLSDIR_OFF_CALLBACKS, callbacks_vaddr); - wr_u32_le(p + 32, 0); /* SizeOfZeroFill */ - wr_u32_le(p + 36, 0); /* Characteristics */ -} - -static void coff_define_tls_used(LinkImage* img, - const CoffSection out[COFF_NBUCKETS], - const CoffTlsLayout* tls) { - if (!tls->present) return; - if (!img->linker) return; - link_emit_boundary_sym(img->linker, img, "_tls_used", - PE_IMAGE_BASE + (u64)out[COFF_BUCKET_RDATA].rva + - (u64)tls->dir_rdata_off); -} - -/* ---- import-table synthesis (Phase 3.2) --------------------------- - * - * Per doc/WINDOWS.md §3.2: every LinkSymbol with `imported = 1` gets - * routed through an IAT slot synthesized in `.idata`. Function - * imports additionally receive a small per-arch stub in `.text` - * (`ff 25 disp32` on x64 / `adrp;ldr;br` on aa64) so a direct CALL26 - * or PC32 against the symbol lands on a stub that indirects through - * the IAT. 
Data imports skip the stub — the symbol's final vaddr is - * just the IAT slot vaddr, and code-gen emits a `mov rax, [slot]` - * sequence the same way it would for any other GOT-style load. - * - * cfree's COFF code-gen uses direct symbol references; there is no - * separate `__imp_<name>` LinkSymbol consulted at link time. The - * IAT-slot rewrite happens entirely by overriding the imported - * symbol's vaddr in apply_all_relocs. */ - -typedef struct CoffImport { - LinkSymId sym; /* canonical LinkSymId from img->syms */ - u32 dll_idx; /* index into CoffImportTable.dlls */ - u32 stub_off; /* offset in .text bucket (functions only) */ - u32 iat_off; /* offset in .idata IAT block */ - u32 ilt_off; /* offset in .idata ILT block */ - u32 hint_off; /* offset in .idata hint/name table */ - u8 is_func; - u8 pad[3]; -} CoffImport; - -typedef struct CoffImportDll { - Sym soname; - u32 first; /* index of first import in CoffImportTable.imports */ - u32 count; - u32 ilt_off; /* offset of this DLL's ILT block in .idata */ - u32 iat_off; /* offset of this DLL's IAT block in .idata */ - u32 name_off; /* offset of DLL name string in .idata */ -} CoffImportDll; - -typedef struct CoffImportTable { - CoffImport* imports; - u32 nimports; - u32 imports_cap; /* heap-allocation size for cleanup */ - u32 nfunc_imports; /* subset of nimports that needs a .text stub */ - CoffImportDll* dlls; - u32 ndlls; - u32 dlls_cap; /* heap-allocation size for cleanup */ - /* Offsets within .idata of the five sub-blocks. Filled in by - * coff_plan_idata_layout once nimports / ndlls is known. */ - u32 desc_off; /* always 0 — descriptors come first */ - u32 desc_size; - u32 ilt_base; - u32 ilt_total; - u32 iat_base; - u32 iat_total; - u32 hint_base; - u32 hint_total; - u32 name_base; - u32 name_total; - u32 idata_size; - /* Stub region in .text bucket. Stubs are appended after every - * input .text section has been bucketed. 
stub_text_off is the - * bucket-local offset of the first stub; per-import stub offsets - * are stored in CoffImport.stub_off. */ - u32 stub_text_off; - u32 stub_total; -} CoffImportTable; - -/* Sort comparator: imports grouped by DLL slot, stable on input - * order within a DLL (sort is stable enough via secondary key). */ -static int coff_import_cmp(const void* a, const void* b) { - const CoffImport* ia = (const CoffImport*)a; - const CoffImport* ib = (const CoffImport*)b; - if (ia->dll_idx < ib->dll_idx) return -1; - if (ia->dll_idx > ib->dll_idx) return 1; - /* Secondary: LinkSymId so the order is reproducible. */ - if (ia->sym < ib->sym) return -1; - if (ia->sym > ib->sym) return 1; - return 0; -} - -static const char* coff_import_lookup_name(Compiler* c, const LinkSymbol* s, - size_t* nlen_out) { - Slice nm_s = s->name ? pool_slice(c->global, s->name) : SLICE_NULL; - const char* nm = nm_s.s; - size_t nlen = nm_s.len; - static const char kImpPrefix[] = "__imp_"; - const size_t kImpPrefixLen = sizeof(kImpPrefix) - 1u; - if (nm && nlen > kImpPrefixLen && - memcmp(nm, kImpPrefix, kImpPrefixLen) == 0) { - nm += kImpPrefixLen; - nlen -= kImpPrefixLen; - } - if (nlen_out) *nlen_out = nlen; - return nm; -} - -/* True iff the import classifies as function-like. Mirrors the ELF - * `sym_is_func_import` heuristic: if the canonical kind is known - * we trust it, otherwise we default to function (which matches the - * COFF code-gen contract — direct calls are by far the common case - * and a data import wrongly stubbed would still fail loudly via the - * IAT-routed call). 
*/ -static int coff_import_is_func(Compiler* c, const LinkSymbol* s) { - if (s->name) { - Slice nm_s = pool_slice(c->global, s->name); - const char* nm = nm_s.s; - size_t nlen = nm_s.len; - if (nm && nlen > 6u && memcmp(nm, "__imp_", 6u) == 0) return 0; - } - if (s->kind == SK_FUNC || s->kind == SK_IFUNC) return 1; - if (s->kind == SK_OBJ) return 0; - /* SK_UNDEF / SK_NOTYPE: assume function (the common case). */ - return 1; -} - -/* Walk LinkSyms, collect imports, group by DLL soname. Returns 1 if - * any imports were collected, 0 otherwise (caller skips the entire - * .idata path). */ -static int coff_collect_imports(LinkImage* img, CoffImportTable* it) { - Heap* heap = img->heap; - Compiler* c = img->c; - Linker* l = img->linker; - u32 nsyms = LinkSyms_count(&img->syms); - u32 imp_cap = 0; - u32 dll_cap = 0; - u32 i; - - memset(it, 0, sizeof(*it)); - if (!l) return 0; - for (i = 0; i < nsyms; ++i) { - LinkSymbol* s = LinkSyms_at(&img->syms, i); - LinkInput* in; - u32 dll_idx = (u32)-1; - u32 d; - if (!s->imported) continue; - if (s->name == 0) continue; - if (s->dso_input_id == LINK_INPUT_NONE) { - compiler_panic(c, no_loc(), - "link_emit_coff: imported symbol has no providing DSO"); - } - /* img->globals only carries defined globals/weaks; imported undefs - * never land there. Dedup by name: skip if any earlier slot - * already collected this name. */ - { - int dup = 0; - for (u32 k = 0; k < it->nimports; ++k) { - LinkSymbol* prev = LinkSyms_at(&img->syms, it->imports[k].sym - 1); - if (prev->name == s->name) { dup = 1; break; } - } - if (dup) continue; - } - if (s->dso_input_id - 1u >= LinkInputs_count(&l->inputs)) { - compiler_panic(c, no_loc(), - "link_emit_coff: import dso_input_id out of range"); - } - in = LinkInputs_at(&l->inputs, s->dso_input_id - 1u); - if (in->soname == 0) { - compiler_panic(c, no_loc(), - "link_emit_coff: providing DSO has no soname; cannot " - "emit IMAGE_IMPORT_DESCRIPTOR.Name"); - } - /* Find-or-add the DLL slot. 
*/ - for (d = 0; d < it->ndlls; ++d) { - if (it->dlls[d].soname == in->soname) { dll_idx = d; break; } - } - if (dll_idx == (u32)-1) { - if (VEC_GROW(heap, it->dlls, dll_cap, it->ndlls + 1u)) - compiler_panic(c, no_loc(), "link_emit_coff: oom on import dlls"); - dll_idx = it->ndlls++; - memset(&it->dlls[dll_idx], 0, sizeof(it->dlls[dll_idx])); - it->dlls[dll_idx].soname = in->soname; - } - if (VEC_GROW(heap, it->imports, imp_cap, it->nimports + 1u)) - compiler_panic(c, no_loc(), "link_emit_coff: oom on imports"); - memset(&it->imports[it->nimports], 0, - sizeof(it->imports[it->nimports])); - it->imports[it->nimports].sym = s->id; - it->imports[it->nimports].dll_idx = dll_idx; - it->imports[it->nimports].is_func = (u8)coff_import_is_func(c, s); - if (it->imports[it->nimports].is_func) ++it->nfunc_imports; - ++it->nimports; - it->dlls[dll_idx].count++; - } - if (it->nimports == 0) return 0; - /* Re-bucket the imports array so each DLL's run is contiguous. */ - qsort(it->imports, it->nimports, sizeof(*it->imports), coff_import_cmp); - /* Fix up CoffImportDll.first now that imports[] is sorted. */ - { - u32 cur = 0; - for (u32 d = 0; d < it->ndlls; ++d) { - it->dlls[d].first = cur; - cur += it->dlls[d].count; - } - } - it->imports_cap = imp_cap; - it->dlls_cap = dll_cap; - return 1; -} - -static void coff_imports_free(LinkImage* img, CoffImportTable* it) { - Heap* heap = img->heap; - if (it->imports) { - heap->free(heap, it->imports, - (size_t)it->imports_cap * sizeof(*it->imports)); - } - if (it->dlls) { - heap->free(heap, it->dlls, - (size_t)it->dlls_cap * sizeof(*it->dlls)); - } -} - -/* Compute every per-block / per-import offset inside .idata and the - * total .idata size in bytes. Also assigns per-import hint/name and - * dll-name offsets so the descriptor table can reference them by RVA - * later (RVAs need the bucket's final RVA, added in coff_emit_idata). 
*/ -static void coff_plan_idata_layout(LinkImage* img, CoffImportTable* it) { - Compiler* c = img->c; - u32 off; - - /* Block 1: import descriptors (one per DLL + zero terminator). */ - it->desc_off = 0; - it->desc_size = (it->ndlls + 1u) * COFF_IMPORT_DESCRIPTOR_SIZE; - off = (u32)ALIGN_UP((u64)it->desc_size, (u64)PE_IDATA_BLOCK_ALIGN); - - /* Block 2: ILTs. Per DLL: count entries + 1 (terminator), 8 B each. */ - it->ilt_base = off; - for (u32 d = 0; d < it->ndlls; ++d) { - it->dlls[d].ilt_off = off; - /* Per-import: assign ilt_off within this DLL's block. */ - for (u32 k = 0; k < it->dlls[d].count; ++k) { - it->imports[it->dlls[d].first + k].ilt_off = - off + k * (u32)COFF_THUNK_DATA64_SIZE; - } - off += (it->dlls[d].count + 1u) * (u32)COFF_THUNK_DATA64_SIZE; - } - it->ilt_total = off - it->ilt_base; - off = (u32)ALIGN_UP((u64)off, (u64)PE_IDATA_BLOCK_ALIGN); - - /* Block 3: IATs (same shape as ILTs). */ - it->iat_base = off; - for (u32 d = 0; d < it->ndlls; ++d) { - it->dlls[d].iat_off = off; - for (u32 k = 0; k < it->dlls[d].count; ++k) { - it->imports[it->dlls[d].first + k].iat_off = - off + k * (u32)COFF_THUNK_DATA64_SIZE; - } - off += (it->dlls[d].count + 1u) * (u32)COFF_THUNK_DATA64_SIZE; - } - it->iat_total = off - it->iat_base; - off = (u32)ALIGN_UP((u64)off, (u64)PE_IDATA_BLOCK_ALIGN); - - /* Block 4: hint/name records. Each: u16 hint + NUL-term name + - * 1-byte pad if the resulting size is odd (PE/COFF spec). */ - it->hint_base = off; - for (u32 i = 0; i < it->nimports; ++i) { - LinkSymbol* s = LinkSyms_at(&img->syms, it->imports[i].sym - 1); - size_t nlen = 0; - const char* nm = coff_import_lookup_name(c, s, &nlen); - if (!nm || nlen == 0) - compiler_panic(c, no_loc(), - "link_emit_coff: imported symbol has empty name"); - it->imports[i].hint_off = off; - /* hint (2 B) + name (nlen + 1) + optional pad to even. 
*/ - u32 rec = 2u + (u32)nlen + 1u; - if (rec & 1u) ++rec; - off += rec; - } - it->hint_total = off - it->hint_base; - off = (u32)ALIGN_UP((u64)off, (u64)PE_IDATA_BLOCK_ALIGN); - - /* Block 5: DLL name strings (NUL-terminated). */ - it->name_base = off; - for (u32 d = 0; d < it->ndlls; ++d) { - Slice nm_s = pool_slice(c->global, it->dlls[d].soname); - const char* nm = nm_s.s; - size_t nlen = nm_s.len; - if (!nm || nlen == 0) - compiler_panic(c, no_loc(), - "link_emit_coff: providing DSO has empty soname"); - it->dlls[d].name_off = off; - off += (u32)nlen + 1u; - } - it->name_total = off - it->name_base; - it->idata_size = off; -} - -/* Append the function-import stubs to the .text bucket. Each stub is - * `coff_stub_size` bytes (arch-specific). Records each stub's bucket- - * local offset on the matching CoffImport so the per-symbol stub vaddr - * can be computed once the .text bucket's RVA is final. */ -static void coff_append_stubs(LinkImage* img, CoffImportTable* it, - CoffSection* text_bucket, - u32* text_bucket_cap) { - Heap* heap = img->heap; - Compiler* c = img->c; - const LinkArchDesc* arch = link_arch_desc_for(c); - u32 stub_size; - u32 stub_align; - u64 cur; - if (!arch || arch->coff_stub_size == 0 || !arch->emit_coff_iat_stub) { - compiler_panic(c, no_loc(), - "link_emit_coff: arch has no COFF IAT stub emitter"); - } - stub_size = arch->coff_stub_size; - /* Stubs are pure code; aligning to instruction alignment is enough. - * x64 wants byte-granular, aa64 wants 4 B; align to stub size as a - * convenient upper bound. */ - stub_align = stub_size; - cur = (u64)text_bucket->size; - cur = ALIGN_UP(cur, (u64)stub_align); - it->stub_text_off = (u32)cur; - for (u32 i = 0; i < it->nimports; ++i) { - if (!it->imports[i].is_func) continue; - it->imports[i].stub_off = (u32)cur; - cur += stub_size; - } - it->stub_total = (u32)cur - it->stub_text_off; - if (it->stub_total == 0) return; - /* Grow the .text bucket buffer to hold the new region. 
*/ - u32 need = (u32)cur; - if (need > *text_bucket_cap) { - (void)VEC_GROW(heap, text_bucket->bytes, *text_bucket_cap, need); - } - /* Zero the alignment pad; stub bytes are written later by - * coff_emit_stubs once vaddrs are known. */ - if ((u32)cur > text_bucket->size) { - memset(text_bucket->bytes + text_bucket->size, 0, - (size_t)((u32)cur - text_bucket->size)); - } - text_bucket->size = (u32)cur; -} - -/* Emit each function import's IAT stub into the .text bucket. Must - * run after coff_assign_layout has fixed both .text's RVA and - * .idata's RVA, since the stub bakes in the post-shift IAT slot - * displacement. */ -static void coff_emit_stubs(LinkImage* img, const CoffImportTable* it, - const CoffSection out[COFF_NBUCKETS]) { - Compiler* c = img->c; - const LinkArchDesc* arch = link_arch_desc_for(c); - u64 img_base = PE_IMAGE_BASE; - u32 text_rva = out[COFF_BUCKET_TEXT].rva; - u32 idata_rva = out[COFF_BUCKET_IDATA].rva; - if (!arch || !arch->emit_coff_iat_stub) { - compiler_panic(c, no_loc(), - "link_emit_coff: arch has no COFF IAT stub emitter"); - } - for (u32 i = 0; i < it->nimports; ++i) { - u64 stub_va, slot_va; - if (!it->imports[i].is_func) continue; - stub_va = img_base + (u64)text_rva + (u64)it->imports[i].stub_off; - slot_va = img_base + (u64)idata_rva + (u64)it->imports[i].iat_off; - arch->emit_coff_iat_stub(out[COFF_BUCKET_TEXT].bytes + - it->imports[i].stub_off, - stub_va, slot_va); - } -} - -/* Emit .idata content into the bucket buffer. Allocates the buffer - * here (size is already known from coff_plan_idata_layout). */ -static void coff_emit_idata(LinkImage* img, const CoffImportTable* it, - CoffSection out[COFF_NBUCKETS], - u32* idata_bucket_cap) { - Heap* heap = img->heap; - Compiler* c = img->c; - CoffSection* idata = &out[COFF_BUCKET_IDATA]; - u32 idata_rva = idata->rva; - u8* buf; - /* Allocate the bucket buffer (idata_size is already block-aligned). 
*/ - buf = (u8*)heap->alloc(heap, it->idata_size, _Alignof(u64)); - if (!buf) - compiler_panic(c, no_loc(), "link_emit_coff: oom on .idata buffer"); - memset(buf, 0, it->idata_size); - idata->bytes = buf; - idata->size = it->idata_size; - *idata_bucket_cap = it->idata_size; - - /* Block 1: IMAGE_IMPORT_DESCRIPTOR table. */ - for (u32 d = 0; d < it->ndlls; ++d) { - u8* p = buf + d * (u32)COFF_IMPORT_DESCRIPTOR_SIZE; - u32 ilt_rva = idata_rva + it->dlls[d].ilt_off; - u32 iat_rva = idata_rva + it->dlls[d].iat_off; - u32 name_rva = idata_rva + it->dlls[d].name_off; - wr_u32_le(p + 0, ilt_rva); /* OriginalFirstThunk */ - wr_u32_le(p + 4, 0u); /* TimeDateStamp */ - wr_u32_le(p + 8, 0u); /* ForwarderChain */ - wr_u32_le(p + 12, name_rva); /* Name */ - wr_u32_le(p + 16, iat_rva); /* FirstThunk */ - } - /* Trailing zero descriptor already zero-filled by memset. */ - - /* Blocks 2+3: ILT + IAT. Both initially point at the same hint/name - * record for each import; the OS loader rewrites IAT entries at - * load time. */ - for (u32 i = 0; i < it->nimports; ++i) { - u64 hint_rva = (u64)idata_rva + (u64)it->imports[i].hint_off; - wr_u64_le(buf + it->imports[i].ilt_off, hint_rva); - wr_u64_le(buf + it->imports[i].iat_off, hint_rva); - } - /* Per-DLL ILT/IAT terminators are u64 0, already zero-filled. */ - - /* Block 4: hint/name records. */ - for (u32 i = 0; i < it->nimports; ++i) { - LinkSymbol* s = LinkSyms_at(&img->syms, it->imports[i].sym - 1); - size_t nlen = 0; - const char* nm = coff_import_lookup_name(c, s, &nlen); - u8* p = buf + it->imports[i].hint_off; - wr_u16_le(p, PE_IMPORT_HINT_NONE); - memcpy(p + 2, nm, nlen); - /* NUL terminator + optional pad already zero. */ - } - - /* Block 5: DLL name strings. */ - for (u32 d = 0; d < it->ndlls; ++d) { - Slice nm_s = pool_slice(c->global, it->dlls[d].soname); - const char* nm = nm_s.s; - size_t nlen = nm_s.len; - memcpy(buf + it->dlls[d].name_off, nm, nlen); - /* NUL already zero. 
*/ - } -} - -/* Per-LinkSymId vaddr override table for imports. Indexed by - * LinkSymId-1; 0 means "not an import". Built once after the .idata - * bucket RVA is final. Consumed by coff_apply_all_relocs in lieu of - * the symbol's own vaddr field (which is 0 for imports). */ -typedef struct CoffImportVaddr { - u64* by_sym; /* size = nsyms; 0 entries mean "not imported" */ - u32 nsyms; -} CoffImportVaddr; - -static void coff_import_vaddr_build(LinkImage* img, const CoffImportTable* it, - const CoffSection out[COFF_NBUCKETS], - CoffImportVaddr* iv) { - Heap* heap = img->heap; - u64 img_base = PE_IMAGE_BASE; - u32 text_rva = out[COFF_BUCKET_TEXT].rva; - u32 idata_rva = out[COFF_BUCKET_IDATA].rva; - iv->nsyms = LinkSyms_count(&img->syms); - iv->by_sym = (u64*)heap->alloc(heap, sizeof(u64) * (size_t)(iv->nsyms + 1u), - _Alignof(u64)); - if (!iv->by_sym) - compiler_panic(img->c, no_loc(), - "link_emit_coff: oom on import vaddr table"); - memset(iv->by_sym, 0, sizeof(u64) * (size_t)(iv->nsyms + 1u)); - for (u32 i = 0; i < it->nimports; ++i) { - LinkSymId sid = it->imports[i].sym; - u64 va; - if (it->imports[i].is_func) { - va = img_base + (u64)text_rva + (u64)it->imports[i].stub_off; - } else { - va = img_base + (u64)idata_rva + (u64)it->imports[i].iat_off; - } - iv->by_sym[sid - 1u] = va; - /* Fan out across every shadow LinkSymId with the same name so a - * per-input undef reference resolves to the same import slot. */ - { - LinkSymbol* canonical = LinkSyms_at(&img->syms, sid - 1u); - for (u32 j = 0; j < iv->nsyms; ++j) { - LinkSymbol* s = LinkSyms_at(&img->syms, j); - if (s->name == canonical->name && s->imported) { - iv->by_sym[s->id - 1u] = va; - } - } - } - } -} - -static void coff_import_vaddr_free(LinkImage* img, CoffImportVaddr* iv) { - Heap* heap = img->heap; - if (iv->by_sym) { - heap->free(heap, iv->by_sym, - sizeof(u64) * (size_t)(iv->nsyms + 1u)); - } -} - -/* Resolve Compiler.target.arch -> IMAGE_FILE_MACHINE_* via the per-arch - * coff ops table. 
Panic if the arch has no COFF descriptor or the - * machine value is one cfree doesn't ship (Phase 1 supports AMD64 and - * ARM64 only). */ -static u16 coff_machine_or_panic(Compiler* c) { - const ArchImpl* arch = arch_for_compiler(c); - u16 m; - if (!arch || !arch->coff) - compiler_panic(c, no_loc(), "link_emit_coff: no COFF arch descriptor"); - m = arch->coff->machine; - if (m != IMAGE_FILE_MACHINE_AMD64 && m != IMAGE_FILE_MACHINE_ARM64) - compiler_panic(c, no_loc(), - "link_emit_coff: unsupported machine 0x%x", (unsigned)m); - return m; -} - -static int coff_section_name_starts(Compiler* c, const LinkSection* ls, - const char* prefix) { - size_t pn = slice_from_cstr(prefix).len; - Slice s_s = ls->name ? pool_slice(c->global, ls->name) : SLICE_NULL; - const char* s = s_s.s; - size_t n = s_s.len; - return s && n >= pn && memcmp(s, prefix, pn) == 0; -} - -static int coff_section_name_cmp(Compiler* c, const LinkSection* a, - const LinkSection* b) { - Slice as_s = a->name ? pool_slice(c->global, a->name) : SLICE_NULL; - Slice bs_s = b->name ? pool_slice(c->global, b->name) : SLICE_NULL; - const char* as = as_s.s ? as_s.s : ""; - const char* bs = bs_s.s ? bs_s.s : ""; - size_t an = as_s.len, bn = bs_s.len; - size_t n = an < bn ? an : bn; - int cmp = n ? memcmp(as, bs, n) : 0; - if (cmp) return cmp; - if (an < bn) return -1; - if (an > bn) return 1; - if (a->id < b->id) return -1; - if (a->id > b->id) return 1; - return 0; -} - -static void coff_place_section(LinkImage* img, CoffSection out[COFF_NBUCKETS], - CoffSecMap* map, u64 bucket_cur[COFF_NBUCKETS], - u32 bucket_cap[COFF_NBUCKETS], - const LinkSection* ls) { - Heap* heap = img->heap; - CoffBucket b2 = coff_bucket_for(ls); - u32 align = ls->align ? 
ls->align : 1u; - u64 cur = bucket_cur[b2]; - cur = ALIGN_UP(cur, (u64)align); - map[ls->id - 1].bucket = (u8)b2; - /* Record the bucket-local offset; the absolute RVA / file offset - * are filled in after bucket placement (RVAs need - * SectionAlignment, file offsets need FileAlignment). */ - map[ls->id - 1].new_rva = (u32)cur; - if (b2 != COFF_BUCKET_BSS) { - /* Copy bytes from the source segment buffer into the bucket. */ - if (ls->size) { - u32 need = (u32)(cur + ls->size); - if (need > bucket_cap[b2]) { - (void)VEC_GROW(heap, out[b2].bytes, bucket_cap[b2], need); - } - memset(out[b2].bytes + bucket_cur[b2], 0, - (size_t)(cur - bucket_cur[b2])); - if (ls->sem != SSEM_NOBITS) { - const LinkSegment* seg = &img->segments[ls->segment_id - 1]; - const u8* src = img->segment_bytes[seg->id - 1] + - (size_t)(ls->file_offset - seg->file_offset); - memcpy(out[b2].bytes + cur, src, (size_t)ls->size); - } else { - memset(out[b2].bytes + cur, 0, (size_t)ls->size); - } - } - } - cur += ls->size; - bucket_cur[b2] = cur; - out[b2].size = (u32)cur; -} - -static void coff_insert_sorted_section(Compiler* c, const LinkSection** a, - u32* n, const LinkSection* ls) { - u32 i = *n; - while (i > 0 && coff_section_name_cmp(c, ls, a[i - 1u]) < 0) { - a[i] = a[i - 1u]; - --i; - } - a[i] = ls; - *n += 1u; -} - -/* ---- pass 1: bucket input sections, assemble bytes, assign deltas ---- - * CoffSecMap is defined above (alongside CoffTlsLayout) because the - * TLS planning helpers need to consume one. */ - -/* Build the four payload buckets (.text/.rdata/.data/.bss). - * - * `map[secid-1]` is populated for every kept LinkSection with the - * section's new RVA, new file offset, the bucket it landed in, and the - * delta to add to in-section vaddrs. Bucket buffers are - * heap-allocated; the caller frees them after emit. 
*/ -static void coff_build_buckets(LinkImage* img, CoffSection out[COFF_NBUCKETS], - CoffSecMap* map) { - Heap* heap = img->heap; - Compiler* c = img->c; - const LinkSection** tls_sorted = NULL; - const LinkSection** crt_sorted = NULL; - u32 ntls_sorted = 0; - u32 ncrt_sorted = 0; - u32 i, b; - - for (b = 0; b < COFF_NBUCKETS; ++b) { - memset(&out[b], 0, sizeof(out[b])); - } - out[COFF_BUCKET_TEXT].name = ".text"; - out[COFF_BUCKET_TEXT].characteristics = - IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_MEM_READ; - out[COFF_BUCKET_TEXT].has_file_bytes = 1; - out[COFF_BUCKET_RDATA].name = ".rdata"; - out[COFF_BUCKET_RDATA].characteristics = - IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ; - out[COFF_BUCKET_RDATA].has_file_bytes = 1; - out[COFF_BUCKET_IDATA].name = ".idata"; - out[COFF_BUCKET_IDATA].characteristics = - IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ; - out[COFF_BUCKET_IDATA].has_file_bytes = 1; - out[COFF_BUCKET_DATA].name = ".data"; - out[COFF_BUCKET_DATA].characteristics = - IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | - IMAGE_SCN_MEM_WRITE; - out[COFF_BUCKET_DATA].has_file_bytes = 1; - /* The Windows loader uses .tls as a *template*: the bytes on disk - * seed each thread's per-TLS copy at thread creation, and threads - * write to their copies, not the template. The PE section is still - * marked writable because that's what mingw and link.exe emit; the - * loader special-cases it via the TLS directory. 
*/ - out[COFF_BUCKET_TLS].name = ".tls"; - out[COFF_BUCKET_TLS].characteristics = - IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | - IMAGE_SCN_MEM_WRITE; - out[COFF_BUCKET_TLS].has_file_bytes = 1; - out[COFF_BUCKET_BSS].name = ".bss"; - out[COFF_BUCKET_BSS].characteristics = - IMAGE_SCN_CNT_UNINITIALIZED_DATA | IMAGE_SCN_MEM_READ | - IMAGE_SCN_MEM_WRITE; - out[COFF_BUCKET_BSS].has_file_bytes = 0; - out[COFF_BUCKET_RELOC].name = ".reloc"; - out[COFF_BUCKET_RELOC].characteristics = - IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | - IMAGE_SCN_MEM_DISCARDABLE; - out[COFF_BUCKET_RELOC].has_file_bytes = 1; - - /* Track per-bucket cursors. Bucket sizes are bounded by the sum of - * input section sizes plus per-section alignment padding; we grow - * lazily via VEC_GROW. */ - u64 bucket_cur[COFF_NBUCKETS]; - u32 bucket_cap[COFF_NBUCKETS]; - for (b = 0; b < COFF_NBUCKETS; ++b) { - bucket_cur[b] = 0; - bucket_cap[b] = 0; - } - - tls_sorted = img->nsections - ? (const LinkSection**)heap->alloc( - heap, sizeof(*tls_sorted) * img->nsections, - _Alignof(const LinkSection*)) - : NULL; - crt_sorted = img->nsections - ? 
(const LinkSection**)heap->alloc( - heap, sizeof(*crt_sorted) * img->nsections, - _Alignof(const LinkSection*)) - : NULL; - if (img->nsections && (!tls_sorted || !crt_sorted)) - compiler_panic(c, no_loc(), "link_emit_coff: oom sorting sections"); - - for (i = 0; i < img->nsections; ++i) { - const LinkSection* ls = &img->sections[i]; - if (!(ls->flags & SF_ALLOC)) continue; - if (ls->flags & SF_TLS) { - coff_insert_sorted_section(c, tls_sorted, &ntls_sorted, ls); - continue; - } - if (coff_section_name_starts(c, ls, ".CRT$")) { - coff_insert_sorted_section(c, crt_sorted, &ncrt_sorted, ls); - continue; - } - coff_place_section(img, out, map, bucket_cur, bucket_cap, ls); - } - - for (i = 0; i < ntls_sorted; ++i) { - coff_place_section(img, out, map, bucket_cur, bucket_cap, tls_sorted[i]); - } - for (i = 0; i < ncrt_sorted; ++i) { - coff_place_section(img, out, map, bucket_cur, bucket_cap, crt_sorted[i]); - } - - /* Track caps so we can free with the right size later (heap->free - * needs the original allocation size). Stash into size_raw - * temporarily — overwritten below with the proper PE value. */ - for (b = 0; b < COFF_NBUCKETS; ++b) out[b].size_raw = bucket_cap[b]; - if (tls_sorted) - heap->free(heap, tls_sorted, sizeof(*tls_sorted) * img->nsections); - if (crt_sorted) - heap->free(heap, crt_sorted, sizeof(*crt_sorted) * img->nsections); -} - -/* Assign RVAs and file offsets to the buckets that participate in the - * image. Returns the file offset at which trailing pad-to-EOF should - * land (== file size). 
*/ -static u64 coff_assign_layout(CoffSection out[COFF_NBUCKETS], - u32 headers_file_size, - u32 first_section_rva) { - u32 rva = first_section_rva; - u64 file = ALIGN_UP((u64)headers_file_size, (u64)PE_FILE_ALIGNMENT); - u32 b; - for (b = 0; b < COFF_NBUCKETS; ++b) { - if (out[b].size == 0) { - out[b].in_image = 0; - out[b].rva = 0; - out[b].file_offset = 0; - out[b].size_raw = 0; - continue; - } - out[b].in_image = 1; - out[b].rva = (u32)ALIGN_UP((u64)rva, (u64)PE_SECTION_ALIGNMENT); - if (out[b].has_file_bytes) { - out[b].file_offset = (u32)file; - out[b].size_raw = (u32)ALIGN_UP((u64)out[b].size, - (u64)PE_FILE_ALIGNMENT); - file += out[b].size_raw; - } else { - out[b].file_offset = 0; - out[b].size_raw = 0; - } - rva = out[b].rva + out[b].size; - } - return file; -} - -/* Build the .reloc bytes by grouping absolute relocs by 4-KiB page. - * The map[] array maps LinkSectionId-1 to the per-section post-PE-relayout - * RVA, so we can compute each reloc's site_rva = section_rva + (orig - * write_vaddr - orig section_vaddr). 
- * - * Layout per page: - * u32 page_rva - * u32 size_of_block (8 + n_entries*2, padded to a multiple of 4) - * u16 entries[]: (type << 12) | (offset & 0xfff) - * optional trailing u16 = 0 (IMAGE_REL_BASED_ABSOLUTE) for u32 alignment */ -typedef struct CoffRelocEntry { - u32 site_rva; - u16 type; - u16 pad; -} CoffRelocEntry; - -static int coff_reloc_entry_cmp(const void* a, const void* b) { - const CoffRelocEntry* ea = (const CoffRelocEntry*)a; - const CoffRelocEntry* eb = (const CoffRelocEntry*)b; - if (ea->site_rva < eb->site_rva) return -1; - if (ea->site_rva > eb->site_rva) return 1; - return 0; -} - -static void coff_build_reloc_section(LinkImage* img, - const CoffSection out[COFF_NBUCKETS], - const CoffSecMap* map, - CoffSection* reloc, - const CoffRelocEntry* extras, - u32 n_extras) { - Heap* heap = img->heap; - Compiler* c = img->c; - u32 nrel = LinkRelocs_count(&img->relocs); - CoffRelocEntry* entries = NULL; - u32 nentries = 0; - u32 cap = 0; - u32 i; - - if (!img->pie) { - reloc->bytes = NULL; - reloc->size = 0; - return; - } - for (i = 0; i < nrel; ++i) { - const LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); - const LinkSection* ls; - u64 site_old_vaddr; - u32 site_rva; - u16 type; - if (!coff_reloc_needs_base_reloc(r->kind)) continue; - if (r->link_section_id == LINK_SEC_NONE) continue; - ls = &img->sections[r->link_section_id - 1]; - /* r->write_vaddr is in the pre-relayout coordinate system (same as - * ls->vaddr), so the offset into the section is stable. Add the - * containing bucket's final RVA to land at the image RVA. 
*/ - site_old_vaddr = r->write_vaddr; - u8 sb = map[ls->id - 1].bucket; - site_rva = out[sb].rva + map[ls->id - 1].new_rva + - (u32)(site_old_vaddr - ls->vaddr); - if (r->kind == R_ABS64) { - type = (u16)IMAGE_REL_BASED_DIR64; - } else { - type = (u16)IMAGE_REL_BASED_HIGHLOW; - } - if (nentries == cap) { - (void)VEC_GROW(heap, entries, cap, nentries + 1u); - } - entries[nentries].site_rva = site_rva; - entries[nentries].type = type; - entries[nentries].pad = 0; - ++nentries; - } - /* Append caller-supplied extras (TLS directory absolute-VA fields, - * etc.). These are already site-RVAs in the final image. */ - for (i = 0; i < n_extras; ++i) { - if (nentries == cap) { - (void)VEC_GROW(heap, entries, cap, nentries + 1u); - } - entries[nentries] = extras[i]; - ++nentries; - } - if (nentries == 0) { - reloc->bytes = NULL; - reloc->size = 0; - if (entries) heap->free(heap, entries, cap * sizeof(*entries)); - (void)c; - return; - } - /* Sort entries by RVA so we can group runs sharing a 4-KiB page. */ - qsort(entries, nentries, sizeof(*entries), coff_reloc_entry_cmp); - - /* Two-pass: first compute the total size (so we can allocate the - * blob exactly), then emit. */ - u32 blob_size = 0; - u32 run_start = 0; - while (run_start < nentries) { - u32 page = entries[run_start].site_rva & ~0xfffu; - u32 run_end = run_start; - while (run_end < nentries && - (entries[run_end].site_rva & ~0xfffu) == page) { - ++run_end; - } - u32 n = run_end - run_start; - u32 block = COFF_BASE_RELOCATION_SIZE + n * 2u; - block = (u32)ALIGN_UP((u64)block, 4ull); - blob_size += block; - run_start = run_end; - } - reloc->bytes = (u8*)heap->alloc(heap, blob_size, 4); - if (!reloc->bytes && blob_size) - compiler_panic(c, no_loc(), "link_emit_coff: oom on .reloc blob"); - memset(reloc->bytes, 0, blob_size); - reloc->size = blob_size; - /* Stash allocation size for free path. 
*/ - reloc->size_raw = blob_size; - - u32 cursor = 0; - run_start = 0; - while (run_start < nentries) { - u32 page = entries[run_start].site_rva & ~0xfffu; - u32 run_end = run_start; - while (run_end < nentries && - (entries[run_end].site_rva & ~0xfffu) == page) { - ++run_end; - } - u32 n = run_end - run_start; - u32 raw_size = COFF_BASE_RELOCATION_SIZE + n * 2u; - u32 block = (u32)ALIGN_UP((u64)raw_size, 4ull); - u8* p = reloc->bytes + cursor; - wr_u32_le(p, page); - wr_u32_le(p + 4, block); - u32 k; - for (k = 0; k < n; ++k) { - u16 entry = (u16)(((u16)entries[run_start + k].type << 12) | - (entries[run_start + k].site_rva & 0xfffu)); - wr_u16_le(p + 8 + k * 2u, entry); - } - /* Optional trailing pad: a single IMAGE_REL_BASED_ABSOLUTE (0). */ - if (block > raw_size) { - wr_u16_le(p + 8 + n * 2u, 0); - } - cursor += block; - run_start = run_end; - } - heap->free(heap, entries, cap * sizeof(*entries)); -} - -/* Patch each LinkRelocApply against the PE-relayout coordinates and - * apply. `bucket_bytes[bucket]` gives the writable buffer for that - * bucket; the per-section delta in map[] turns the old in-section - * offsets into bucket-local offsets. - * - * Imported targets (LinkSymbol.imported == 1) have no vaddr of their - * own — instead the .idata pass populated `iv->by_sym[id-1]` with the - * function stub's vaddr (for callable imports) or the IAT slot's - * vaddr (for data imports). This is the spot where that table is - * consulted in lieu of the symbol's own zero vaddr. 
*/ -static void coff_apply_all_relocs(LinkImage* img, - const CoffSection out[COFF_NBUCKETS], - const CoffSecMap* map, - const CoffImportVaddr* iv) { - Compiler* c = img->c; - u32 i; - u64 img_base = PE_IMAGE_BASE; - u32 nrel = LinkRelocs_count(&img->relocs); - for (i = 0; i < nrel; ++i) { - LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); - const LinkSymbol* tgt = LinkSyms_at(&img->syms, r->target - 1); - const LinkSection* sec; - const LinkSection* tgt_sec; - u64 S, P; - u8* P_bytes; - u8 bucket; - u32 site_off_in_sec; - u32 site_bucket_off; - if (r->link_section_id == LINK_SEC_NONE) continue; - sec = &img->sections[r->link_section_id - 1]; - bucket = map[sec->id - 1].bucket; - if (!out[bucket].has_file_bytes || !out[bucket].bytes) { - /* Shouldn't happen — .bss has no relocations. */ - continue; - } - site_off_in_sec = (u32)(r->write_vaddr - sec->vaddr); - site_bucket_off = map[sec->id - 1].new_rva + site_off_in_sec; - P_bytes = out[bucket].bytes + site_bucket_off; - /* P = ImageBase + bucket_rva + map[].new_rva + site_off_in_sec - * — i.e. the final runtime address of the patch site. */ - P = img_base + (u64)out[bucket].rva + - (u64)map[sec->id - 1].new_rva + site_off_in_sec; - - /* Resolve S: target symbol's new image-relative address. Look up - * the LinkSection that contains the symbol's original vaddr, then - * apply that section's delta. */ - if (tgt->imported) { - /* IAT-routed: stub vaddr (functions) / slot vaddr (data). 
*/ - if (!iv || iv->by_sym[r->target - 1u] == 0) - compiler_panic(c, no_loc(), - "link_emit_coff: imported target lacks IAT slot"); - S = iv->by_sym[r->target - 1u]; - } else if (tgt->kind == SK_ABS) { - S = tgt->vaddr; - } else if (tgt->defined) { - tgt_sec = coff_symbol_section(img, tgt); - if (!tgt_sec) { - compiler_panic(c, no_loc(), - "link_emit_coff: symbol vaddr 0x%llx has no " - "containing section", - (unsigned long long)tgt->vaddr); - } - u8 tb = map[tgt_sec->id - 1].bucket; - u64 sym_off = tgt->vaddr - tgt_sec->vaddr; - S = img_base + (u64)out[tb].rva + - (u64)map[tgt_sec->id - 1].new_rva + sym_off; - } else { - /* Undef and not imported — shouldn't survive resolve_undefs. */ - compiler_panic(c, no_loc(), - "link_emit_coff: unresolved non-imported symbol"); - } - /* COFF-only section-relative kinds: the SECREL value is the - * symbol's offset from the start of its containing output section - * (PE bucket), and SECTION is the 1-based PE section index. - * link_reloc_apply only sees S and P, so we patch these inline - * before delegating common kinds. */ - if (r->kind == R_COFF_SECREL || r->kind == R_COFF_SECTION || - r->kind == R_COFF_AARCH64_SECREL_LOW12A || - r->kind == R_COFF_AARCH64_SECREL_HIGH12A) { - if (!tgt->defined || tgt->kind == SK_ABS) { - compiler_panic(c, no_loc(), - "link_emit_coff: COFF SECREL/SECTION requires a " - "defined section-bound target symbol"); - } - u8 tb = map[tgt_sec->id - 1].bucket; - u64 sym_off_in_bucket = - (u64)map[tgt_sec->id - 1].new_rva + (tgt->vaddr - tgt_sec->vaddr); - if (r->kind == R_COFF_SECREL) { - u64 v = sym_off_in_bucket + (u64)r->addend; - wr_u32_le(P_bytes, (u32)(v & 0xffffffffu)); - } else if (r->kind == R_COFF_SECTION) { - /* PE section indices are 1-based; buckets are 0-based, so add 1. */ - wr_u16_le(P_bytes, (u16)((tb + 1u) & 0xffffu)); - } else { - /* AArch64 SECREL_{LOW,HIGH}12A: patch the imm12 field of an - * existing ADD-imm12 instruction. 
LOW12A = bits [11:0] of the - * SECREL; HIGH12A = bits [23:12]. The instruction's sh bit was - * already set by the codegen (0 for LOW, 1 for HIGH). */ - u64 v = sym_off_in_bucket + (u64)r->addend; - u32 imm12 = (r->kind == R_COFF_AARCH64_SECREL_HIGH12A) - ? (u32)((v >> 12) & 0xfffu) - : (u32)(v & 0xfffu); - u32 instr = rd_u32_le(P_bytes); - instr = (instr & ~(0xfffu << 10)) | (imm12 << 10); - wr_u32_le(P_bytes, instr); - } - continue; - } - link_reloc_apply(c, r->kind, P_bytes, S, r->addend, P); - } -} - -/* ---- header marshalling ---- - * - * Each helper streams its on-disk shape to the writer field-by-field; - * we avoid sizeof(struct) on the packed PE wire types since they carry - * implicit-padding hazards on hosts that disagree with #pragma pack(1) - * defaults. */ - -static void coff_write_dos_stub(Writer* w) { - u8 buf[PE_DOS_HDR_SIZE]; - memset(buf, 0, sizeof(buf)); - /* e_magic ("MZ") + e_lfanew (offset of PE signature). All other - * legacy fields zero. */ - buf[0] = (u8)(IMAGE_DOS_SIGNATURE & 0xffu); - buf[1] = (u8)((IMAGE_DOS_SIGNATURE >> 8) & 0xffu); - wr_u32_le(buf + 0x3c, PE_DOS_E_LFANEW); - cfree_writer_write(w, buf, sizeof(buf)); -} - -static void coff_write_file_header(Writer* w, u16 machine, u16 nsec, - u16 characteristics) { - coff_wr_u16(w, machine); - coff_wr_u16(w, nsec); - coff_wr_u32(w, 0u); /* TimeDateStamp */ - coff_wr_u32(w, 0u); /* PointerToSymbolTable */ - coff_wr_u32(w, 0u); /* NumberOfSymbols */ - coff_wr_u16(w, (u16)PE_OPT_HDR_SIZE); /* SizeOfOptionalHeader */ - coff_wr_u16(w, characteristics); -} - -/* Per-section meta used by both the data-directory fill and the - * IMAGE_SECTION_HEADER emit. Compactly captures everything the writer - * needs to know about the four-or-five output sections. 
*/ -typedef struct CoffOutHdr { - const char* name; - u32 vsize; - u32 rva; - u32 size_raw; - u32 file_offset; - u32 characteristics; -} CoffOutHdr; - -static void coff_write_optional_header(Writer* w, u32 entry_rva, - const CoffSection out[COFF_NBUCKETS], - u32 headers_size_padded, - u32 image_size, - int pie, - u16 subsystem, - const CoffImportTable* it, - const CoffTlsLayout* tls) { - /* Standard fields. */ - coff_wr_u16(w, IMAGE_NT_OPTIONAL_HDR64_MAGIC); - coff_wr_u8(w, PE_LINKER_MAJOR); - coff_wr_u8(w, PE_LINKER_MINOR); - /* SizeOfCode / SizeOfInitializedData / SizeOfUninitializedData. */ - u32 size_code = out[COFF_BUCKET_TEXT].in_image - ? out[COFF_BUCKET_TEXT].size_raw : 0; - u32 size_init = (out[COFF_BUCKET_RDATA].in_image - ? out[COFF_BUCKET_RDATA].size_raw : 0) + - (out[COFF_BUCKET_DATA].in_image - ? out[COFF_BUCKET_DATA].size_raw : 0); - u32 size_uninit = out[COFF_BUCKET_BSS].in_image - ? out[COFF_BUCKET_BSS].size : 0; - coff_wr_u32(w, size_code); - coff_wr_u32(w, size_init); - coff_wr_u32(w, size_uninit); - coff_wr_u32(w, entry_rva); - coff_wr_u32(w, out[COFF_BUCKET_TEXT].in_image - ? out[COFF_BUCKET_TEXT].rva : 0); - /* Windows-specific fields. */ - coff_wr_u64(w, PE_IMAGE_BASE); - coff_wr_u32(w, PE_SECTION_ALIGNMENT); - coff_wr_u32(w, PE_FILE_ALIGNMENT); - coff_wr_u16(w, PE_OS_MAJOR); - coff_wr_u16(w, PE_OS_MINOR); - coff_wr_u16(w, 0u); /* MajorImageVersion */ - coff_wr_u16(w, 0u); /* MinorImageVersion */ - coff_wr_u16(w, PE_SUBSYS_MAJOR); - coff_wr_u16(w, PE_SUBSYS_MINOR); - coff_wr_u32(w, 0u); /* Win32VersionValue */ - coff_wr_u32(w, image_size); - coff_wr_u32(w, headers_size_padded); - coff_wr_u32(w, 0u); /* CheckSum */ - coff_wr_u16(w, subsystem ? 
subsystem : IMAGE_SUBSYSTEM_WINDOWS_CUI); - coff_wr_u16(w, PE_DLL_CHARS); - coff_wr_u64(w, PE_STACK_RESERVE); - coff_wr_u64(w, PE_STACK_COMMIT); - coff_wr_u64(w, PE_HEAP_RESERVE); - coff_wr_u64(w, PE_HEAP_COMMIT); - coff_wr_u32(w, 0u); /* LoaderFlags */ - coff_wr_u32(w, (u32)PE_NUM_DATA_DIRS); - /* DataDirectory[16]. Populated entries: - * [1] IMPORT — descriptor table RVA + total descriptor bytes - * [5] BASERELOC — when PIE and .reloc is in the image - * [12] IAT — first IAT block RVA + sum of per-DLL IAT sizes - * Everything else stays zero. */ - u32 i; - int has_idata = it && it->nimports > 0 && - out[COFF_BUCKET_IDATA].in_image; - for (i = 0; i < PE_NUM_DATA_DIRS; ++i) { - if (i == IMAGE_DIRECTORY_ENTRY_IMPORT && has_idata) { - coff_wr_u32(w, out[COFF_BUCKET_IDATA].rva + it->desc_off); - coff_wr_u32(w, it->desc_size); - } else if (i == IMAGE_DIRECTORY_ENTRY_IAT && has_idata) { - coff_wr_u32(w, out[COFF_BUCKET_IDATA].rva + it->iat_base); - coff_wr_u32(w, it->iat_total); - } else if (i == IMAGE_DIRECTORY_ENTRY_BASERELOC && pie && - out[COFF_BUCKET_RELOC].in_image) { - coff_wr_u32(w, out[COFF_BUCKET_RELOC].rva); - coff_wr_u32(w, out[COFF_BUCKET_RELOC].size); - } else if (i == IMAGE_DIRECTORY_ENTRY_TLS && tls && tls->present) { - coff_wr_u32(w, out[COFF_BUCKET_RDATA].rva + tls->dir_rdata_off); - coff_wr_u32(w, COFF_TLS_DIRECTORY64_SIZE); - } else { - coff_wr_u32(w, 0u); - coff_wr_u32(w, 0u); - } - } -} - -static void coff_write_section_header(Writer* w, const char* name, - u32 vsize, u32 rva, u32 size_raw, - u32 file_offset, - u32 characteristics) { - u8 nm[8] = {0, 0, 0, 0, 0, 0, 0, 0}; - size_t n = slice_from_cstr(name).len; - if (n > 8) n = 8; - memcpy(nm, name, n); - cfree_writer_write(w, nm, 8); - coff_wr_u32(w, vsize); - coff_wr_u32(w, rva); - coff_wr_u32(w, size_raw); - coff_wr_u32(w, file_offset); - coff_wr_u32(w, 0u); /* PointerToRelocations */ - coff_wr_u32(w, 0u); /* PointerToLinenumbers */ - coff_wr_u16(w, 0u); /* NumberOfRelocations */ - 
coff_wr_u16(w, 0u); /* NumberOfLinenumbers */ - coff_wr_u32(w, characteristics); -} - -/* ---- main entry ---- */ - -void link_emit_coff(LinkImage* img, Writer* w) { - Heap* heap = img->heap; - Compiler* c = img->c; - u16 machine = coff_machine_or_panic(c); - if (img->entry_sym == LINK_SYM_NONE) - compiler_panic(c, no_loc(), - "link_emit_coff: no resolved entry symbol"); - - /* ---- pass 1: build buckets + per-section delta map ---- */ - CoffSection out[COFF_NBUCKETS]; - CoffSecMap* map = (CoffSecMap*)heap->alloc( - heap, sizeof(CoffSecMap) * (img->nsections + 1u), - _Alignof(CoffSecMap)); - if (!map && img->nsections) - compiler_panic(c, no_loc(), "link_emit_coff: oom on section map"); - memset(map, 0, sizeof(CoffSecMap) * (img->nsections + 1u)); - - /* coff_build_buckets stashes per-bucket allocation caps in size_raw; - * we read them back into a local before size_raw is recomputed by - * coff_assign_layout so the cleanup path can free with the right - * size. */ - coff_build_buckets(img, out, map); - /* coff_build_buckets stashes per-bucket allocation caps in size_raw - * (the only bucket field we own for the duration of layout); read - * them out before coff_assign_layout overwrites the field. .reloc - * and .idata aren't touched by coff_build_buckets — their caps are - * filled in below once coff_build_reloc_section / coff_emit_idata - * run. */ - u32 bucket_caps[COFF_NBUCKETS]; - u32 b; - for (b = 0; b < COFF_NBUCKETS; ++b) bucket_caps[b] = out[b].size_raw; - - /* ---- pass 1b: collect imports and reserve .idata + .text stubs ---- - * - * Builds the per-DLL / per-import layout and appends one IAT-routing - * stub per imported function to the .text bucket. The .idata bucket - * size is set here (so it counts in nsec); the stub vaddrs and - * IAT-slot vaddrs are finalised after coff_assign_layout. 
*/ - CoffImportTable imports; - int have_imports = coff_collect_imports(img, &imports); - if (have_imports) { - coff_plan_idata_layout(img, &imports); - coff_append_stubs(img, &imports, &out[COFF_BUCKET_TEXT], - &bucket_caps[COFF_BUCKET_TEXT]); - /* Reserve the .idata bucket size so coff_assign_layout / nsec - * accounting sees it. Actual bytes are written by coff_emit_idata - * once the bucket RVA is known. */ - out[COFF_BUCKET_IDATA].size = imports.idata_size; - } - - /* ---- pass 1c: plan the TLS directory record ---- - * - * If any SF_TLS sections survived, reserve 40 bytes at the tail of - * .rdata for the IMAGE_TLS_DIRECTORY64. Bytes are zeroed now and - * filled in by coff_emit_tls_dir once the bucket RVAs are final. */ - CoffTlsLayout tls; - coff_plan_tls_layout(img, out, &bucket_caps[COFF_BUCKET_RDATA], &tls); - - /* ---- pass 2: decide whether .reloc will be in the image ---- - * - * The headers' file size (and therefore every section's file - * offset) depends on the section-table entry count, so we need to - * commit to "is .reloc emitted?" before laying out file offsets. - * .reloc lights up iff PIE and at least one absolute reloc points - * into a kept section, OR a TLS directory is emitted (its four u64 - * VA fields all need base-relocs). 
*/ - int emit_reloc = 0; - if (img->pie) { - u32 i; - u32 nrel = LinkRelocs_count(&img->relocs); - for (i = 0; i < nrel; ++i) { - const LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); - if (!coff_reloc_needs_base_reloc(r->kind)) continue; - if (r->link_section_id == LINK_SEC_NONE) continue; - emit_reloc = 1; - break; - } - if (!emit_reloc && tls.present) emit_reloc = 1; - } - - u32 nsec = 0; - for (b = 0; b < COFF_NBUCKETS; ++b) { - if (b == COFF_BUCKET_RELOC) { - if (emit_reloc) ++nsec; /* tentative; size set below */ - continue; - } - if (out[b].size) ++nsec; - } - u32 headers_size_unpadded = - PE_DOS_HDR_SIZE + PE_SIG_SIZE + PE_FILE_HDR_SIZE + PE_OPT_HDR_SIZE + - nsec * PE_SECTION_HDR_SIZE; - u32 headers_size_padded = - (u32)ALIGN_UP((u64)headers_size_unpadded, (u64)PE_FILE_ALIGNMENT); - - /* First layout pass: fixes RVAs / file offsets for buckets that - * already have a finalised size (.text, .rdata, .idata, .data, .bss). - * .reloc's RVA is provisional — it depends on .reloc's own size, - * which is still 0 at this point. */ - (void)coff_assign_layout(out, headers_size_padded, PE_FIRST_SECTION_RVA); - - /* ---- pass 2b: emit .idata bytes + per-arch IAT stubs ---- - * - * The .idata bucket's RVA is final after the first assign_layout; - * stubs need it (the indirect-jump displacement targets an IAT slot) - * and .idata's own descriptor / ILT / IAT records all carry RVAs. - * coff_import_vaddr_build builds the per-LinkSymId override table - * that apply_all_relocs consults in place of the (zero) symbol - * vaddr for imported targets. */ - CoffImportVaddr import_vaddr; - memset(&import_vaddr, 0, sizeof(import_vaddr)); - if (have_imports) { - coff_emit_idata(img, &imports, out, &bucket_caps[COFF_BUCKET_IDATA]); - coff_emit_stubs(img, &imports, out); - coff_import_vaddr_build(img, &imports, out, &import_vaddr); - } - - /* Write the TLS directory bytes now that bucket RVAs are final. 
*/ - coff_emit_tls_dir(img, out, map, &tls); - - /* ---- pass 3: build .reloc using the now-final bucket RVAs ---- - * - * coff_build_reloc_section reads out[bucket].rva indirectly via - * map[].new_rva + (write_vaddr - sec->vaddr) → site offset within - * the bucket; the absolute site_rva is bucket.rva + that offset. - * Patch site RVAs are page-quantised in the emitted blob, so this - * is the spot where the bucket RVAs need to be already final. - * - * TLS directory's four absolute-VA fields ride into the entries via - * the `extras` array — they aren't ordinary symbol relocations, so - * they don't show up in img->relocs. */ - if (emit_reloc) { - CoffRelocEntry tls_extras[4]; - u32 n_tls_extras = 0; - if (tls.present) { - u32 dir_rva = out[COFF_BUCKET_RDATA].rva + tls.dir_rdata_off; - static const u32 field_offs[4] = { - COFF_TLSDIR_OFF_START_ADDR, COFF_TLSDIR_OFF_END_ADDR, - COFF_TLSDIR_OFF_INDEX_ADDR, COFF_TLSDIR_OFF_CALLBACKS, - }; - u32 k; - for (k = 0; k < 4; ++k) { - if (field_offs[k] == COFF_TLSDIR_OFF_CALLBACKS && - !tls.callbacks_sym) - continue; - tls_extras[n_tls_extras].site_rva = dir_rva + field_offs[k]; - tls_extras[n_tls_extras].type = (u16)IMAGE_REL_BASED_DIR64; - tls_extras[n_tls_extras].pad = 0; - ++n_tls_extras; - } - } - coff_build_reloc_section(img, out, map, &out[COFF_BUCKET_RELOC], - tls_extras, n_tls_extras); - bucket_caps[COFF_BUCKET_RELOC] = out[COFF_BUCKET_RELOC].size_raw; - /* size_raw was stashed by build; assign_layout below recomputes it - * as the FileAlignment-padded length. */ - (void)coff_assign_layout(out, headers_size_padded, - PE_FIRST_SECTION_RVA); - } - - /* `_tls_used` is the public mingw/PE name for the TLS directory - * record. Keep it in lockstep with the optional-header TLS data - * directory, rather than leaving references bound to mingw's tlssup.o - * placeholder record. 
*/ - coff_define_tls_used(img, out, &tls); - - /* ---- pass 4: resolve entry symbol's PE RVA ---- - * - * Done before apply so the optional-header field has its final - * value. */ - const LinkSymbol* entry_sym = - LinkSyms_at(&img->syms, img->entry_sym - 1); - if (!entry_sym->defined || entry_sym->kind == SK_ABS) - compiler_panic(c, no_loc(), - "link_emit_coff: entry symbol is not a defined " - "image-relative function"); - const LinkSection* entry_sec = coff_section_at(img, entry_sym->vaddr); - if (!entry_sec) - compiler_panic(c, no_loc(), - "link_emit_coff: entry symbol has no containing " - "section"); - u8 entry_bucket = map[entry_sec->id - 1].bucket; - u32 entry_rva = out[entry_bucket].rva + - map[entry_sec->id - 1].new_rva + - (u32)(entry_sym->vaddr - entry_sec->vaddr); - - /* ---- pass 5: apply all relocations into bucket bytes ---- */ - coff_apply_all_relocs(img, out, map, - have_imports ? &import_vaddr : NULL); - - /* ---- pass 6: compute SizeOfImage (in-memory size) ---- */ - u32 image_size = 0; - for (b = 0; b < COFF_NBUCKETS; ++b) { - if (!out[b].in_image) continue; - u32 end = out[b].rva + out[b].size; - if (end > image_size) image_size = end; - } - image_size = (u32)ALIGN_UP((u64)image_size, (u64)PE_SECTION_ALIGNMENT); - - /* ---- pass 7: write everything ---- */ - u16 file_chars = IMAGE_FILE_EXECUTABLE_IMAGE | - IMAGE_FILE_LARGE_ADDRESS_AWARE; - if (!img->pie || !out[COFF_BUCKET_RELOC].in_image) { - file_chars |= IMAGE_FILE_RELOCS_STRIPPED; - } - - coff_write_dos_stub(w); - /* PE signature. */ - coff_wr_u32(w, IMAGE_NT_SIGNATURE); - coff_write_file_header(w, machine, (u16)nsec, file_chars); - u16 subsystem = img->linker ? img->linker->pe_subsystem : 0; - coff_write_optional_header(w, entry_rva, out, headers_size_padded, - image_size, img->pie, subsystem, - have_imports ? &imports : NULL, &tls); - - /* Section table. 
*/ - for (b = 0; b < COFF_NBUCKETS; ++b) { - if (!out[b].in_image) continue; - coff_write_section_header(w, out[b].name, out[b].size, out[b].rva, - out[b].size_raw, out[b].file_offset, - out[b].characteristics); - } - - /* Pad to first section's file offset. */ - u64 cur = (u64)headers_size_unpadded; - u64 first_file_off = headers_size_padded; - if (cur < first_file_off) { - coff_write_zeroes(w, first_file_off - cur); - cur = first_file_off; - } - - /* Section bodies. */ - for (b = 0; b < COFF_NBUCKETS; ++b) { - if (!out[b].in_image) continue; - if (!out[b].has_file_bytes) continue; - if (cur < out[b].file_offset) { - coff_write_zeroes(w, out[b].file_offset - cur); - cur = out[b].file_offset; - } - cfree_writer_write(w, out[b].bytes, out[b].size); - cur += out[b].size; - if (out[b].size_raw > out[b].size) { - coff_write_zeroes(w, out[b].size_raw - out[b].size); - cur += out[b].size_raw - out[b].size; - } - } - - /* ---- cleanup ---- */ - for (b = 0; b < COFF_NBUCKETS; ++b) { - if (out[b].bytes) heap->free(heap, out[b].bytes, bucket_caps[b]); - } - heap->free(heap, map, sizeof(CoffSecMap) * (img->nsections + 1u)); - if (have_imports) { - coff_import_vaddr_free(img, &import_vaddr); - coff_imports_free(img, &imports); - } -} diff --git a/src/link/link_dyn.c b/src/link/link_dyn.c @@ -1,982 +0,0 @@ -/* Phase 4 of dynamic linking: synthesize the dyn-link tables and - * sections an ET_DYN ELF exe needs to be loadable by a real runtime - * loader (musl ld-musl-aarch64.so.1). - * - * Inputs (computed by earlier passes): - * - LinkSymbol entries with `imported = 1` (set by resolve_undefs's - * DSO-search path; their dso_input_id names the providing DSO). - * - LinkInputs of kind LINK_INPUT_DSO_BYTES carrying SONAMEs. 
- * - * Outputs (deposited on LinkImage.dyn): - * - .interp PT_INTERP target string - * - .dynsym + .dynstr symbol table + name pool - * - .gnu.hash GNU-style hash for the loader - * - .rela.dyn GLOB_DAT (data imports) + space for - * R_AARCH64_RELATIVE records that - * Phase 6 emit fills in - * - .rela.plt JUMP_SLOT records (one per imported func) - * - .plt allocated, body NOT emitted (Phase 5) - * - .got.plt 3 reserved slots + 1 per PLT slot, - * allocated, body NOT emitted - * - .dynamic PT_DYNAMIC body, populated - * - * The .plt body / GOT-slot fill / CALL26 reloc rewriting are Phase 5; - * they're called out at the relevant allocation site so the missing - * pieces are obvious to anyone reading the output. The static-exe path - * is unaffected — layout_dyn early-outs when emit_pie is 0. - * - * Allocator pattern follows layout_iplt (link_layout.c): grow segments - * + sections via realloc, then page-align each new segment after the - * existing image span. Synthetic sections carry input_id == LINK_INPUT_NONE - * so downstream passes (emit_reloc_records, GC) leave them alone. 
- */ - -#include <string.h> - -#include "core/bytes.h" -#include "core/heap.h" -#include "core/pool.h" -#include "core/slice.h" -#include "core/util.h" -#include "core/vec.h" -#include "link/link.h" -#include "link/link_arch.h" -#include "link/link_internal.h" -#include "obj/elf.h" - -static SrcLoc no_loc(void) { - SrcLoc l = {0, 0, 0}; - return l; -} - -/* ---- small allocators (mirror layout_iplt's helpers) ---- */ - -static u32 dyn_alloc_segments(LinkImage* img, u32 nseg) { - Heap* h = img->heap; - u32 base = img->nsegments; - u32 new_nseg = base + nseg; - LinkSegment* nsegs = (LinkSegment*)h->realloc( - h, img->segments, sizeof(*img->segments) * img->nsegments, - sizeof(*img->segments) * new_nseg, _Alignof(LinkSegment)); - u8** nsbufs = (u8**)h->realloc( - h, img->segment_bytes, sizeof(*img->segment_bytes) * img->nsegments, - sizeof(*img->segment_bytes) * new_nseg, _Alignof(u8*)); - size_t* nscaps = (size_t*)h->realloc( - h, img->segment_bytes_cap, - sizeof(*img->segment_bytes_cap) * img->nsegments, - sizeof(*img->segment_bytes_cap) * new_nseg, _Alignof(size_t)); - if (!nsegs || !nsbufs || !nscaps) - compiler_panic(img->c, no_loc(), "link: oom on dyn segments"); - img->segments = nsegs; - img->segment_bytes = nsbufs; - img->segment_bytes_cap = nscaps; - return base; -} - -static u32 dyn_alloc_sections(LinkImage* img, u32 nsec) { - Heap* h = img->heap; - u32 base = img->nsections; - u32 new_nsec = base + nsec; - LinkSection* nsections = (LinkSection*)h->realloc( - h, img->sections, sizeof(*img->sections) * img->nsections, - sizeof(*img->sections) * new_nsec, _Alignof(LinkSection)); - if (!nsections) compiler_panic(img->c, no_loc(), "link: oom on dyn sections"); - img->sections = nsections; - return base; -} - -/* ---- byte-builder for .dynstr / .gnu.hash ---- */ - -typedef struct ByteBuf { - Heap* heap; - u8* data; - u32 len; - u32 cap; -} ByteBuf; - -static void bb_init(ByteBuf* b, Heap* h) { - b->heap = h; - b->data = NULL; - b->len = 0; - b->cap = 0; -} 
-static void bb_reserve(ByteBuf* b, u32 need) { - if (need <= b->cap) return; - (void)VEC_GROW(b->heap, b->data, b->cap, need); -} -static u32 bb_append(ByteBuf* b, const void* src, u32 n) { - u32 off = b->len; - bb_reserve(b, b->len + n); - if (n) memcpy(b->data + b->len, src, n); - b->len += n; - return off; -} -static u32 bb_append_str(ByteBuf* b, const char* s, u32 n) { - /* Linear dedup over what we've appended so far. Strtabs are small. */ - if (n == 0) return 0; - if (b->len > n) { - u32 i; - for (i = 0; i + n < b->len; ++i) { - if (b->data[i + n] == 0 && memcmp(b->data + i, s, n) == 0) return i; - } - } - u32 off = b->len; - bb_reserve(b, b->len + n + 1u); - memcpy(b->data + b->len, s, n); - b->data[b->len + n] = 0; - b->len += n + 1u; - return off; -} - -/* ---- GNU-hash computation (psABI v1 hash) ---- - * Body layout: - * u32 nbuckets - * u32 symoffset (first hashed dynsym index) - * u32 bloom_size (in 64-bit words) - * u32 bloom_shift - * u64 bloom[bloom_size] - * u32 buckets[nbuckets] - * u32 chains[ndynsym - symoffset] - * - * For Phase 4 we keep this very small: nbuckets = max(1, n/2), - * bloom_size = 1, bloom_shift = 6 (64-bit ELFCLASS64). All hashed - * symbols (sym_offset..ndynsym-1) participate in bloom + buckets + - * chains. Slot 0..symoffset-1 are STN_UNDEF + locals, which the - * loader doesn't hash. */ - -static u32 gnu_hash_name(const char* s, u32 n) { - /* h = 5381; for c in s: h = h * 33 + c */ - u32 h = 5381u; - u32 i; - for (i = 0; i < n; ++i) h = (h * 33u) + (u8)s[i]; - return h; -} - -/* ---- partition: enumerate imports ---- - * - * Walks LinkSyms and collects each `imported` symbol that's the - * canonical entry in img->globals (resolve_undefs may stamp `imported` - * onto multiple shadow slots of the same name; only the canonical one - * lands in dynsym). The two output arrays are LinkSymIds: funcs first - * (PLT-bound), then data (GOT-bound via GLOB_DAT). 
*/ - -typedef struct ImportLists { - LinkSymId* funcs; - u32 nfuncs; - LinkSymId* datas; - u32 ndatas; -} ImportLists; - -static int sym_is_func_import(const LinkSymbol* s) { - /* Most undef shadows have kind = SK_UNDEF (the obj reader keys kind - * off shndx, not STT_*). Only useful when the canonical entry - * carried a real type — fall through to the DSO lookup otherwise. */ - return s->kind == SK_FUNC || s->kind == SK_IFUNC; -} - -/* Resolve an import's classifier kind by consulting its providing - * DSO's dynsym. read_elf_dso preserves STT_FUNC / STT_OBJECT / etc. - * on each defined export; the consumer's undef may have arrived as - * SK_UNDEF (clang emits external refs as SHN_UNDEF, which the reader - * collapses to SK_UNDEF regardless of STT_*). Returns 1 for func / - * ifunc, 0 for everything else (or if the DSO export is missing). */ -static int dso_export_is_func(Linker* l, const LinkSymbol* s) { - if (s->dso_input_id == LINK_INPUT_NONE) return 0; - if (s->dso_input_id - 1u >= LinkInputs_count(&l->inputs)) return 0; - LinkInput* in = LinkInputs_at(&l->inputs, s->dso_input_id - 1u); - if (!in->obj) return 0; - ObjSymIter* it = obj_symiter_new(in->obj); - ObjSymEntry e; - int is_func = 0; - while (obj_symiter_next(it, &e)) { - const ObjSym* es = e.sym; - if (!es || es->name != s->name) continue; - if (es->kind == SK_UNDEF) continue; - is_func = (es->kind == SK_FUNC || es->kind == SK_IFUNC); - break; - } - obj_symiter_free(it); - return is_func; -} - -static void collect_imports(Linker* l, LinkImage* img, Heap* h, - ImportLists* il) { - u32 i; - u32 cap_f = 0, cap_d = 0; - il->funcs = NULL; - il->datas = NULL; - il->nfuncs = il->ndatas = 0; - for (i = 0; i < LinkSyms_count(&img->syms); ++i) { - LinkSymbol* s = LinkSyms_at(&img->syms, i); - if (!s->imported) continue; - if (s->name == 0) continue; - /* Only the canonical (img->globals) entry per name. 
*/ - LinkSymId canonical = symhash_get(&img->globals, s->name); - if (canonical != LINK_SYM_NONE && canonical != s->id) continue; - int is_func = sym_is_func_import(s) || dso_export_is_func(l, s); - if (is_func) { - if (VEC_GROW(h, il->funcs, cap_f, il->nfuncs + 1u)) - compiler_panic(img->c, no_loc(), "link: oom on import-funcs"); - il->funcs[il->nfuncs++] = s->id; - } else { - if (VEC_GROW(h, il->datas, cap_d, il->ndatas + 1u)) - compiler_panic(img->c, no_loc(), "link: oom on import-datas"); - il->datas[il->ndatas++] = s->id; - } - } -} - -static void free_imports(Heap* h, ImportLists* il) { - if (il->funcs) h->free(h, il->funcs, sizeof(*il->funcs) * il->nfuncs); - if (il->datas) h->free(h, il->datas, sizeof(*il->datas) * il->ndatas); -} - -/* ---- DT_NEEDED set: each DSO input that contributed at least one - * import. Order is input order so the loader sees deps in declaration - * order. */ -static void collect_needed(Linker* l, LinkImage* img, LinkDynState* dyn) { - Heap* h = img->heap; - u8* used; - u32 ninputs = LinkInputs_count(&l->inputs); - u32 i, nused = 0; - - used = (u8*)h->alloc(h, ninputs ? ninputs : 1u, 1); - if (!used) compiler_panic(img->c, no_loc(), "link: oom on needed map"); - memset(used, 0, ninputs ? ninputs : 1u); - - /* Mark every DSO that ended up satisfying at least one import. */ - for (i = 0; i < LinkSyms_count(&img->syms); ++i) { - LinkSymbol* s = LinkSyms_at(&img->syms, i); - if (!s->imported) continue; - if (s->dso_input_id == LINK_INPUT_NONE) continue; - if (s->dso_input_id - 1u >= ninputs) continue; - used[s->dso_input_id - 1u] = 1; - } - /* Always pull every explicitly-supplied DSO into DT_NEEDED, even if - * no import landed on it — matches GNU ld without --as-needed. - * Phase 4 doesn't plumb --as-needed through to the resolver, so the - * default "needed" behavior is the right baseline. 
*/ - for (i = 0; i < ninputs; ++i) { - LinkInput* in = LinkInputs_at(&l->inputs, i); - if (in->kind == LINK_INPUT_DSO_BYTES && in->soname != 0) used[i] = 1; - } - for (i = 0; i < ninputs; ++i) - if (used[i]) ++nused; - - dyn->needed = - nused ? (Sym*)h->alloc(h, sizeof(Sym) * nused, _Alignof(Sym)) : NULL; - if (nused && !dyn->needed) - compiler_panic(img->c, no_loc(), "link: oom on needed list"); - dyn->nneeded = 0; - for (i = 0; i < ninputs; ++i) { - LinkInput* in = LinkInputs_at(&l->inputs, i); - if (!used[i]) continue; - if (in->soname == 0) continue; - dyn->needed[dyn->nneeded++] = in->soname; - } - h->free(h, used, ninputs ? ninputs : 1u); -} - -/* ---- dynsym + dynstr build ---- - * - * Slot 0: STN_UNDEF (zero entry). The loader ignores names with index - * 0; we still emit a dynstr entry at offset 0 (the leading NUL). - * - * Slots 1..nimports: imported symbols (functions first, then data). - * st_shndx = SHN_UNDEF; the loader fills in the value at bind time. - * st_value/size are zero — the static linker has no value for an - * imported symbol. - * - * No `--export-dynamic` plumbing in Phase 4: only imports + the null - * slot land in .dynsym. Adding exports is mechanical (walk - * img->globals, append entries with st_shndx = matching .text/.data - * section index) but isn't on the test/musl path. */ - -static void build_dynsym(LinkImage* img, LinkDynState* dyn, - const ImportLists* il, ByteBuf* dynstr) { - Heap* h = img->heap; - u32 nimports = il->nfuncs + il->ndatas; - u32 ndynsym = 1u + nimports; /* +1 for null slot */ - u32 i; - - dyn->ndynsym = ndynsym; - dyn->dynsym = (DynSymRec*)h->alloc(h, sizeof(*dyn->dynsym) * ndynsym, - _Alignof(DynSymRec)); - if (!dyn->dynsym) compiler_panic(img->c, no_loc(), "link: oom on dynsym"); - memset(dyn->dynsym, 0, sizeof(*dyn->dynsym) * ndynsym); - - /* Slot 0: STN_UNDEF. dynstr leads with a NUL so st_name=0 reads as - * the empty string. 
*/ - { - u8 z = 0; - bb_append(dynstr, &z, 1); - } - - /* Per-symbol: dedupe `sym_dynidx` lookup table. Sized to LinkSymId - * upper bound. Clean (zero-filled) by alloc convention; we set - * indices for imports below. */ - dyn->sym_dynidx_size = LinkSyms_count(&img->syms) + 1u; - dyn->sym_dynidx = (u32*)h->alloc( - h, sizeof(*dyn->sym_dynidx) * dyn->sym_dynidx_size, _Alignof(u32)); - if (!dyn->sym_dynidx) - compiler_panic(img->c, no_loc(), "link: oom on sym_dynidx"); - memset(dyn->sym_dynidx, 0, sizeof(*dyn->sym_dynidx) * dyn->sym_dynidx_size); - /* sym_plt_vaddr is populated alongside the PLT body emit below; here - * we only allocate the parallel array. */ - dyn->sym_plt_vaddr = (u64*)h->alloc( - h, sizeof(*dyn->sym_plt_vaddr) * dyn->sym_dynidx_size, _Alignof(u64)); - if (!dyn->sym_plt_vaddr) - compiler_panic(img->c, no_loc(), "link: oom on sym_plt_vaddr"); - memset(dyn->sym_plt_vaddr, 0, - sizeof(*dyn->sym_plt_vaddr) * dyn->sym_dynidx_size); - - /* All imports have STB_GLOBAL so first_global is right after the - * single STN_UNDEF slot. (When local exports land via - * --export-dynamic, this needs to grow.) 
*/ - dyn->first_global = 1u; - - u32 idx = 1u; - for (i = 0; i < il->nfuncs; ++i) { - LinkSymId lsid = il->funcs[i]; - LinkSymbol* s = LinkSyms_at(&img->syms, lsid - 1); - DynSymRec* r = &dyn->dynsym[idx]; - Slice nm_s = pool_slice(img->c->global, s->name); - const char* nm = nm_s.s; - size_t namelen = nm_s.len; - r->st_name = bb_append_str(dynstr, nm, (u32)namelen); - r->st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC); - r->st_other = STV_DEFAULT; - r->st_shndx = SHN_UNDEF; - r->st_value = 0; - r->st_size = 0; - dyn->sym_dynidx[lsid] = idx; - ++idx; - } - for (i = 0; i < il->ndatas; ++i) { - LinkSymId lsid = il->datas[i]; - LinkSymbol* s = LinkSyms_at(&img->syms, lsid - 1); - DynSymRec* r = &dyn->dynsym[idx]; - Slice nm_s = pool_slice(img->c->global, s->name); - const char* nm = nm_s.s; - size_t namelen = nm_s.len; - u8 elf_type = STT_OBJECT; - if (s->kind == SK_TLS) - elf_type = STT_TLS; - else if (s->kind == SK_NOTYPE) - elf_type = STT_NOTYPE; - r->st_name = bb_append_str(dynstr, nm, (u32)namelen); - r->st_info = ELF64_ST_INFO(STB_GLOBAL, elf_type); - r->st_other = STV_DEFAULT; - r->st_shndx = SHN_UNDEF; - r->st_value = 0; - r->st_size = 0; - dyn->sym_dynidx[lsid] = idx; - ++idx; - } -} - -/* ---- .gnu.hash builder ---- - * - * Hashed range is [first_global, ndynsym) — slot 0 (STN_UNDEF) is - * unhashed. Layout matches loader expectations (musl, glibc, FreeBSD). - * - * Bucket count: max(1, hashed_count / 2), rounded up to odd so the - * mod operation distributes more uniformly. Bloom is 1 word for - * Phase 4 — a real implementation would scale with hashed_count, but - * 1 word with shift=6 still satisfies the loader's correctness check - * (any bit set is "maybe present"; false-positives only cost a chain - * scan). */ - -static void build_gnu_hash(Heap* h, LinkImage* img, LinkDynState* dyn, - const ByteBuf* dynstr) { - u32 hashed = (dyn->ndynsym > dyn->first_global) - ? (dyn->ndynsym - dyn->first_global) - : 0u; - u32 nbuckets = hashed ? 
hashed : 1u; - /* Round nbuckets up to next odd number. */ - if ((nbuckets & 1u) == 0u) nbuckets += 1u; - u32 bloom_size = 1u; /* 64-bit word */ - u32 bloom_shift = 6u; - u32 sym_offset = dyn->first_global; - u32 hdr_bytes = 16u; /* nbuckets/symoff/bloomsz/bloomshift */ - u32 bloom_bytes = bloom_size * 8u; - u32 buckets_bytes = nbuckets * 4u; - u32 chains_bytes = hashed * 4u; - u32 total = hdr_bytes + bloom_bytes + buckets_bytes + chains_bytes; - - u8* buf = (u8*)h->alloc(h, total ? total : 1u, 4); - if (!buf) compiler_panic(img->c, no_loc(), "link: oom on .gnu.hash"); - memset(buf, 0, total); - - wr_u32_le(buf + 0, nbuckets); - wr_u32_le(buf + 4, sym_offset); - wr_u32_le(buf + 8, bloom_size); - wr_u32_le(buf + 12, bloom_shift); - - /* Bloom + buckets + chains. We need each hashed symbol's hash. */ - if (hashed) { - u32 i; - u32* hashes = (u32*)h->alloc(h, sizeof(u32) * hashed, _Alignof(u32)); - if (!hashes) - compiler_panic(img->c, no_loc(), "link: oom on .gnu.hash hashes"); - for (i = 0; i < hashed; ++i) { - const DynSymRec* r = &dyn->dynsym[sym_offset + i]; - const char* name = (const char*)dynstr->data + r->st_name; - size_t n = name ? slice_from_cstr(name).len : 0; - hashes[i] = gnu_hash_name(name, (u32)n); - } - - /* Bloom filter: H[i] / H[i] >> shift */ - u64 bloom = 0; - for (i = 0; i < hashed; ++i) { - u32 h1 = hashes[i] % 64u; - u32 h2 = (hashes[i] >> bloom_shift) % 64u; - bloom |= ((u64)1 << h1) | ((u64)1 << h2); - } - wr_u64_le(buf + hdr_bytes, bloom); - - /* Buckets/chains: for each hashed sym, append to its bucket's - * chain. The chain encodes (hash & ~1) per entry; the LSB is set - * on the LAST entry in a bucket to terminate. Buckets are filled - * with the first chain index that hashes there (1-based into the - * dynsym, i.e. `sym_offset + i`). */ - u32* buckets = (u32*)(buf + hdr_bytes + bloom_bytes); - u32* chains = (u32*)(buf + hdr_bytes + bloom_bytes + buckets_bytes); - /* First pass: bucket = first sym index that hashes there. 
*/ - for (i = 0; i < hashed; ++i) { - u32 b = hashes[i] % nbuckets; - if (buckets[b] == 0) buckets[b] = sym_offset + i; - } - /* Second pass: chain[i] = hash with LSB cleared; LSB set if next - * sym is in a different bucket. Walk symbols in order; LSB on - * chain[i] when sym i+1 is in a different bucket OR is the end. */ - for (i = 0; i < hashed; ++i) { - u32 v = hashes[i] & ~1u; - int last = (i + 1 == hashed) || - ((hashes[i + 1] % nbuckets) != (hashes[i] % nbuckets)); - if (last) v |= 1u; - chains[i] = v; - } - /* Fix bucket→first-sym indices: if multiple syms share a bucket - * but were inserted out of contiguous order, we need them - * contiguous. We assumed contiguity above without enforcing it. - * For Phase 4 with small hashed sets this is fine, but flag the - * shortcut. */ - h->free(h, hashes, sizeof(u32) * hashed); - } - - dyn->gnu_hash = buf; - dyn->gnu_hash_len = total; -} - -/* ---- .dynamic body builder ---- - * - * Computed at layout time so the size is known before segments are - * placed. Each entry is two u64s (d_tag, d_un.d_val|d_un.d_ptr). - * Final entry is DT_NULL. The d_ptr fields that point at other - * synthetic sections are filled with image-relative vaddrs; the emit - * pass adds load-base / IMAGE_BASE only when ET_EXEC. */ - -typedef struct DynEntry { - u64 tag; - u64 val; /* either d_val or d_ptr; emit just writes 8 bytes */ -} DynEntry; - -static u32 count_dynamic_entries(const LinkDynState* dyn) { - /* Required: DT_STRTAB DT_STRSZ DT_SYMTAB DT_SYMENT DT_GNU_HASH - * DT_FLAGS_1 (DF_1_NOW for eager binding) - * DT_NULL terminator - * Optional (only when there are .rela.dyn records): - * DT_RELA DT_RELASZ DT_RELAENT - * Optional (only when there are imported functions / a PLT): - * DT_PLTGOT DT_PLTRELSZ DT_PLTREL DT_JMPREL - * Plus DT_NEEDED per dependency. 
*/ - u32 n = dyn->nneeded; - n += 6; /* 5 fixed + DT_NULL */ - if (dyn->cap_rela_dyn) n += 3; /* DT_RELA + DT_RELASZ + DT_RELAENT */ - if (dyn->nrela_plt) n += 4; /* PLT-only entries */ - return n; -} - -/* ---- main entry ---- */ - -void layout_dyn(Linker* l, LinkImage* img) { - Heap* h = img->heap; - LinkDynState* dyn; - ImportLists imports; - ByteBuf dynstr; - u64 page; - const LinkArchDesc* arch; - - if (!l->emit_pie) return; - - arch = link_arch_desc_for(l->c); - if (!arch) - compiler_panic(img->c, no_loc(), "link: layout_dyn: no arch descriptor"); - - dyn = (LinkDynState*)h->alloc(h, sizeof(*dyn), _Alignof(LinkDynState)); - if (!dyn) compiler_panic(img->c, no_loc(), "link: oom on dyn state"); - memset(dyn, 0, sizeof(*dyn)); - img->dyn = dyn; - img->pie = 1; - - /* PT_INTERP path. Default to the canonical musl loader matching the - * target arch (per-arch table in src/arch/<arch>/link.c) when the caller - * didn't set one. Drivers like cfree-cc always override via - * link_set_interp_path; this default is correctness for direct - * libcfree consumers. glibc users have to set their interp - * explicitly — we don't pick a default for them. */ - dyn->interp_path = - l->interp_path - ? l->interp_path - : pool_intern_slice(l->c->global, slice_from_cstr(arch->default_musl_interp)); - - /* Step 1: enumerate imports + DT_NEEDED. */ - collect_imports(l, img, h, &imports); - collect_needed(l, img, dyn); - - /* Step 2: build .dynstr + .dynsym. .dynstr must also carry the - * DT_NEEDED soname strings the .dynamic body references; intern - * them after the import names so build_dynsym's de-dup also covers - * any name that happens to collide with a soname. 
*/ - bb_init(&dynstr, h); - build_dynsym(img, dyn, &imports, &dynstr); - { - u32 ni; - for (ni = 0; ni < dyn->nneeded; ++ni) { - Slice s_s = pool_slice(l->c->global, dyn->needed[ni]); - const char* s = s_s.s; - size_t slen = s_s.len; - if (s && slen) (void)bb_append_str(&dynstr, s, (u32)slen); - } - } - dyn->dynstr = dynstr.data; - dyn->dynstr_len = dynstr.len; - - /* Step 3: .gnu.hash. */ - build_gnu_hash(h, img, dyn, &dynstr); - - /* Step 4: pre-size all the synthetic sections. - * .interp: strlen + 1 - * .dynsym: 24 * ndynsym - * .dynstr: dynstr_len - * .gnu.hash: gnu_hash_len - * .rela.dyn: 24 * (ndatas + cap_relative) — we reserve 4096 entries - * for RELATIVE; emit fills them. (Quick-and-dirty: the - * static path never has so many internal absolute relocs.) - * .rela.plt: 24 * nfuncs - * .plt: 32 + 16 * nfuncs (PLT0 + per-slot) - * .got.plt: 8 * (3 + nfuncs) - * .dynamic: 16 * count_dynamic_entries - */ - dyn->nplt = imports.nfuncs; - dyn->nrela_plt = imports.nfuncs; - dyn->rela_plt = imports.nfuncs - ? (DynRela*)h->alloc(h, sizeof(DynRela) * imports.nfuncs, - _Alignof(DynRela)) - : NULL; - if (imports.nfuncs && !dyn->rela_plt) - compiler_panic(img->c, no_loc(), "link: oom on rela_plt"); - - /* RELA dyn: GLOB_DAT (one per imported abs-relocated symbol) + - * RELATIVE (one per PIE internal abs reloc against a defined sym). - * Phase 5 emits these dynamically during reloc-apply; pre-count the - * exact total here (img->relocs and the resolve-time `imported` flags - * are already settled by the time layout_dyn runs) so the section - * isn't padded with hundreds of trailing R_*_NONE records. 
*/ - u32 cap_rel = 0; - { - u32 ri; - for (ri = 0; ri < LinkRelocs_count(&img->relocs); ++ri) { - const LinkRelocApply* r = LinkRelocs_at(&img->relocs, ri); - const LinkSymbol* tgt = LinkSyms_at(&img->syms, r->target - 1); - if (r->kind != R_ABS32 && r->kind != R_ABS64) continue; - if (tgt->imported) { - cap_rel++; /* GLOB_DAT */ - } else if (tgt->defined && tgt->kind != SK_ABS) { - cap_rel++; /* RELATIVE */ - } - } - } - dyn->cap_rela_dyn = cap_rel; - dyn->rela_dyn = - dyn->cap_rela_dyn - ? (DynRela*)h->alloc(h, sizeof(DynRela) * dyn->cap_rela_dyn, - _Alignof(DynRela)) - : NULL; - if (dyn->cap_rela_dyn && !dyn->rela_dyn) - compiler_panic(img->c, no_loc(), "link: oom on rela_dyn"); - dyn->nrela_dyn = 0; - - Slice interp_s = pool_slice(l->c->global, dyn->interp_path); - const char* interp_str = interp_s.s; - size_t namelen = interp_s.len; - u64 interp_bytes = (u64)namelen + 1u; - u64 dynsym_bytes = (u64)dyn->ndynsym * ELF64_SYM_SIZE; - u64 dynstr_bytes = (u64)dyn->dynstr_len; - u64 gnuhash_bytes = (u64)dyn->gnu_hash_len; - /* rela.dyn / rela.plt sized for full capacity; emit only writes - * what's populated, but the section's file_size matches capacity - * so PT_LOAD/.rela.dyn shdr sh_size add up. Trailing zero records - * are harmless to the loader (R_AARCH64_NONE). */ - u64 rela_dyn_bytes = (u64)dyn->cap_rela_dyn * ELF64_RELA_SIZE; - u64 rela_plt_bytes = (u64)dyn->nrela_plt * ELF64_RELA_SIZE; - u64 plt_bytes = - (u64)(imports.nfuncs - ? arch->plt0_size + arch->plt_entry_size * imports.nfuncs - : 0u); - u64 gotplt_bytes = (u64)(imports.nfuncs ? 8u * (3u + imports.nfuncs) : 0u); - dyn->ndyn_entries = count_dynamic_entries(dyn); - u64 dynamic_bytes = (u64)dyn->ndyn_entries * ELF64_DYN_SIZE; - - /* Step 5: place segments, page-aligned after the existing image - * span. 
New segments: - * ro_seg (PF_R) — .interp + .dynsym + .dynstr + .gnu.hash + - * .rela.dyn + .rela.plt - * rx_seg (PF_R+X)— .plt (only when imports.nfuncs > 0) - * rw_seg (PF_R+W)— .got.plt + .dynamic - * - * .dynamic lives in rw_seg because glibc's loader patches DT_* - * d_un.d_ptr fields in-place at startup (elf_get_dynamic_info - * adjusts STRTAB/SYMTAB/etc. by l_addr); a PF_R-only segment - * causes SEGV_ACCERR. musl's loader doesn't do this rewrite, but - * the RW placement is conventional and works for both. - */ - page = 0x4000u; /* keep aligned with layout_page_size default */ - { - /* Read the page size from layout_page_size by re-using the - * configured execmem if present — duplicates the helper rather - * than expose it; the value is only used for alignment. */ - const CfreeExecMem* m = (l && l->jit_host) ? l->jit_host->execmem : NULL; - if (m && m->page_size) page = (u64)m->page_size; - } - - u64 base_vaddr = 0; - u32 i; - for (i = 0; i < img->nsegments; ++i) { - u64 end = img->segments[i].vaddr + img->segments[i].mem_size; - if (end > base_vaddr) base_vaddr = end; - } - base_vaddr = ALIGN_UP(base_vaddr, page); - - /* Pack ro section offsets (relative to ro_seg.vaddr). 8-byte - * alignment for tables; 4-byte for .interp string. */ - u64 off = 0; - u64 interp_off = off; - off = ALIGN_UP(off + interp_bytes, 8u); - u64 dynsym_off = off; - off = ALIGN_UP(off + dynsym_bytes, 8u); - u64 dynstr_off = off; - off = ALIGN_UP(off + dynstr_bytes, 8u); - u64 gnuhash_off = off; - off = ALIGN_UP(off + gnuhash_bytes, 8u); - u64 rela_dyn_off = off; - off = ALIGN_UP(off + rela_dyn_bytes, 8u); - u64 rela_plt_off = off; - off = ALIGN_UP(off + rela_plt_bytes, 8u); - u64 ro_seg_size = off; - - /* When no PLT is needed, suppress the RX/.plt segment entirely. */ - int has_plt = imports.nfuncs > 0; - - /* Pack rw_seg offsets: .got.plt (when has_plt) followed by .dynamic. 
*/ - u64 rw_off = 0; - u64 gotplt_off = rw_off; - if (has_plt) rw_off = ALIGN_UP(rw_off + gotplt_bytes, 8u); - u64 dynamic_off = rw_off; - rw_off = ALIGN_UP(rw_off + dynamic_bytes, 8u); - u64 rw_seg_size = rw_off; - - u64 ro_vaddr = base_vaddr; - u64 rx_vaddr = ALIGN_UP(ro_vaddr + ro_seg_size, page); - u64 rw_vaddr = ALIGN_UP(rx_vaddr + (has_plt ? plt_bytes : 0u), page); - - /* rw_seg always exists (it carries .dynamic). */ - u32 nseg = 2u + (has_plt ? 1u : 0u); - u32 seg_base = dyn_alloc_segments(img, nseg); - u32 ro_seg_idx = seg_base + 0u; - u32 rx_seg_idx = has_plt ? seg_base + 1u : 0u; - u32 rw_seg_idx = seg_base + (has_plt ? 2u : 1u); - - LinkSegment* ro_seg = &img->segments[ro_seg_idx]; - memset(ro_seg, 0, sizeof(*ro_seg)); - ro_seg->id = (LinkSegmentId)(ro_seg_idx + 1u); - ro_seg->flags = SF_ALLOC; /* PF_R */ - ro_seg->file_offset = ro_vaddr; - ro_seg->vaddr = ro_vaddr; - ro_seg->file_size = ro_seg_size; - ro_seg->mem_size = ro_seg_size; - ro_seg->align = (u32)page; - ro_seg->nsections = 6; - img->segment_bytes[ro_seg_idx] = - ro_seg_size ? 
(u8*)h->alloc(h, (size_t)ro_seg_size, 16) : NULL; - img->segment_bytes_cap[ro_seg_idx] = (size_t)ro_seg_size; - if (ro_seg_size && !img->segment_bytes[ro_seg_idx]) - compiler_panic(img->c, no_loc(), "link: oom on ro dyn segment"); - if (ro_seg_size) - memset(img->segment_bytes[ro_seg_idx], 0, (size_t)ro_seg_size); - - if (has_plt) { - LinkSegment* rx_seg = &img->segments[rx_seg_idx]; - memset(rx_seg, 0, sizeof(*rx_seg)); - rx_seg->id = (LinkSegmentId)(rx_seg_idx + 1u); - rx_seg->flags = SF_ALLOC | SF_EXEC; - rx_seg->file_offset = rx_vaddr; - rx_seg->vaddr = rx_vaddr; - rx_seg->file_size = plt_bytes; - rx_seg->mem_size = plt_bytes; - rx_seg->align = (u32)page; - rx_seg->nsections = 1; - img->segment_bytes[rx_seg_idx] = (u8*)h->alloc(h, (size_t)plt_bytes, 16); - img->segment_bytes_cap[rx_seg_idx] = (size_t)plt_bytes; - if (!img->segment_bytes[rx_seg_idx]) - compiler_panic(img->c, no_loc(), "link: oom on .plt segment"); - memset(img->segment_bytes[rx_seg_idx], 0, (size_t)plt_bytes); - /* Stash plt / got.plt vaddrs now — the PLT body emit just below - * reads them, and the post-shift fixup in shift_image_addresses - * (link_elf.c) keys on these fields too. */ - dyn->plt_vaddr = rx_vaddr; - dyn->plt_size = plt_bytes; - dyn->got_plt_vaddr = rw_vaddr; - dyn->got_plt_size = gotplt_bytes; - /* PLT body emit: the descriptor owns the psABI-specific bytes. 
*/ - if (!arch->emit_plt0 || !arch->emit_plt_entry) - compiler_panic(l->c, no_loc(), "link: PLT emit not configured"); - { - u8* plt_b = img->segment_bytes[rx_seg_idx]; - u32 ki; - arch->emit_plt0(plt_b, dyn->plt_vaddr, dyn->got_plt_vaddr); - for (ki = 0; ki < imports.nfuncs; ++ki) { - u64 entry_vaddr = dyn->plt_vaddr + arch->plt0_size + - (u64)arch->plt_entry_size * (u64)ki; - u64 slot_vaddr = dyn->got_plt_vaddr + 8u * (3u + ki); - u8* p = - plt_b + arch->plt0_size + (size_t)arch->plt_entry_size * (size_t)ki; - arch->emit_plt_entry(p, entry_vaddr, slot_vaddr); - } - } - } - /* rw_seg always exists — it carries .dynamic, plus .got.plt when - * imports are present. */ - { - LinkSegment* rw_seg = &img->segments[rw_seg_idx]; - memset(rw_seg, 0, sizeof(*rw_seg)); - rw_seg->id = (LinkSegmentId)(rw_seg_idx + 1u); - rw_seg->flags = SF_ALLOC | SF_WRITE; - rw_seg->file_offset = rw_vaddr; - rw_seg->vaddr = rw_vaddr; - rw_seg->file_size = rw_seg_size; - rw_seg->mem_size = rw_seg_size; - rw_seg->align = (u32)page; - rw_seg->nsections = has_plt ? 2u : 1u; - img->segment_bytes[rw_seg_idx] = (u8*)h->alloc(h, (size_t)rw_seg_size, 16); - img->segment_bytes_cap[rw_seg_idx] = (size_t)rw_seg_size; - if (!img->segment_bytes[rw_seg_idx]) - compiler_panic(img->c, no_loc(), "link: oom on rw dyn segment"); - /* Zero-initialize. .got.plt[0] (&.dynamic) is filled later, after - * shift_image_addresses has bumped dyn->dynamic_vaddr. .dynamic - * body is built post-shift in link_emit_elf. Loader - * patches all .got.plt slots from .rela.plt before user code - * under DF_1_NOW. */ - memset(img->segment_bytes[rw_seg_idx], 0, (size_t)rw_seg_size); - } - img->nsegments += nseg; - - /* Step 6: synthetic LinkSection entries. Order in img->sections - * matches the loader-friendly file order and feeds emit's - * outshdr-merge pass. */ - u32 nsec = 7u + (has_plt ? 
2u : 0u); - u32 sec_base = dyn_alloc_sections(img, nsec); - - /* helper: populate a fresh LinkSection for a segment-internal range */ - /* Inline because the args differ enough (sem, name) per slot. */ - Sym name_interp = pool_intern_slice(l->c->global, SLICE_LIT(".interp")); - Sym name_dynsym = pool_intern_slice(l->c->global, SLICE_LIT(".dynsym")); - Sym name_dynstr = pool_intern_slice(l->c->global, SLICE_LIT(".dynstr")); - Sym name_gnu_hash = pool_intern_slice(l->c->global, SLICE_LIT(".gnu.hash")); - Sym name_rela_dyn = pool_intern_slice(l->c->global, SLICE_LIT(".rela.dyn")); - Sym name_rela_plt = pool_intern_slice(l->c->global, SLICE_LIT(".rela.plt")); - Sym name_dynamic = pool_intern_slice(l->c->global, SLICE_LIT(".dynamic")); - Sym name_plt = pool_intern_slice(l->c->global, SLICE_LIT(".plt")); - Sym name_got_plt = pool_intern_slice(l->c->global, SLICE_LIT(".got.plt")); - -#define INIT_SEC(IDX, NAME, SEG_IDX, OFF_IN_SEG, SIZE, ALIGN, FLAGS, SEM) \ - do { \ - LinkSection* ls = &img->sections[sec_base + (IDX)]; \ - memset(ls, 0, sizeof(*ls)); \ - ls->id = (LinkSectionId)(sec_base + (IDX) + 1u); \ - ls->input_id = LINK_INPUT_NONE; \ - ls->obj_section_id = OBJ_SEC_NONE; \ - ls->segment_id = img->segments[(SEG_IDX)].id; \ - ls->input_offset = (OFF_IN_SEG); \ - ls->file_offset = img->segments[(SEG_IDX)].file_offset + (OFF_IN_SEG); \ - ls->vaddr = img->segments[(SEG_IDX)].vaddr + (OFF_IN_SEG); \ - ls->size = (SIZE); \ - ls->flags = (FLAGS); \ - ls->align = (ALIGN); \ - ls->name = (NAME); \ - ls->sem = (SEM); \ - } while (0) - - INIT_SEC(0, name_interp, ro_seg_idx, interp_off, interp_bytes, 1, SF_ALLOC, - SSEM_PROGBITS); - INIT_SEC(1, name_dynsym, ro_seg_idx, dynsym_off, dynsym_bytes, 8, SF_ALLOC, - SSEM_PROGBITS); - INIT_SEC(2, name_dynstr, ro_seg_idx, dynstr_off, dynstr_bytes, 1, SF_ALLOC, - SSEM_PROGBITS); - INIT_SEC(3, name_gnu_hash, ro_seg_idx, gnuhash_off, gnuhash_bytes, 8, - SF_ALLOC, SSEM_PROGBITS); - INIT_SEC(4, name_rela_dyn, ro_seg_idx, rela_dyn_off, 
rela_dyn_bytes, 8, - SF_ALLOC, SSEM_PROGBITS); - INIT_SEC(5, name_rela_plt, ro_seg_idx, rela_plt_off, rela_plt_bytes, 8, - SF_ALLOC, SSEM_PROGBITS); - INIT_SEC(6, name_dynamic, rw_seg_idx, dynamic_off, dynamic_bytes, 8, - SF_ALLOC | SF_WRITE, SSEM_PROGBITS); - - dyn->sec_interp = (LinkSectionId)(sec_base + 0 + 1u); - dyn->sec_dynsym = (LinkSectionId)(sec_base + 1 + 1u); - dyn->sec_dynstr = (LinkSectionId)(sec_base + 2 + 1u); - dyn->sec_gnu_hash = (LinkSectionId)(sec_base + 3 + 1u); - dyn->sec_rela_dyn = (LinkSectionId)(sec_base + 4 + 1u); - dyn->sec_rela_plt = (LinkSectionId)(sec_base + 5 + 1u); - dyn->sec_dynamic = (LinkSectionId)(sec_base + 6 + 1u); - dyn->dynamic_vaddr = img->segments[rw_seg_idx].vaddr + dynamic_off; - dyn->dynamic_size = dynamic_bytes; - - if (has_plt) { - INIT_SEC(7, name_plt, rx_seg_idx, 0, plt_bytes, 16, SF_ALLOC | SF_EXEC, - SSEM_PROGBITS); - INIT_SEC(8, name_got_plt, rw_seg_idx, gotplt_off, gotplt_bytes, 8, - SF_ALLOC | SF_WRITE, SSEM_PROGBITS); - dyn->sec_plt = (LinkSectionId)(sec_base + 7 + 1u); - dyn->sec_got_plt = (LinkSectionId)(sec_base + 8 + 1u); - } -#undef INIT_SEC - - img->nsections += nsec; - - /* Step 7: copy .interp / .dynsym / .dynstr / .gnu.hash bytes into - * the ro segment. .dynamic body is built during emit (it embeds - * runtime vaddrs that PIE keeps image-relative; emit just reads - * the section ids' final vaddrs). */ - u8* ro_bytes = img->segment_bytes[ro_seg_idx]; - - /* .interp */ - if (interp_bytes && ro_bytes) - memcpy(ro_bytes + interp_off, interp_str, (size_t)interp_bytes); - - /* .dynsym: serialize DynSymRec to ELF64 wire layout. 
*/ - { - u32 si; - for (si = 0; si < dyn->ndynsym; ++si) { - u8* p = ro_bytes + dynsym_off + (u64)si * ELF64_SYM_SIZE; - const DynSymRec* r = &dyn->dynsym[si]; - wr_u32_le(p + 0, r->st_name); - p[4] = r->st_info; - p[5] = r->st_other; - wr_u16_le(p + 6, r->st_shndx); - wr_u64_le(p + 8, r->st_value); - wr_u64_le(p + 16, r->st_size); - } - } - - /* .dynstr */ - if (dynstr_bytes && ro_bytes && dyn->dynstr) - memcpy(ro_bytes + dynstr_off, dyn->dynstr, dyn->dynstr_len); - - /* .gnu.hash */ - if (gnuhash_bytes && ro_bytes && dyn->gnu_hash) - memcpy(ro_bytes + gnuhash_off, dyn->gnu_hash, dyn->gnu_hash_len); - - /* .rela.plt: emit JUMP_SLOT records, one per imported function, and - * stash each import's PLT-entry vaddr in `sym_plt_vaddr` so the - * apply pass can redirect CALL26/JUMP26 against the import. The - * record's r_offset addresses the .got.plt slot the PLT stub reads - * through; the loader patches that slot to the resolved runtime - * address before user code runs (DF_1_NOW, BIND_NOW). Bytes are - * written here at pre-shift vaddrs; link_emit re-serializes them - * after shift_image_addresses bumps the dyn vaddrs by headers_load. */ - { - u32 ki; - for (ki = 0; ki < imports.nfuncs; ++ki) { - LinkSymId lsid = imports.funcs[ki]; - u32 dynidx = dyn->sym_dynidx[lsid]; - u64 slot_vaddr = dyn->got_plt_vaddr + 8u * (3u + ki); - u64 plt_entry_vaddr = dyn->plt_vaddr + arch->plt0_size + - (u64)arch->plt_entry_size * (u64)ki; - DynRela* r = &dyn->rela_plt[ki]; - r->r_offset = slot_vaddr; - r->r_info = ELF64_R_INFO((u64)dynidx, arch->elf_r_jump_slot); - r->r_addend = 0; - /* Serialize into segment bytes (will be re-serialized post-shift). */ - u8* p = ro_bytes + rela_plt_off + (u64)ki * ELF64_RELA_SIZE; - wr_u64_le(p + 0, r->r_offset); - wr_u64_le(p + 8, r->r_info); - wr_u64_le(p + 16, (u64)r->r_addend); - /* sym_plt_vaddr is consulted by apply_all_relocs. 
*/ - dyn->sym_plt_vaddr[lsid] = plt_entry_vaddr; - } - } - - /* .rela.dyn entries (GLOB_DAT for imports referenced via .got, and - * RELATIVE for PIE internal abs fixups) are emitted by - * apply_all_relocs as it walks every relocation. layout_dyn - * leaves .rela.dyn empty here; the bytes are written post-shift in - * link_emit_elf. */ - - /* .got.plt prelude: for BIND_NOW we leave the body zero — the - * loader patches every slot from .rela.plt before user code. Some - * loaders still inspect slot 0 (&.dynamic) at startup; provide it - * so glibc-style loaders don't fault. The loader writes the link_map - * cookie into slot 1 at load time. */ - if (has_plt) { - u8* gp_bytes = img->segment_bytes[rw_seg_idx]; - if (gp_bytes && gotplt_bytes >= 8u) { - wr_u64_le(gp_bytes, dyn->dynamic_vaddr); - /* Slots 1, 2, and per-PLT slots stay zero until the loader - * fills them. Phase 5 would prefill the per-PLT slots with - * the address of PLT0 to support lazy binding. */ - } - } - - /* The .dynamic body is built later, after segment shifts are - * applied during emit (link_elf.c). emit_dynamic_body takes the - * post-shift vaddrs of every other dyn section and writes one - * DT_* entry per index. */ - - /* Synthesize linker-defined symbols that reference the .dynamic - * vaddr. Scrt1.o on Linux loads `_DYNAMIC` via ADRP+ADD, and - * libc_nonshared.a's atexit shim takes `__dso_handle` as the - * per-image identity (we use the .dynamic vaddr — any stable - * per-image address satisfies the contract since the shim only - * passes it through to __cxa_atexit, which the program-side glibc - * just stashes). 
*/ - link_define_boundary(l, img, "_DYNAMIC", dyn->dynamic_vaddr); - link_define_boundary(l, img, "__dso_handle", dyn->dynamic_vaddr); - - free_imports(h, &imports); -} - -/* ---- cleanup ---- */ - -void link_dyn_state_free(LinkImage* img) { - Heap* h = img->heap; - LinkDynState* dyn = img->dyn; - if (!dyn) return; - if (dyn->dynsym) h->free(h, dyn->dynsym, sizeof(*dyn->dynsym) * dyn->ndynsym); - if (dyn->dynstr) h->free(h, dyn->dynstr, dyn->dynstr_len); - if (dyn->gnu_hash) h->free(h, dyn->gnu_hash, dyn->gnu_hash_len); - if (dyn->rela_dyn) - h->free(h, dyn->rela_dyn, sizeof(*dyn->rela_dyn) * dyn->cap_rela_dyn); - if (dyn->rela_plt) - h->free(h, dyn->rela_plt, sizeof(*dyn->rela_plt) * dyn->nrela_plt); - if (dyn->needed) h->free(h, dyn->needed, sizeof(*dyn->needed) * dyn->nneeded); - if (dyn->sym_dynidx) - h->free(h, dyn->sym_dynidx, - sizeof(*dyn->sym_dynidx) * dyn->sym_dynidx_size); - if (dyn->sym_plt_vaddr) - h->free(h, dyn->sym_plt_vaddr, - sizeof(*dyn->sym_plt_vaddr) * dyn->sym_dynidx_size); - h->free(h, dyn, sizeof(*dyn)); - img->dyn = NULL; -} diff --git a/src/link/link_elf.c b/src/link/link_elf.c @@ -1,1417 +0,0 @@ -/* link_emit_elf: write a static ET_EXEC ELF64 image to the - * caller-provided Writer. - * - * 64-bit little-endian only. The per-arch ELF reloc-type tables in - * obj/elf_reloc_<arch>.c handle RelocKind <-> ELF translation; this - * file gets e_machine from the link arch descriptor. 
- * - * File layout (in write order): - * - * [headers PT_LOAD, PF_R, mapped at IMAGE_BASE] - * Ehdr64 - * Phdr64[nphdr] -- one per loaded segment + headers + - * PT_NOTE .note.gnu.build-id -- 12 + 16 = 28 bytes - * (deterministic 16-byte id) pad to PAGE - * - * [PT_LOAD per kept image segment, in img->segments order] - * segment bytes (padded to its file_offset) - * - * [non-allocatable sections, file-only] - * .symtab -- ELF64_SYM_SIZE * nsyms - * .strtab -- NUL-led blob - * .shstrtab -- NUL-led blob - * - * [section header table at e_shoff] - * Shdr64[nshdr] - * - * Section header schema (for nm / objdump -t / gdb consumption): - * - * 0 SHN_UNDEF (zero entry) - * N one shdr per loaded sub-region: .text/.rodata/.data/.bss as - * the corresponding RX/R/RW segments materialize (.bss split - * out as the trailing memsz>filesz tail of the RW segment). - * 1 .note.gnu.build-id (allocatable, in headers PT_LOAD) - * 1 .symtab (sh_link -> .strtab; sh_info = first non-local idx) - * 1 .strtab - * 1 .shstrtab (Ehdr64.e_shstrndx) - * - * Build-id is computed deterministically over the post-relocation - * segment bytes (FNV-1a 64 over each segment, mixed into a 128-bit - * accumulator). The 16-byte digest is written into the note before the - * note is emitted to the Writer. - * - * The image image-relative addresses on entry are bumped by - * align_up(headers_size, PAGE) before relocs are applied, exactly as - * before — segment bytes / symbol vaddrs land at their final IMAGE_BASE - * absolute addresses by the time relocs run. 
*/ - -#include <string.h> - -#include "core/heap.h" -#include "core/pool.h" -#include "core/slice.h" -#include "core/util.h" -#include "core/vec.h" -#include "link/link.h" -#include "link/link_arch.h" -#include "link/link_internal.h" -#include "obj/elf.h" - -static SrcLoc no_loc(void) { - SrcLoc l = {0, 0, 0}; - return l; -} - -/* ---- ELF64 wire structs (subset) ---- */ - -#define EI_NIDENT 16 - -typedef struct __attribute__((packed)) Ehdr64 { - u8 e_ident[EI_NIDENT]; - u16 e_type; - u16 e_machine; - u32 e_version; - u64 e_entry; - u64 e_phoff; - u64 e_shoff; - u32 e_flags; - u16 e_ehsize; - u16 e_phentsize; - u16 e_phnum; - u16 e_shentsize; - u16 e_shnum; - u16 e_shstrndx; -} Ehdr64; - -typedef struct __attribute__((packed)) Phdr64 { - u32 p_type; - u32 p_flags; - u64 p_offset; - u64 p_vaddr; - u64 p_paddr; - u64 p_filesz; - u64 p_memsz; - u64 p_align; -} Phdr64; - -typedef struct __attribute__((packed)) Shdr64 { - u32 sh_name; - u32 sh_type; - u64 sh_flags; - u64 sh_addr; - u64 sh_offset; - u64 sh_size; - u32 sh_link; - u32 sh_info; - u64 sh_addralign; - u64 sh_entsize; -} Shdr64; - -#define PT_NOTE 4 -#define PT_TLS 7 - -/* Static ET_EXEC base. ET_DYN (PIE) uses 0 — the loader picks the - * runtime base. The active value lives in `img_base` below; the macro - * stays for the static path's hard-coded vaddrs. */ -#define IMAGE_BASE_STATIC 0x400000ULL - -#define BUILD_ID_DESC_LEN 16u -#define NOTE_NAME_GNU "GNU" -#define NOTE_NAME_GNU_LEN 4u /* "GNU\0" */ -#define NOTE_BUILD_ID_TYPE 3u -#define BUILD_ID_NOTE_BYTES (12u + NOTE_NAME_GNU_LEN + BUILD_ID_DESC_LEN) - -/* ---- byte writer helpers ---- */ - -static void write_bytes(Writer* w, const void* data, size_t n) { - w->write(w, data, n); -} - -static void write_zeroes(Writer* w, size_t n) { - static const u8 zeroes[256] = {0}; - while (n) { - size_t step = n > sizeof(zeroes) ? 
sizeof(zeroes) : n; - w->write(w, zeroes, step); - n -= step; - } -} - -static u32 perms_to_pflags(u32 secflags) { - u32 f = PF_R; - if (secflags & SF_EXEC) f |= PF_X; - if (secflags & SF_WRITE) f |= PF_W; - return f; -} - -/* Scripted-layout post-pass: vaddrs are already final (the script - * pinned them via `. = …`), so only file offsets need to bump to - * leave room for ehdr+phdrs. Mirror of shift_image_addresses but - * touches only the file dimension. */ -static void shift_image_file_offsets(LinkImage* img, u64 delta) { - u32 i; - for (i = 0; i < img->nsegments; ++i) img->segments[i].file_offset += delta; - for (i = 0; i < img->nsections; ++i) img->sections[i].file_offset += delta; - for (i = 0; i < LinkRelocs_count(&img->relocs); ++i) - LinkRelocs_at(&img->relocs, i)->write_file_offset += delta; -} - -static void shift_image_addresses(LinkImage* img, u64 delta) { - u32 i; - for (i = 0; i < img->nsegments; ++i) { - img->segments[i].file_offset += delta; - img->segments[i].vaddr += delta; - } - for (i = 0; i < img->nsections; ++i) { - img->sections[i].file_offset += delta; - img->sections[i].vaddr += delta; - } - for (i = 0; i < LinkRelocs_count(&img->relocs); ++i) { - LinkRelocs_at(&img->relocs, i)->write_file_offset += delta; - LinkRelocs_at(&img->relocs, i)->write_vaddr += delta; - } - for (i = 0; i < LinkSyms_count(&img->syms); ++i) { - LinkSymbol* s = LinkSyms_at(&img->syms, i); - if (s->kind == SK_ABS) continue; - if (!s->defined) continue; - s->vaddr += delta; - } - /* tls_vaddr lives in the same image-relative coordinate system as - * the segments it tracks, so it bumps with them. */ - if (img->tls_memsz) img->tls_vaddr += delta; - /* Dyn-link state mirrors a few segment / section vaddrs and pre- - * populated DynRela.r_offset values from layout_dyn. Bump them so - * the post-shift .rela.plt / .dynamic emit and apply_all_relocs see - * the right addresses (sym_plt_vaddr is read to redirect CALL26 - * against imports). 
*/ - if (img->dyn) { - LinkDynState* dyn = img->dyn; - if (dyn->plt_vaddr) dyn->plt_vaddr += delta; - if (dyn->got_plt_vaddr) dyn->got_plt_vaddr += delta; - if (dyn->dynamic_vaddr) dyn->dynamic_vaddr += delta; - if (dyn->sym_plt_vaddr) { - u32 j; - for (j = 0; j < dyn->sym_dynidx_size; ++j) - if (dyn->sym_plt_vaddr[j]) dyn->sym_plt_vaddr[j] += delta; - } - if (dyn->rela_plt) { - u32 j; - for (j = 0; j < dyn->nrela_plt; ++j) dyn->rela_plt[j].r_offset += delta; - } - /* rela_dyn is populated by apply_all_relocs (which runs after this - * shift), so its records are already in post-shift coordinates. */ - } -} - -/* AArch64 ELF ABI: the per-thread TLS block starts at TP + 16 bytes - * (the TCB sits ahead of the TLS image). RISC-V psABI normally points - * tp at the start of the TLS image; the cfree harness's start.c - * places a 16-byte TCB ahead of .tdata and biases tp accordingly, so - * the TPREL offset for both arches is (target - tls_vaddr) + 16. */ -#define TLS_TCB_SIZE 16ull - -static int reloc_is_tlsle(RelocKind k) { - return k == R_AARCH64_TLSLE_ADD_TPREL_HI12 || - k == R_AARCH64_TLSLE_ADD_TPREL_LO12_NC || k == R_RV_TPREL_HI20 || - k == R_RV_TPREL_LO12_I || k == R_RV_TPREL_LO12_S; -} - -/* x86_64 SysV ABI: TLS variant II — the per-thread TLS image sits at - * *negative* offsets from %fs (which points at the TCB). start.c - * lays out [tdata | tbss | TCB] and arch_prctl(ARCH_SET_FS, &TCB), so - * a symbol at offset X within the TLS image is at fs-relative offset - * (X - tls_memsz). The two ELF reloc kinds R_X86_64_TPOFF32/_TPOFF64 - * encode that signed offset directly at the reloc site (no TCB bias — - * variant II's TCB sits *after* the image, so TPOFF is negative). */ -static int reloc_is_x64_tlsle(RelocKind k) { - return k == R_X64_TPOFF32 || k == R_X64_TPOFF64; -} - -static int reloc_is_abs(RelocKind k) { return k == R_ABS32 || k == R_ABS64; } - -/* Function-call relocs that may route through the PLT when the target - * is imported. 
aarch64 CALL26/JUMP26, x86_64 PLT32, and risc-v CALL_PLT - * (which cfree maps to R_PLT32) all carry the "call this address; if - * it's not resolvable here use the PLT trampoline" contract; the apply - * pass overwrites S with the PLT entry vaddr in that case. */ -static int reloc_is_branch26(RelocKind k) { - return k == R_AARCH64_CALL26 || k == R_AARCH64_JUMP26 || k == R_X64_PLT32 || - k == R_PLT32 || k == R_RV_CALL; -} - -static void emit_dyn_record(LinkImage* img, u64 site_vaddr, u32 reloc_type, - u32 dynidx, i64 addend) { - LinkDynState* dyn = img->dyn; - if (!dyn || !dyn->rela_dyn) return; - if (dyn->nrela_dyn >= dyn->cap_rela_dyn) { - compiler_panic(img->c, no_loc(), - "link: too many .rela.dyn records (%u >= %u); raise " - "cap_rela_dyn in layout_dyn", - dyn->nrela_dyn, dyn->cap_rela_dyn); - } - DynRela* r = &dyn->rela_dyn[dyn->nrela_dyn++]; - r->r_offset = site_vaddr; - r->r_info = ELF64_R_INFO((u64)dynidx, reloc_type); - r->r_addend = addend; -} - -static const LinkArchDesc* elf_arch_or_panic(Compiler* c, const char* where) { - const LinkArchDesc* arch = link_arch_desc_for(c); - if (!arch || !arch->e_machine) - compiler_panic(c, no_loc(), "%.*s: no ELF arch descriptor", - SLICE_ARG(slice_from_cstr(where))); - return arch; -} - -static void emit_relative_record(LinkImage* img, u64 site_vaddr, u64 addend) { - const LinkArchDesc* arch = elf_arch_or_panic(img->c, "link"); - emit_dyn_record(img, site_vaddr, arch->elf_r_relative, 0, (i64)addend); -} - -static void emit_globdat_record(LinkImage* img, u64 site_vaddr, u32 dynidx, - i64 addend) { - const LinkArchDesc* arch = elf_arch_or_panic(img->c, "link"); - emit_dyn_record(img, site_vaddr, arch->elf_r_glob_dat, dynidx, addend); -} - -/* RISC-V PCREL_LO12_* references the address of an AUIPC carrying the - * paired PCREL_HI20. Given the AUIPC's site vaddr (post-shift), find - * its PCREL_HI20 reloc and compute the displacement that AUIPC - * encoded — the LO12 then takes the low 12 bits of the same disp. 
- * - * Linear scan over img->relocs is fine in practice: kernel images and - * cg cases produce at most a few hundred relocs total. */ -static i64 rv_pcrel_lo12_disp(LinkImage* img, u64 auipc_vaddr, u64 img_base) { - u32 i; - for (i = 0; i < LinkRelocs_count(&img->relocs); ++i) { - const LinkRelocApply* hi = LinkRelocs_at(&img->relocs, i); - const LinkSymbol* hi_tgt; - u64 hi_S, hi_P; - if (hi->kind != R_RV_PCREL_HI20 && hi->kind != R_RV_GOT_HI20) continue; - if (hi->write_vaddr + img_base != auipc_vaddr) continue; - hi_tgt = LinkSyms_at(&img->syms, hi->target - 1); - hi_S = (hi_tgt->kind == SK_ABS) ? hi_tgt->vaddr : hi_tgt->vaddr + img_base; - hi_P = hi->write_vaddr + img_base; - return (i64)hi_S + hi->addend - (i64)hi_P; - } - compiler_panic(img->c, no_loc(), - "link: PCREL_LO12 at 0x%llx has no paired PCREL_HI20", - (unsigned long long)auipc_vaddr); - return 0; -} - -static void apply_all_relocs(LinkImage* img, u64 img_base) { - u32 i; - int pie = img->pie; - for (i = 0; i < LinkRelocs_count(&img->relocs); ++i) { - LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); - const LinkSymbol* tgt = LinkSyms_at(&img->syms, r->target - 1); - const LinkSection* sec = &img->sections[r->link_section_id - 1]; - const LinkSegment* seg = &img->segments[sec->segment_id - 1]; - u64 S, P; - u8* P_bytes; - if (reloc_is_tlsle(r->kind)) { - /* S is the target's TP-relative offset: distance from the - * TLS image start plus the 16-byte TCB. Both vaddrs are - * in the same (post-shift, image-relative) coordinate - * system, so img_base cancels out. */ - S = (tgt->vaddr - img->tls_vaddr) + TLS_TCB_SIZE; - } else if (reloc_is_x64_tlsle(r->kind)) { - /* x86_64 variant II: TP points just past the TLS image, so a - * symbol at offset X within the image is at TP-relative offset - * (X - tls_memsz). Cast through i64/u64 so the reloc apply - * writes the full 32- or 64-bit signed value. 
*/ - i64 off = (i64)(tgt->vaddr - img->tls_vaddr) - (i64)img->tls_memsz; - S = (u64)off; - } else if (r->kind == R_RV_PCREL_LO12_I || r->kind == R_RV_PCREL_LO12_S) { - /* PCREL_LO12: rewrite S so that link_reloc_apply's existing - * LO12_I/LO12_S encoder produces the right low 12 bits of the - * paired AUIPC's PC-relative displacement. The reloc's own - * addend is unused; signed lo12 = disp & 0xfff. */ - P = r->write_vaddr + img_base; - P_bytes = img->segment_bytes[seg->id - 1] + - (size_t)(r->write_file_offset - seg->file_offset); - { - i64 disp = rv_pcrel_lo12_disp(img, tgt->vaddr + img_base, img_base); - RelocKind alias = - (r->kind == R_RV_PCREL_LO12_I) ? R_RV_LO12_I : R_RV_LO12_S; - link_reloc_apply(img->c, alias, P_bytes, (u64)disp, 0, P); - } - continue; - } else { - S = tgt->vaddr + img_base; - if (tgt->kind == SK_ABS) S = tgt->vaddr; - } - P = r->write_vaddr + img_base; - P_bytes = img->segment_bytes[seg->id - 1] + - (size_t)(r->write_file_offset - seg->file_offset); - - /* Imported target: redirect / rewrite per reloc kind (Phase 5). - * - * - CALL26 / JUMP26: target the import's PLT entry. The PLT stub - * reads .got.plt[3+i], which the loader pre-fills via JUMP_SLOT - * (.rela.plt). S becomes the PLT-entry vaddr; the existing - * apply path computes the disp from there. - * - R_ABS{32,64}: leave the patch site at zero and emit a - * GLOB_DAT record so the loader writes the resolved address - * into the site at load time. This covers both - * layout_got-emitted .got slot fills (target = import) and any - * direct absolute reference in user data (e.g. a function - * pointer initializer). - * - GOT-page / LO12-NC against an import: emit_reloc_records has - * already redirected the target from the import to the - * synthetic .got slot symbol, so the apply path here sees the - * slot, not the import — nothing special needed; the slot's - * own R_ABS64 fill against the (vaddr=0) import will trip the - * abs-import branch above and emit GLOB_DAT. 
- * - * Anything else against an imported symbol (e.g. PREL19 / ADR - * etc.) is rare in real binaries and would need its own - * dynamic-reloc kind; panic loudly so a future test that needs - * it announces itself. */ - if (tgt->imported) { - /* `tgt` may be a per-input shadow LinkSymbol — resolve_undefs - * stamps `imported = 1` on every undef matched by name, but - * collect_imports only stashes plt_vaddr / dynidx on the - * canonical entry registered in img->globals. Resolve to the - * canonical id before indexing the dyn-state arrays. */ - LinkSymId canon_id = tgt->id; - if (tgt->name != 0) { - LinkSymId hit = symhash_get(&img->globals, tgt->name); - if (hit != LINK_SYM_NONE) canon_id = hit; - } - u32 dynidx = (img->dyn && canon_id < img->dyn->sym_dynidx_size) - ? img->dyn->sym_dynidx[canon_id] - : 0u; - if (reloc_is_branch26(r->kind)) { - u64 plt_v = (img->dyn && canon_id < img->dyn->sym_dynidx_size) - ? img->dyn->sym_plt_vaddr[canon_id] - : 0u; - if (plt_v == 0) - compiler_panic(img->c, no_loc(), - "link: imported sym has no PLT entry (CALL26)"); - S = plt_v + img_base; - link_reloc_apply(img->c, r->kind, P_bytes, S, r->addend, P); - continue; - } - if (reloc_is_abs(r->kind)) { - if (dynidx == 0) - compiler_panic(img->c, no_loc(), - "link: imported sym has no .dynsym entry"); - emit_globdat_record(img, r->write_vaddr, dynidx, r->addend); - /* Site bytes are irrelevant: the loader's GLOB_DAT writes - * (sym_value + r_addend) into r_offset before user code runs, - * overwriting whatever's there. Leaving the existing zero - * fill saves a write. */ - continue; - } - { - Slice nm_s = tgt->name ? pool_slice(img->c->global, tgt->name) - : SLICE_NULL; - const char* nm = nm_s.s ? 
nm_s.s : ""; - size_t nl = nm_s.len; - compiler_panic( - img->c, no_loc(), - "link: unhandled reloc kind %u against imported symbol '%.*s'", - (unsigned)r->kind, (int)nl, nm); - } - } - - /* PIE: an absolute reloc against a defined non-imported symbol - * stays image-relative in the file (the loader adds load-base via - * a synthesized R_AARCH64_RELATIVE). img_base is 0 for PIE so - * S above is already image-relative — the apply writes that into - * the site, and the RELATIVE record tells the loader to add - * load_base on top. */ - if (pie && reloc_is_abs(r->kind) && tgt->defined && tgt->kind != SK_ABS) { - emit_relative_record(img, r->write_vaddr, tgt->vaddr); - } - link_reloc_apply(img->c, r->kind, P_bytes, S, r->addend, P); - } -} - -/* The build-id payload is a format-agnostic image identity hash — - * see link_image_id_compute in link_image_id.c. Mach-O wraps the - * same bytes in LC_UUID; ELF wraps them in a .note.gnu.build-id. */ - -/* ---- string-table builder ---- */ - -typedef struct StrBuilder { - Heap* heap; - u8* data; - u32 len; - u32 cap; -} StrBuilder; - -static void strb_init(StrBuilder* s, Heap* h, u32 reserve) { - s->heap = h; - s->cap = reserve > 16u ? reserve : 16u; - s->data = (u8*)h->alloc(h, s->cap, 1); - if (!s->data) s->cap = 0; - s->len = 0; - if (s->cap) { - s->data[0] = 0; - s->len = 1; - } /* leading NUL */ -} - -static void strb_fini(StrBuilder* s) { - if (s->data) s->heap->free(s->heap, s->data, s->cap); - s->data = NULL; - s->cap = s->len = 0; -} - -static void strb_grow(StrBuilder* s, u32 need) { - (void)VEC_GROW(s->heap, s->data, s->cap, need); -} - -static u32 strb_add(StrBuilder* s, const char* str, u32 slen) { - u32 off; - if (slen == 0) return 0; - /* Linear dedup: scan existing data for a matching NUL-terminated - * substring. Strtabs are small enough to make this acceptable. 
*/ - if (s->len > slen) { - u32 i; - for (i = 0; i + slen < s->len; ++i) { - if (s->data[i + slen] == 0 && memcmp(s->data + i, str, slen) == 0) - return i; - } - } - off = s->len; - strb_grow(s, s->len + slen + 1u); - memcpy(s->data + s->len, str, slen); - s->data[s->len + slen] = 0; - s->len += slen + 1u; - return off; -} - -static u32 strb_add_cstr(StrBuilder* s, const char* str) { - return strb_add(s, str, (u32)slice_from_cstr(str).len); -} - -/* ---- symtab builder ---- */ - -typedef struct SymRec { - u32 st_name; - u8 st_info; - u8 st_other; - u16 st_shndx; - u64 st_value; - u64 st_size; -} SymRec; - -static u8 sym_kind_to_st_type(u8 kind) { - switch (kind) { - case SK_FUNC: - return STT_FUNC; - case SK_OBJ: - return STT_OBJECT; - case SK_SECTION: - return STT_SECTION; - case SK_FILE: - return STT_FILE; - case SK_TLS: - return STT_TLS; - case SK_IFUNC: - return STT_GNU_IFUNC; - case SK_NOTYPE: - case SK_ABS: - case SK_UNDEF: - default: - return STT_NOTYPE; - } -} - -static u8 sym_bind_to_st_bind(u8 bind) { - switch (bind) { - case SB_GLOBAL: - return STB_GLOBAL; - case SB_WEAK: - return STB_WEAK; - case SB_LOCAL: - default: - return STB_LOCAL; - } -} - -/* Produces one Elf64_Sym record on the wire from a SymRec. 
*/ -static void write_sym_rec(Writer* w, const SymRec* r) { - u8 buf[ELF64_SYM_SIZE]; - buf[0] = (u8)(r->st_name); - buf[1] = (u8)(r->st_name >> 8); - buf[2] = (u8)(r->st_name >> 16); - buf[3] = (u8)(r->st_name >> 24); - buf[4] = r->st_info; - buf[5] = r->st_other; - buf[6] = (u8)(r->st_shndx); - buf[7] = (u8)(r->st_shndx >> 8); - { - u32 i; - for (i = 0; i < 8; ++i) buf[8 + i] = (u8)(r->st_value >> (i * 8)); - for (i = 0; i < 8; ++i) buf[16 + i] = (u8)(r->st_size >> (i * 8)); - } - write_bytes(w, buf, sizeof buf); -} - -/* ---- section header layout ---- * - * - * Per-segment cuts: each kept image segment contributes 1 .text/.rodata - * shdr for its file portion, plus a separate .bss shdr for the trailing - * NOBITS portion of an RW segment (memsz > filesz). The headers PT_LOAD - * contributes a single .note.gnu.build-id shdr. Trailing non-alloc - * shdrs: .symtab .strtab .shstrtab (always 3). */ - -typedef struct OutShdr { - u32 shdr_idx; /* 1-based; assigned during planning */ - LinkSegmentId segment_id; - Sym name; - u16 sem; /* SecSem from source LinkSection */ - u32 flags; /* SF_* from source LinkSection */ - u32 align; - u64 vaddr; - u64 file_offset; - u64 size; - int is_nobits; -} OutShdr; - -static u16 sym_shndx_for(const LinkSymbol* s, const OutShdr* outshdrs, - u32 noutshdr) { - if (!s->defined) return SHN_UNDEF; - if (s->kind == SK_ABS) return SHN_ABS; - if (s->kind == SK_FILE) return SHN_ABS; - if (s->kind == SK_COMMON) return SHN_COMMON; - /* Find an output shdr whose [vaddr, vaddr+size) covers s->vaddr. - * Boundary symbols match at the upper edge. 
*/ - { - u32 i; - for (i = 0; i < noutshdr; ++i) { - u64 lo = outshdrs[i].vaddr; - u64 hi = lo + outshdrs[i].size; - if (s->vaddr >= lo && s->vaddr <= hi) return (u16)outshdrs[i].shdr_idx; - } - } - return SHN_ABS; -} - -static u32 sec_sem_to_sht(u16 sem) { - switch (sem) { - case SSEM_PROGBITS: - return SHT_PROGBITS; - case SSEM_NOBITS: - return SHT_NOBITS; - case SSEM_NOTE: - return SHT_NOTE; - case SSEM_INIT_ARRAY: - return SHT_INIT_ARRAY; - case SSEM_FINI_ARRAY: - return SHT_FINI_ARRAY; - case SSEM_PREINIT_ARRAY: - return SHT_PREINIT_ARRAY; - default: - return SHT_PROGBITS; - } -} - -static u64 sec_flags_to_shf(u32 flags) { - u64 r = 0; - if (flags & SF_ALLOC) r |= SHF_ALLOC; - if (flags & SF_EXEC) r |= SHF_EXECINSTR; - if (flags & SF_WRITE) r |= SHF_WRITE; - if (flags & SF_TLS) r |= SHF_TLS; - if (flags & SF_MERGE) r |= SHF_MERGE; - if (flags & SF_STRINGS) r |= SHF_STRINGS; - if (flags & SF_LINK_ORDER) r |= SHF_LINK_ORDER; - if (flags & SF_RETAIN) r |= SHF_GNU_RETAIN; - return r; -} - -void link_emit_elf(LinkImage* img, Writer* w) { - Heap* heap = img->heap; - Compiler* c = img->c; - const LinkArchDesc* arch = elf_arch_or_panic(c, "link_emit_elf"); - u32 e_machine = arch->e_machine; - if (img->entry_sym == LINK_SYM_NONE) - compiler_panic(c, no_loc(), "link_emit_elf: no resolved entry symbol"); - /* IFUNC trampolines: layout_iplt builds the .iplt stubs + .igot.plt - * slots and (when emit_static_exe was set) synthesizes a - * .init_array entry that calls __cfree_ifunc_init at startup. The - * rt member walks .iplt.pairs and fills each slot before user code - * runs. The ELF writer doesn't have to do anything special here. */ - - /* PIE / ET_DYN: img_base is 0 (the loader picks the runtime base; - * absolute relocs against internal symbols are emitted as - * R_AARCH64_RELATIVE in .rela.dyn). Otherwise classic ET_EXEC at - * IMAGE_BASE_STATIC. - * - * Scripted: the linker script pinned absolute vaddrs (e.g. - * `. 
= 0x40080000`); img_base stays 0 and the headers PT_LOAD / - * build-id note are dropped — the script's image is consumed by a - * raw loader (qemu -kernel, a bootloader) that doesn't need a - * self-describing memory image. */ - int pie = img->pie; - int scripted = img->scripted; - u64 img_base = (pie || scripted) ? 0ULL : IMAGE_BASE_STATIC; - - /* ---- plan number of program headers ---- - * - * 1 headers PT_LOAD + nsegments PT_LOAD + 1 PT_NOTE (build-id) - * + 1 PT_TLS when this image carries any TLS sections. - * + 4 dyn phdrs (PT_PHDR / PT_INTERP / PT_DYNAMIC / PT_GNU_STACK) on PIE. - * - * Scripted images skip the headers PT_LOAD and PT_NOTE: phdrs are - * just the per-segment PT_LOADs. */ - u32 has_tls = img->tls_memsz ? 1u : 0u; - u32 nphdr_extra_dyn = pie ? 4u : 0u; - u32 nphdr_headers = scripted ? 0u : 1u; - u32 nphdr_buildid = scripted ? 0u : 1u; - u32 nphdr_total = nphdr_headers + img->nsegments + nphdr_buildid + has_tls + - nphdr_extra_dyn; - u64 build_id_note_bytes = scripted ? 0ULL : BUILD_ID_NOTE_BYTES; - u64 headers_size = - sizeof(Ehdr64) + (u64)nphdr_total * sizeof(Phdr64) + build_id_note_bytes; - u64 headers_load = ALIGN_UP(headers_size, (u64)PAGE_SIZE); - - /* The build-id note lives inside the headers PT_LOAD at this offset. */ - u64 build_id_off = sizeof(Ehdr64) + (u64)nphdr_total * sizeof(Phdr64); - u64 build_id_addr = img_base + build_id_off; - - /* ---- shift image addresses, apply relocations ---- - * - * Must happen before segshdrs/symtab construction so they observe - * post-shift vaddrs (the values that will land in the file). */ - if (scripted) - shift_image_file_offsets(img, headers_load); - else - shift_image_addresses(img, headers_load); - apply_all_relocs(img, img_base); - - /* ---- write .dynamic body + re-serialize .rela.dyn (PIE only) ---- - * - * Both depend on post-shift vaddrs. .dynamic embeds image-relative - * pointers to .dynsym/.dynstr/.gnu.hash/.rela.dyn/.rela.plt/.got.plt - * (the loader adds load_base at runtime). 
.rela.dyn picked up - * RELATIVE records during apply_all_relocs; rewrite the section - * bytes to include them. */ - if (pie && img->dyn) { - LinkDynState* dyn = img->dyn; - const LinkSection* sec_dynamic = &img->sections[dyn->sec_dynamic - 1]; - const LinkSection* sec_dynsym = &img->sections[dyn->sec_dynsym - 1]; - const LinkSection* sec_dynstr = &img->sections[dyn->sec_dynstr - 1]; - const LinkSection* sec_gnuhash = &img->sections[dyn->sec_gnu_hash - 1]; - const LinkSection* sec_reladyn = &img->sections[dyn->sec_rela_dyn - 1]; - const LinkSection* sec_relaplt = (dyn->sec_rela_plt != LINK_SEC_NONE) - ? &img->sections[dyn->sec_rela_plt - 1] - : NULL; - const LinkSection* sec_gotplt = (dyn->sec_got_plt != LINK_SEC_NONE) - ? &img->sections[dyn->sec_got_plt - 1] - : NULL; - const LinkSegment* dseg = &img->segments[sec_dynamic->segment_id - 1]; - u8* dyn_bytes_at = img->segment_bytes[dseg->id - 1] + - (size_t)(sec_dynamic->file_offset - dseg->file_offset); - - /* Build DT_* entries in order. Layout matches count_dynamic_entries. */ - u32 written = 0; - u8* p = dyn_bytes_at; -#define DT_PUT(TAG, VAL) \ - do { \ - wr_u64_le(p, (u64)(TAG)); \ - wr_u64_le(p + 8, (u64)(VAL)); \ - p += 16; \ - written++; \ - } while (0) - - /* DT_NEEDED entries — d_un.d_val is the offset of the soname - * within .dynstr. The dynstr was built in layout_dyn with - * dedup; look each soname up by name to compute its offset. */ - { - u32 ni; - for (ni = 0; ni < dyn->nneeded; ++ni) { - Sym soname = dyn->needed[ni]; - Slice nm_s = pool_slice(c->global, soname); - const char* nm = nm_s.s; - size_t namelen = nm_s.len; - /* Linear search dynstr for this name. */ - u32 off = 0; - if (nm && namelen) { - u32 si; - for (si = 0; si + namelen < dyn->dynstr_len; ++si) { - if (dyn->dynstr[si + namelen] == 0 && - memcmp(dyn->dynstr + si, nm, namelen) == 0) { - off = si; - break; - } - } - /* Should always be present — collect_needed populated dynstr - * via build_dynsym? 
Actually build_dynsym only added import - * names. We need to also add NEEDED sonames. */ - if (off == 0) { - /* Fallback: append to dynstr. Phase 4 layout_dyn pre-sized - * .dynstr exactly to its current content; appending here - * would overflow the section. Instead, panic with a clear - * message — the soname was supposed to be added during - * layout. */ - compiler_panic(c, no_loc(), - "link_emit_elf: DT_NEEDED soname missing from " - ".dynstr"); - } - } - DT_PUT(DT_NEEDED, off); - } - } - - DT_PUT(DT_STRTAB, img_base + sec_dynstr->vaddr); - DT_PUT(DT_STRSZ, sec_dynstr->size); - DT_PUT(DT_SYMTAB, img_base + sec_dynsym->vaddr); - DT_PUT(DT_SYMENT, 24); - DT_PUT(DT_GNU_HASH, img_base + sec_gnuhash->vaddr); - /* DT_PLT* / DT_JMPREL only make sense when there's a PLT. Emitting - * them with size=0 / vaddr=0 (or pointing past the end of any - * PT_LOAD) trips llvm-readelf's "address not in any segment" check - * and confuses some loaders' DT walk. */ - if (dyn->nrela_plt) { - DT_PUT(DT_PLTGOT, sec_gotplt ? (img_base + sec_gotplt->vaddr) : 0); - DT_PUT(DT_PLTRELSZ, sec_relaplt ? sec_relaplt->size : 0); - DT_PUT(DT_PLTREL, DT_RELA); - DT_PUT(DT_JMPREL, sec_relaplt ? (img_base + sec_relaplt->vaddr) : 0); - } - if (dyn->cap_rela_dyn) { - DT_PUT(DT_RELA, img_base + sec_reladyn->vaddr); - DT_PUT(DT_RELASZ, sec_reladyn->size); - DT_PUT(DT_RELAENT, 24); - } - DT_PUT(DT_FLAGS_1, DF_1_NOW); - DT_PUT(DT_NULL, 0); -#undef DT_PUT - - /* Pad any pre-allocated tail with DT_NULL. */ - while (written < dyn->ndyn_entries) { - wr_u64_le(p, 0); - wr_u64_le(p + 8, 0); - p += 16; - written++; - } - - /* Re-serialize .rela.dyn body. GLOB_DAT records (imports against - * .got slots) and RELATIVE records (PIE internal abs64 fixups) - * are both populated during apply_all_relocs; .rela.dyn was empty - * coming out of layout_dyn. Trailing capacity stays zero — - * readers stop at the first R_AARCH64_NONE record. 
*/ - { - const LinkSegment* rdseg = &img->segments[sec_reladyn->segment_id - 1]; - u8* rd_bytes = img->segment_bytes[rdseg->id - 1] + - (size_t)(sec_reladyn->file_offset - rdseg->file_offset); - u32 i; - for (i = 0; i < dyn->nrela_dyn; ++i) { - const DynRela* rr = &dyn->rela_dyn[i]; - u8* rp = rd_bytes + (u64)i * ELF64_RELA_SIZE; - wr_u64_le(rp + 0, rr->r_offset); - wr_u64_le(rp + 8, rr->r_info); - wr_u64_le(rp + 16, (u64)rr->r_addend); - } - } - - /* Re-serialize .rela.plt body. JUMP_SLOT records were written by - * layout_dyn at pre-shift vaddrs; shift_image_addresses bumped - * dyn->rela_plt[i].r_offset along with the rest, so the post-shift - * values match the .got.plt slot vaddrs the loader will patch. */ - if (sec_relaplt && dyn->nrela_plt) { - const LinkSegment* rpseg = &img->segments[sec_relaplt->segment_id - 1]; - u8* rp_bytes = img->segment_bytes[rpseg->id - 1] + - (size_t)(sec_relaplt->file_offset - rpseg->file_offset); - u32 i; - for (i = 0; i < dyn->nrela_plt; ++i) { - const DynRela* rr = &dyn->rela_plt[i]; - u8* rp = rp_bytes + (u64)i * ELF64_RELA_SIZE; - wr_u64_le(rp + 0, rr->r_offset); - wr_u64_le(rp + 8, rr->r_info); - wr_u64_le(rp + 16, (u64)rr->r_addend); - } - } - - /* Re-write .got.plt[0] = &.dynamic with the post-shift vaddr. - * layout_dyn wrote the pre-shift value into the segment bytes; - * shift_image_addresses bumped dyn->dynamic_vaddr so we can refill - * the slot here. Slots 1 and 2 (link_map cookie, - * _dl_runtime_resolve) are loader-owned for lazy binding; under - * DF_1_NOW they're never read so leaving them zero is fine. */ - if (sec_gotplt && dyn->dynamic_vaddr) { - const LinkSegment* gpseg = &img->segments[sec_gotplt->segment_id - 1]; - u8* gp_bytes = img->segment_bytes[gpseg->id - 1] + - (size_t)(sec_gotplt->file_offset - gpseg->file_offset); - wr_u64_le(gp_bytes, dyn->dynamic_vaddr); - } - } - - /* ---- compute build-id (post-reloc, deterministic) ---- - * - * Format-agnostic — Mach-O LC_UUID will hash the same bytes. 
*/ - u8 build_id[BUILD_ID_DESC_LEN]; - link_image_id_compute(img, build_id); - - /* ---- plan section headers covering loaded segments ---- - * - * Worst case: 1 file shdr per segment + 1 .bss shdr if RW has a tail. - * shdr indices: 0=NULL, 1..nsegshdr=these, then build-id/symtab/... - */ - /* Walk img->sections sorted by (segment_id, vaddr) and merge into - * one OutShdr per (segment_id, name) run. layout already places - * same-name sections adjacent within a segment, so a stable - * by-vaddr sort followed by run-length grouping captures it. */ - OutShdr* outshdrs; - u32 noutshdr = 0; - u32 outshdr_cap = img->nsections + 1u; - outshdrs = (OutShdr*)heap->alloc(heap, sizeof(*outshdrs) * outshdr_cap, - _Alignof(OutShdr)); - if (!outshdrs) compiler_panic(c, no_loc(), "link_emit_elf: oom on outshdrs"); - memset(outshdrs, 0, sizeof(*outshdrs) * outshdr_cap); - { - /* Build a sort index over LinkSection ids by (segment_id, vaddr). */ - u32* order = (u32*)heap->alloc(heap, sizeof(u32) * (img->nsections + 1u), - _Alignof(u32)); - if (!order && img->nsections) - compiler_panic(c, no_loc(), "link_emit_elf: oom on shdr sort"); - u32 i, j; - for (i = 0; i < img->nsections; ++i) order[i] = i; - /* Insertion sort — section count is small. */ - for (i = 1; i < img->nsections; ++i) { - u32 cur = order[i]; - const LinkSection* a = &img->sections[cur]; - j = i; - while (j > 0) { - const LinkSection* b = &img->sections[order[j - 1]]; - if ((b->segment_id < a->segment_id) || - (b->segment_id == a->segment_id && b->vaddr <= a->vaddr)) - break; - order[j] = order[j - 1]; - --j; - } - order[j] = cur; - } - for (i = 0; i < img->nsections; ++i) { - const LinkSection* ls = &img->sections[order[i]]; - OutShdr* tail = noutshdr ? 
&outshdrs[noutshdr - 1] : NULL; - int merge = tail && tail->segment_id == ls->segment_id && - tail->name == ls->name && - tail->is_nobits == (ls->sem == SSEM_NOBITS); - if (merge) { - u64 end = ls->vaddr + ls->size; - u64 prev_end = tail->vaddr + tail->size; - if (end > prev_end) tail->size = end - tail->vaddr; - if (ls->align > tail->align) tail->align = ls->align; - } else { - OutShdr* o = &outshdrs[noutshdr]; - o->shdr_idx = 1u + noutshdr; - o->segment_id = ls->segment_id; - o->name = ls->name; - o->sem = ls->sem; - o->flags = ls->flags; - o->align = ls->align; - o->vaddr = ls->vaddr; - o->file_offset = ls->file_offset; - o->size = ls->size; - o->is_nobits = (ls->sem == SSEM_NOBITS); - noutshdr++; - } - } - heap->free(heap, order, sizeof(u32) * (img->nsections + 1u)); - } - - /* ---- build .shstrtab ---- */ - StrBuilder shstrtab; - strb_init(&shstrtab, heap, 128); - u32 sh_name_symtab = strb_add_cstr(&shstrtab, ".symtab"); - u32 sh_name_strtab = strb_add_cstr(&shstrtab, ".strtab"); - u32 sh_name_shstrtab = strb_add_cstr(&shstrtab, ".shstrtab"); - u32 sh_name_buildid = strb_add_cstr(&shstrtab, ".note.gnu.build-id"); - /* Per-output-shdr names — interned strings from input section names. */ - u32* outshdr_name_off = - (u32*)heap->alloc(heap, sizeof(u32) * (noutshdr + 1u), _Alignof(u32)); - if (!outshdr_name_off && noutshdr) - compiler_panic(c, no_loc(), "link_emit_elf: oom on shdr name table"); - { - u32 i; - for (i = 0; i < noutshdr; ++i) { - const OutShdr* o = &outshdrs[i]; - if (o->name) { - Slice nm_s = pool_slice(c->global, o->name); - const char* nm = nm_s.s; - size_t nlen = nm_s.len; - outshdr_name_off[i] = - nm && nlen ? 
strb_add(&shstrtab, nm, (u32)nlen) : 0; - } else { - outshdr_name_off[i] = 0; - } - } - } - - u32 nshdr = 1u + noutshdr + 4u; - u32 shndx_buildid = 1u + noutshdr; - u32 shndx_symtab = shndx_buildid + 1u; - u32 shndx_strtab = shndx_symtab + 1u; - u32 shndx_shstrtab = shndx_strtab + 1u; - - /* ---- build .symtab + .strtab ---- - * - * Two passes (locals first, then globals/weaks). Slot 0 is - * STN_UNDEF. Globals are deduped via img->globals — only the - * canonical entry per name is emitted, since per-input undef - * records keep their own LinkSymId after resolve_undefs's - * "copy fields from canonical def" step. sh_info = first non-local - * idx. */ - StrBuilder strtab; - strb_init(&strtab, heap, 256); - - SymRec* recs = (SymRec*)heap->alloc( - heap, sizeof(*recs) * (LinkSyms_count(&img->syms) + 1u), - _Alignof(SymRec)); - if (!recs) compiler_panic(c, no_loc(), "link_emit_elf: oom on symrecs"); - u32 nsyms_emit = 0; - u32 first_global_idx; - memset(&recs[nsyms_emit++], 0, sizeof(*recs)); /* slot 0 */ - first_global_idx = nsyms_emit; - - { - u32 pass, i; - for (pass = 0; pass < 2; ++pass) { - int want_local = (pass == 0); - if (!want_local) first_global_idx = nsyms_emit; - for (i = 0; i < LinkSyms_count(&img->syms); ++i) { - const LinkSymbol* s = LinkSyms_at(&img->syms, i); - int is_local = (s->bind == SB_LOCAL); - size_t namelen = 0; - const char* nm; - u8 st_type, st_bind; - u16 shndx; - u64 st_value; - SymRec* r; - if (want_local != is_local) continue; - if (s->name == 0 && s->kind != SK_FILE) continue; - /* Dedupe globals: per-input undef-of-X and the canonical - * def-of-X are separate img->syms entries (resolve_undefs - * mirrors fields onto the undef). Only the canonical - * (first registered) entry is in img->globals. Skip the - * shadow copies. */ - if (!is_local && s->name) { - LinkSymId canonical = symhash_get(&img->globals, s->name); - if (canonical != LINK_SYM_NONE && canonical != s->id) continue; - } - { - Slice nm_s = s->name ? 
pool_slice(c->global, s->name) : SLICE_NULL; - nm = nm_s.s ? nm_s.s : ""; - namelen = nm_s.len; - } - shndx = sym_shndx_for(s, outshdrs, noutshdr); - /* st_value: in ET_EXEC, defined non-ABS symbols carry - * absolute virtual addresses (IMAGE_BASE + image - * vaddr); ABS symbols carry their own value verbatim. */ - if (s->kind == SK_FILE) - st_value = 0; - else if (s->kind == SK_ABS) - st_value = s->vaddr; - else if (s->defined) - st_value = img_base + s->vaddr; - else - st_value = 0; - st_type = sym_kind_to_st_type(s->kind); - st_bind = sym_bind_to_st_bind(s->bind); - r = &recs[nsyms_emit++]; - memset(r, 0, sizeof(*r)); - r->st_name = (nm && namelen) ? strb_add(&strtab, nm, (u32)namelen) : 0; - r->st_info = ELF64_ST_INFO(st_bind, st_type); - r->st_other = STV_DEFAULT; - r->st_shndx = shndx; - r->st_value = st_value; - r->st_size = s->size; - } - } - } - - /* ---- compute file offsets for trailing non-alloc sections ---- */ - /* End of segment data: the highest (file_offset + file_size) across - * loaded segments. */ - u64 end_of_segs = headers_load; - { - u32 i; - for (i = 0; i < img->nsegments; ++i) { - const LinkSegment* seg = &img->segments[i]; - u64 e = seg->file_offset + seg->file_size; - if (e > end_of_segs) end_of_segs = e; - } - } - u64 symtab_off = ALIGN_UP(end_of_segs, (u64)8u); - u64 symtab_size = (u64)ELF64_SYM_SIZE * nsyms_emit; - u64 strtab_off = symtab_off + symtab_size; - u64 strtab_size = strtab.len; - u64 shstrtab_off = strtab_off + strtab_size; - u64 shstrtab_size = shstrtab.len; - u64 shdr_off = ALIGN_UP(shstrtab_off + shstrtab_size, (u64)8u); - - /* ---- build phdrs ---- */ - Phdr64* phdrs = (Phdr64*)heap->alloc(heap, sizeof(Phdr64) * nphdr_total, - _Alignof(Phdr64)); - if (!phdrs) compiler_panic(c, no_loc(), "link_emit_elf: oom on phdrs"); - memset(phdrs, 0, sizeof(Phdr64) * nphdr_total); - { - u32 pi = 0; - /* PT_PHDR points at the phdr table itself within the headers - * PT_LOAD. 
Required by the runtime loader for ET_DYN to know - * where its own program headers live. Must appear before the - * first PT_LOAD on dynamic exes (musl checks). */ - if (pie) { - phdrs[pi].p_type = PT_PHDR; - phdrs[pi].p_flags = PF_R; - phdrs[pi].p_offset = sizeof(Ehdr64); - phdrs[pi].p_vaddr = img_base + sizeof(Ehdr64); - phdrs[pi].p_paddr = phdrs[pi].p_vaddr; - phdrs[pi].p_filesz = (u64)nphdr_total * sizeof(Phdr64); - phdrs[pi].p_memsz = phdrs[pi].p_filesz; - phdrs[pi].p_align = 8; - pi++; - } - /* Headers PT_LOAD (covers ehdr + phdrs + build-id note). - * Scripted images don't emit one — see plan note above. */ - if (!scripted) { - phdrs[pi].p_type = PT_LOAD; - phdrs[pi].p_flags = PF_R; - phdrs[pi].p_offset = 0; - phdrs[pi].p_vaddr = img_base; - phdrs[pi].p_paddr = img_base; - phdrs[pi].p_filesz = headers_size; - phdrs[pi].p_memsz = headers_size; - phdrs[pi].p_align = PAGE_SIZE; - pi++; - } - /* Per-segment PT_LOAD. */ - u32 i; - for (i = 0; i < img->nsegments; ++i) { - const LinkSegment* seg = &img->segments[i]; - Phdr64* p = &phdrs[pi++]; - p->p_type = PT_LOAD; - p->p_flags = perms_to_pflags(seg->flags); - p->p_offset = seg->file_offset; - p->p_vaddr = img_base + seg->vaddr; /* post-shift */ - p->p_paddr = p->p_vaddr; - p->p_filesz = seg->file_size; - /* TLS .tbss is per-thread template space, not a loadable bss - * region — PT_TLS already records the full memsz (incl. .tbss) - * for the loader's per-thread allocation, so the matching - * PT_LOAD must not extend memsz past filesz. qemu-riscv64 - * rejects PT_LOADs with memsz>filesz on non-writable mappings - * ("PT_LOAD with non-writable bss"), and the SEG_TLS perms are - * SF_ALLOC|SF_TLS only. */ - p->p_memsz = (seg->flags & SF_TLS) ? seg->file_size : seg->mem_size; - p->p_align = seg->align ? seg->align : PAGE_SIZE; - } - /* PT_NOTE for build-id. Scripted images skip the build-id entirely. 
*/ - if (!scripted) { - phdrs[pi].p_type = PT_NOTE; - phdrs[pi].p_flags = PF_R; - phdrs[pi].p_offset = build_id_off; - phdrs[pi].p_vaddr = build_id_addr; - phdrs[pi].p_paddr = build_id_addr; - phdrs[pi].p_filesz = BUILD_ID_NOTE_BYTES; - phdrs[pi].p_memsz = BUILD_ID_NOTE_BYTES; - phdrs[pi].p_align = 4; - pi++; - } - /* PT_TLS describing the .tdata template + .tbss zero-fill. - * vaddr/file_offset point at the same bytes the matching - * PT_LOAD already covers — the loader uses PT_TLS to size - * each thread's TLS block and to seed it from .tdata. */ - if (has_tls) { - phdrs[pi].p_type = PT_TLS; - phdrs[pi].p_flags = PF_R; - phdrs[pi].p_offset = img->tls_vaddr; - phdrs[pi].p_vaddr = img_base + img->tls_vaddr; - phdrs[pi].p_paddr = phdrs[pi].p_vaddr; - phdrs[pi].p_filesz = img->tls_filesz; - phdrs[pi].p_memsz = img->tls_memsz; - phdrs[pi].p_align = img->tls_align ? img->tls_align : 1u; - pi++; - } - /* Dynamic phdrs. PT_INTERP and PT_DYNAMIC point at the matching - * sections (which layout_dyn placed in the ro/rw_dyn segments). - * PT_GNU_STACK marks the stack as non-executable (filesz=0). 
*/ - if (pie && img->dyn) { - LinkDynState* dyn = img->dyn; - const LinkSection* sec_interp = &img->sections[dyn->sec_interp - 1]; - const LinkSection* sec_dynamic = &img->sections[dyn->sec_dynamic - 1]; - phdrs[pi].p_type = PT_INTERP; - phdrs[pi].p_flags = PF_R; - phdrs[pi].p_offset = sec_interp->file_offset; - phdrs[pi].p_vaddr = img_base + sec_interp->vaddr; - phdrs[pi].p_paddr = phdrs[pi].p_vaddr; - phdrs[pi].p_filesz = sec_interp->size; - phdrs[pi].p_memsz = sec_interp->size; - phdrs[pi].p_align = 1; - pi++; - phdrs[pi].p_type = PT_DYNAMIC; - phdrs[pi].p_flags = PF_R | PF_W; - phdrs[pi].p_offset = sec_dynamic->file_offset; - phdrs[pi].p_vaddr = img_base + sec_dynamic->vaddr; - phdrs[pi].p_paddr = phdrs[pi].p_vaddr; - phdrs[pi].p_filesz = sec_dynamic->size; - phdrs[pi].p_memsz = sec_dynamic->size; - phdrs[pi].p_align = 8; - pi++; - phdrs[pi].p_type = PT_GNU_STACK; - phdrs[pi].p_flags = PF_R | PF_W; - phdrs[pi].p_offset = 0; - phdrs[pi].p_vaddr = 0; - phdrs[pi].p_paddr = 0; - phdrs[pi].p_filesz = 0; - phdrs[pi].p_memsz = 0; - phdrs[pi].p_align = 16; - pi++; - /* PT_GNU_RELRO would mark the read-only-after-relocation span - * here. Phase 6 leaves it out — it's an optimization the loader - * can live without, and our ro_seg already lives in a PF_R - * PT_LOAD that's never made writable. */ - } else if (pie) { - /* dyn was nominally requested but layout_dyn early-out — no - * imports and no DSO inputs. The image still needs a PT_GNU_STACK - * for kernels that demand it; INTERP/DYNAMIC are skipped. */ - (void)0; - } - (void)pi; - } - - /* ---- build ehdr ---- */ - Ehdr64 ehdr; - memset(&ehdr, 0, sizeof(ehdr)); - ehdr.e_ident[0] = ELFMAG0; - ehdr.e_ident[1] = ELFMAG1; - ehdr.e_ident[2] = ELFMAG2; - ehdr.e_ident[3] = ELFMAG3; - ehdr.e_ident[4] = ELFCLASS64; - ehdr.e_ident[5] = ELFDATA2LSB; - ehdr.e_ident[6] = EV_CURRENT; - ehdr.e_ident[7] = ELFOSABI_NONE; - ehdr.e_type = pie ? 
ET_DYN : ET_EXEC; - ehdr.e_machine = (u16)e_machine; - ehdr.e_version = EV_CURRENT; - ehdr.e_entry = img_base + LinkSyms_at(&img->syms, img->entry_sym - 1)->vaddr; - ehdr.e_phoff = sizeof(Ehdr64); - ehdr.e_shoff = shdr_off; - ehdr.e_flags = 0; - ehdr.e_ehsize = sizeof(Ehdr64); - ehdr.e_phentsize = sizeof(Phdr64); - ehdr.e_phnum = (u16)nphdr_total; - ehdr.e_shentsize = sizeof(Shdr64); - ehdr.e_shnum = (u16)nshdr; - ehdr.e_shstrndx = (u16)shndx_shstrtab; - - /* ---- write ehdr, phdrs, build-id note, pad ---- */ - u64 cur_off; - write_bytes(w, &ehdr, sizeof(ehdr)); - write_bytes(w, phdrs, sizeof(Phdr64) * nphdr_total); - cur_off = sizeof(ehdr) + sizeof(Phdr64) * nphdr_total; - - /* .note.gnu.build-id wire format: - * u32 namesz = 4 ("GNU\0") - * u32 descsz = 16 - * u32 type = NT_GNU_BUILD_ID (3) - * "GNU\0" - * <16 bytes of build-id> - * - * Scripted images don't carry build-id; they have no PT_NOTE phdr to - * point at it and the file payload would just be dead bytes. */ - if (!scripted) { - u8 nh[12]; - u32 v; - v = NOTE_NAME_GNU_LEN; - nh[0] = (u8)v; - nh[1] = (u8)(v >> 8); - nh[2] = (u8)(v >> 16); - nh[3] = (u8)(v >> 24); - v = BUILD_ID_DESC_LEN; - nh[4] = (u8)v; - nh[5] = (u8)(v >> 8); - nh[6] = (u8)(v >> 16); - nh[7] = (u8)(v >> 24); - v = NOTE_BUILD_ID_TYPE; - nh[8] = (u8)v; - nh[9] = (u8)(v >> 8); - nh[10] = (u8)(v >> 16); - nh[11] = (u8)(v >> 24); - write_bytes(w, nh, sizeof nh); - write_bytes(w, NOTE_NAME_GNU "\0", NOTE_NAME_GNU_LEN); - write_bytes(w, build_id, BUILD_ID_DESC_LEN); - cur_off += BUILD_ID_NOTE_BYTES; - } - - /* Pad to first segment file_offset (== headers_load). 
*/ - { - u32 i; - for (i = 0; i < img->nsegments; ++i) { - const LinkSegment* seg = &img->segments[i]; - if (seg->file_size == 0) continue; - if (cur_off < seg->file_offset) { - write_zeroes(w, (size_t)(seg->file_offset - cur_off)); - cur_off = seg->file_offset; - } - write_bytes(w, img->segment_bytes[seg->id - 1], (size_t)seg->file_size); - cur_off += seg->file_size; - } - } - - /* ---- write trailing non-alloc sections ---- */ - if (cur_off < symtab_off) { - write_zeroes(w, (size_t)(symtab_off - cur_off)); - cur_off = symtab_off; - } - { - u32 i; - for (i = 0; i < nsyms_emit; ++i) write_sym_rec(w, &recs[i]); - cur_off += symtab_size; - } - if (strtab.len) { - write_bytes(w, strtab.data, strtab.len); - cur_off += strtab.len; - } - if (shstrtab.len) { - write_bytes(w, shstrtab.data, shstrtab.len); - cur_off += shstrtab.len; - } - - /* ---- write section header table ---- */ - if (cur_off < shdr_off) { - write_zeroes(w, (size_t)(shdr_off - cur_off)); - cur_off = shdr_off; - } - { - Shdr64 sh; - u32 i; - /* shdr 0: NULL */ - memset(&sh, 0, sizeof(sh)); - write_bytes(w, &sh, sizeof(sh)); - /* Locate dyn-section names (interned earlier in layout_dyn) so - * we can override sh_type / sh_link / sh_info / sh_entsize for - * .dynsym / .dynstr / .gnu.hash / .rela.dyn / .rela.plt / - * .dynamic. The sh_link cross-references (e.g., .dynsym -> - * .dynstr) need the matching shdr indices, which we look up by - * comparing OutShdr.name to the same Sym values. 
*/ - Sym n_dynsym = 0, n_dynstr = 0, n_gnuhash = 0; - Sym n_reladyn = 0, n_relaplt = 0, n_dynamic = 0; - Sym n_gotplt = 0; - if (pie && img->dyn) { - n_dynsym = pool_intern_slice(c->global, SLICE_LIT(".dynsym")); - n_dynstr = pool_intern_slice(c->global, SLICE_LIT(".dynstr")); - n_gnuhash = pool_intern_slice(c->global, SLICE_LIT(".gnu.hash")); - n_reladyn = pool_intern_slice(c->global, SLICE_LIT(".rela.dyn")); - n_relaplt = pool_intern_slice(c->global, SLICE_LIT(".rela.plt")); - n_dynamic = pool_intern_slice(c->global, SLICE_LIT(".dynamic")); - n_gotplt = pool_intern_slice(c->global, SLICE_LIT(".got.plt")); - } - /* Two-pass: first find dynsym/dynstr/gotplt indices for sh_link - * fixups, then emit. */ - u32 idx_dynsym = 0, idx_dynstr = 0, idx_gotplt = 0; - if (pie && img->dyn) { - for (i = 0; i < noutshdr; ++i) { - Sym nm = outshdrs[i].name; - u32 ix = outshdrs[i].shdr_idx; - if (nm == n_dynsym) - idx_dynsym = ix; - else if (nm == n_dynstr) - idx_dynstr = ix; - else if (nm == n_gotplt) - idx_gotplt = ix; - } - } - /* per-name output shdrs */ - for (i = 0; i < noutshdr; ++i) { - const OutShdr* o = &outshdrs[i]; - memset(&sh, 0, sizeof(sh)); - sh.sh_name = outshdr_name_off[i]; - sh.sh_type = sec_sem_to_sht(o->sem); - sh.sh_flags = sec_flags_to_shf(o->flags); - sh.sh_addr = img_base + o->vaddr; - sh.sh_offset = o->file_offset; - sh.sh_size = o->size; - sh.sh_link = 0; - sh.sh_info = 0; - sh.sh_addralign = o->align ? o->align : 1; - sh.sh_entsize = (o->sem == SSEM_INIT_ARRAY || o->sem == SSEM_FINI_ARRAY || - o->sem == SSEM_PREINIT_ARRAY) - ? 8 - : 0; - /* Dyn-section overrides: sh_type / sh_link / sh_info / entsize. 
*/ - if (pie && img->dyn) { - if (o->name == n_dynsym) { - sh.sh_type = SHT_DYNSYM; - sh.sh_link = idx_dynstr; - sh.sh_info = img->dyn->first_global; - sh.sh_entsize = 24; - } else if (o->name == n_dynstr) { - sh.sh_type = SHT_STRTAB; - } else if (o->name == n_gnuhash) { - sh.sh_type = SHT_GNU_HASH; - sh.sh_link = idx_dynsym; - } else if (o->name == n_reladyn) { - sh.sh_type = SHT_RELA; - sh.sh_link = idx_dynsym; - sh.sh_entsize = 24; - } else if (o->name == n_relaplt) { - sh.sh_type = SHT_RELA; - sh.sh_link = idx_dynsym; - sh.sh_info = idx_gotplt; - sh.sh_entsize = 24; - sh.sh_flags |= SHF_INFO_LINK; - } else if (o->name == n_dynamic) { - sh.sh_type = SHT_DYNAMIC; - sh.sh_link = idx_dynstr; - sh.sh_entsize = 16; - } else if (o->name == n_gotplt) { - sh.sh_entsize = 8; - } - } - write_bytes(w, &sh, sizeof(sh)); - } - /* shdr: .note.gnu.build-id (allocatable; in headers PT_LOAD) */ - memset(&sh, 0, sizeof(sh)); - sh.sh_name = sh_name_buildid; - sh.sh_type = SHT_NOTE; - sh.sh_flags = SHF_ALLOC; - sh.sh_addr = build_id_addr; - sh.sh_offset = build_id_off; - sh.sh_size = BUILD_ID_NOTE_BYTES; - sh.sh_addralign = 4; - write_bytes(w, &sh, sizeof(sh)); - /* shdr: .symtab */ - memset(&sh, 0, sizeof(sh)); - sh.sh_name = sh_name_symtab; - sh.sh_type = SHT_SYMTAB; - sh.sh_flags = 0; - sh.sh_addr = 0; - sh.sh_offset = symtab_off; - sh.sh_size = symtab_size; - sh.sh_link = shndx_strtab; - sh.sh_info = first_global_idx; - sh.sh_addralign = 8; - sh.sh_entsize = ELF64_SYM_SIZE; - write_bytes(w, &sh, sizeof(sh)); - /* shdr: .strtab */ - memset(&sh, 0, sizeof(sh)); - sh.sh_name = sh_name_strtab; - sh.sh_type = SHT_STRTAB; - sh.sh_offset = strtab_off; - sh.sh_size = strtab_size; - sh.sh_addralign = 1; - write_bytes(w, &sh, sizeof(sh)); - /* shdr: .shstrtab */ - memset(&sh, 0, sizeof(sh)); - sh.sh_name = sh_name_shstrtab; - sh.sh_type = SHT_STRTAB; - sh.sh_offset = shstrtab_off; - sh.sh_size = shstrtab_size; - sh.sh_addralign = 1; - write_bytes(w, &sh, sizeof(sh)); - } - - 
heap->free(heap, phdrs, sizeof(Phdr64) * nphdr_total); - heap->free(heap, recs, sizeof(*recs) * (LinkSyms_count(&img->syms) + 1u)); - heap->free(heap, outshdrs, sizeof(*outshdrs) * outshdr_cap); - if (outshdr_name_off) - heap->free(heap, outshdr_name_off, sizeof(u32) * (noutshdr + 1u)); - strb_fini(&strtab); - strb_fini(&shstrtab); -} diff --git a/src/link/link_internal.h b/src/link/link_internal.h @@ -234,14 +234,6 @@ void link_capture_debug_inputs(struct Linker*, LinkImage*); * vaddr, before resolve_undefs runs. */ #define LINK_PE_IMAGE_BASE 0x140000000ULL -/* Defined in link_dyn.c. Phase 4: synthesize .interp/.dynsym/.dynstr/ - * .gnu.hash/.rela.dyn/.rela.plt/.plt/.got.plt/.dynamic when the link - * is producing a PIE / ET_DYN exe. No-op when there are zero imports - * AND no DSO inputs (in PIE-with-no-imports we still need PT_INTERP - * and a minimal .dynamic). */ -void layout_dyn(struct Linker*, LinkImage*); -void link_dyn_state_free(LinkImage*); - /* Define / upsert a synthetic global symbol resolved to `vaddr`. * Satisfies any prior undef ref (e.g. _DYNAMIC from Scrt1.o, * __dso_handle from libc_nonshared.a) and fans out across per-input diff --git a/src/link/link_jit.c b/src/link/link_jit.c @@ -119,15 +119,14 @@ struct CfreeJit { * RISC-V psABI normally points TP at the start of the TLS image, but * cfree's freestanding start.c (and the JIT harness) places a 16-byte * TCB ahead of .tdata and biases TP accordingly so a single TPREL - * convention works for both arches. Mirrors src/link/link_elf.c's + * convention works for both arches. Mirrors src/obj/elf/link.c's * TLS_TCB_SIZE comment. 
*/ #define JIT_TLS_TCB_SIZE 16ull static int reloc_is_tlsle(RelocKind k) { return k == R_AARCH64_TLSLE_ADD_TPREL_HI12 || - k == R_AARCH64_TLSLE_ADD_TPREL_LO12_NC || - k == R_RV_TPREL_HI20 || k == R_RV_TPREL_LO12_I || - k == R_RV_TPREL_LO12_S; + k == R_AARCH64_TLSLE_ADD_TPREL_LO12_NC || k == R_RV_TPREL_HI20 || + k == R_RV_TPREL_LO12_I || k == R_RV_TPREL_LO12_S; } /* RISC-V PCREL_LO12_I/S target a local "anchor" symbol whose vaddr is @@ -501,8 +500,7 @@ CfreeJit* cfree_jit_from_image(LinkImage* img) { * vaddrs are image-relative, so the runtime alias drops * out and we work in image-space. */ S = (tgt->vaddr - img->tls_vaddr) + JIT_TLS_TCB_SIZE; - } else if (r->kind == R_RV_PCREL_LO12_I || - r->kind == R_RV_PCREL_LO12_S) { + } else if (r->kind == R_RV_PCREL_LO12_I || r->kind == R_RV_PCREL_LO12_S) { /* RISC-V PCREL_LO12: target.vaddr is the paired AUIPC site * (a local anchor symbol). Recompute the AUIPC's runtime * displacement and feed it as S to the LO12_I/S apply path so @@ -682,7 +680,8 @@ CfreeJit* cfree_jit_from_image(LinkImage* img) { /* Run .init_array constructors in forward order. */ { typedef void (*VoidFn)(void); - void* p_start = cfree_jit_lookup(jit, CFREE_SLICE_LIT("__init_array_start")); + void* p_start = + cfree_jit_lookup(jit, CFREE_SLICE_LIT("__init_array_start")); void* p_end = cfree_jit_lookup(jit, CFREE_SLICE_LIT("__init_array_end")); if (p_start && p_end) { VoidFn* fn = (VoidFn*)p_start; @@ -750,7 +749,8 @@ void* cfree_jit_lookup(CfreeJit* jit, CfreeSlice name) { if (!jit || !name.s) return NULL; /* C-symbol mangling lives in obj_format_c_mangle so JIT lookups by * source-level name find the symbol regardless of target format. - * name.s is NUL-terminated (CFREE_SLICE_LIT / cfree_slice_cstr / interned). */ + * name.s is NUL-terminated (CFREE_SLICE_LIT / cfree_slice_cstr / interned). 
+ */ sym = obj_format_c_mangle(jit->c, name.s); id = symhash_get(&jit->image->globals, sym); if (id == LINK_SYM_NONE) return NULL; @@ -1569,7 +1569,8 @@ static void jit_view_copy_debug_section(CfreeJit* jit, u32 ii, const char* tnm = tnm_s.s; size_t tnlen = tnm_s.len; if (tnm) { - Sym v_tn = pool_intern_slice(view_pool, (Slice){ .s = tnm, .len = tnlen }); + Sym v_tn = + pool_intern_slice(view_pool, (Slice){.s = tnm, .len = tnlen}); ViewSec* tgt = view_sec_find(tab, ntab, v_tn); if (tgt) { S = (u64)tgt->snap; @@ -1667,7 +1668,7 @@ static CfreeObjFile* jit_view_build(CfreeJit* jit) { } if (!nm || !jit_view_is_debug_name(nm)) continue; v_nm = pool_intern_slice(obj_compiler(view_ob)->global, - (Slice){ .s = nm, .len = nlen }); + (Slice){.s = nm, .len = nlen}); vs = view_sec_find(tab, ntab, v_nm); if (!vs) continue; jit_view_copy_debug_section(jit, ii, (ObjSecId)(k + 1), view_ob, tab, diff --git a/src/link/link_layout.c b/src/link/link_layout.c @@ -24,6 +24,7 @@ #include "link/link.h" #include "link/link_arch.h" #include "link/link_internal.h" +#include "obj/format.h" LinkImage* link_image_alloc(Compiler*); /* defined in link.c */ @@ -38,8 +39,7 @@ static SrcLoc no_loc(void) { * loader. A future cross-link with mismatched host/target page sizes * will need a target-derived value here instead. */ u64 link_layout_page_size(Linker* l) { - const CfreeExecMem* m = - (l && l->jit_host) ? l->jit_host->execmem : NULL; + const CfreeExecMem* m = (l && l->jit_host) ? l->jit_host->execmem : NULL; if (m && m->page_size) return (u64)m->page_size; return 0x4000u; } @@ -635,8 +635,7 @@ static void link_layout_sections_scripted(Linker* l, LinkImage* img, u32 nseg_max = 0; for (si = 0; si < script->nsections; ++si) - if (!slice_eq_cstr(script->sections[si].name, "/DISCARD/")) - ++nseg_max; + if (!slice_eq_cstr(script->sections[si].name, "/DISCARD/")) ++nseg_max; img->segments = nseg_max ? 
(LinkSegment*)h->alloc(h, sizeof(*img->segments) * nseg_max, _Alignof(LinkSegment)) @@ -1064,13 +1063,12 @@ LinkImage* link_resolve(Linker* l) { if (got_map) h->free(h, got_map, sizeof(*got_map) * map_size); if (stub_map) h->free(h, stub_map, sizeof(*stub_map) * map_size); } - /* layout_dyn synthesizes ELF-specific .interp / .dynsym / .dynstr / - * .rela.dyn / .plt / .got.plt / .dynamic sections. Mach-O has its - * own equivalent path; COFF binds imports through .idata + IAT - * (Phase 3.2). Skip for non-ELF formats. */ - metrics_scope_begin(l->c, "link.layout_dyn"); - if (l->c->target.obj == CFREE_OBJ_ELF) layout_dyn(l, img); - metrics_scope_end(l->c, "link.layout_dyn"); + { + const ObjFormatImpl* fmt = obj_format_lookup(l->c->target.obj); + metrics_scope_begin(l->c, "link.layout_dyn"); + if (fmt && fmt->layout_dyn) fmt->layout_dyn(l, img); + metrics_scope_end(l->c, "link.layout_dyn"); + } metrics_scope_begin(l->c, "link.resolve_entry"); link_resolve_entry(l, img); metrics_scope_end(l->c, "link.resolve_entry"); diff --git a/src/link/link_macho.c b/src/link/link_macho.c @@ -1,2603 +0,0 @@ -/* link_emit_macho — write a dyld-loadable arm64 MH_EXECUTE. - * - * Mach-O peer of link_emit_elf. Produces a position-independent - * MH_EXECUTE that links against libSystem.B.dylib (or any other - * dylib/.tbd input) via LC_LOAD_DYLIB + LC_DYLD_CHAINED_FIXUPS. The - * binary is ad-hoc codesigned at the tail so the kernel will exec it - * on macOS 11+. - * - * Layout (Apple's stock arm64 layout): - * - * __PAGEZERO vmaddr 0, vmsize 0x100000000, no file bytes - * __TEXT (R-X) - * mach_header_64 - * load commands - * [SF_EXEC sections — .text] - * [SF_ALLOC R-only sections — .rodata, init/fini_array, etc.] 
- * __stubs (12B per import-func) - * __DATA_CONST (RW initially, dyld marks R-only after fixups) - * __got (8B per import — both data and func imports) - * __DATA (R-W) - * [SF_WRITE sections — .data, .bss] - * __LINKEDIT (R) - * dyld_chained_fixups blob - * dyld_exports_trie blob - * function starts (empty) - * data in code (empty) - * symtab - * indirect symbol table (one entry per __stubs and __got slot) - * strtab - * code signature - * - * Imports are routed: - * CALL26/JUMP26 against an imported function -> __stubs entry - * GOT_LOAD_PAGE21/PAGEOFF12 against any import -> __got slot - * ABS64 against an imported symbol -> chained-bind at site - * ABS64 against a defined internal symbol -> chained-rebase at site - * - * arm64-only. x86_64-macos arrives with x64 codegen. */ - -#include <string.h> - -#include "core/bytes.h" -#include "core/heap.h" -#include "core/pool.h" -#include "core/sha256.h" -#include "core/slice.h" -#include "core/util.h" -#include "core/vec.h" -#include "link/link.h" -#include "link/link_arch.h" -#include "link/link_internal.h" -#include "obj/macho.h" - -static SrcLoc no_loc(void) { - SrcLoc l = {0, 0, 0}; - return l; -} - -/* ---- constants ---- */ -#define MZ_PAGEZERO 0x100000000ULL -#define MZ_PAGE 0x4000ULL -#define MZ_GOT_SIZE 8u -/* __DATA,__thread_ptrs slot size — one pointer per unique TLV referenced - * via TLVP_LOAD_PAGE21/PAGEOFF12. Each slot holds the address of the - * matching TLV descriptor in __DATA,__thread_vars. 
*/ -#define MZ_TLVP_SIZE 8u - -#define DYLD_CHAINED_PTR_64 2u -#define DYLD_CHAINED_IMPORT 1u - -#define VM_PROT_READ 0x1u -#define VM_PROT_WRITE 0x2u -#define VM_PROT_EXECUTE 0x4u - -#define CS_MAGIC_EMBEDDED_SIGNATURE 0xfade0cc0u -#define CS_MAGIC_CODEDIRECTORY 0xfade0c02u -#define CSSLOT_CODEDIRECTORY 0u -#define CS_HASHTYPE_SHA256 2u -#define CS_SHA256_LEN SHA256_DIGEST_LEN -#define CS_PAGE_SIZE_LOG2 12u -#define CS_EXECSEG_MAIN_BINARY 1u - -/* extra LC ids */ -#define LC_DYLD_INFO_ONLY (0x22u | 0x80000000u) -#define LC_FUNCTION_STARTS_C 0x26u -#define LC_DATA_IN_CODE_C 0x29u -#define LC_CODE_SIGNATURE_C 0x1du - -/* ---- byte buffer ---- */ - -typedef struct MByte { - Heap* heap; - u8* data; - u32 len; - u32 cap; -} MByte; - -static void mbuf_init(MByte* b, Heap* h) { - b->heap = h; - b->data = NULL; - b->len = 0; - b->cap = 0; -} -static void mbuf_fini(MByte* b) { - if (b->data) b->heap->free(b->heap, b->data, b->cap); - b->data = NULL; - b->cap = b->len = 0; -} -static void mbuf_reserve(MByte* b, u32 need) { - if (need <= b->cap) return; - (void)VEC_GROW(b->heap, b->data, b->cap, need); -} -static u32 mbuf_align(MByte* b, u32 a) { - u32 n = (u32)ALIGN_UP((u64)b->len, (u64)a); - if (n > b->len) { - mbuf_reserve(b, n); - memset(b->data + b->len, 0, n - b->len); - b->len = n; - } - return b->len; -} -static u32 mbuf_append(MByte* b, const void* src, u32 n) { - u32 off = b->len; - mbuf_reserve(b, b->len + n); - if (n) memcpy(b->data + b->len, src, n); - b->len += n; - return off; -} -static u32 mbuf_u32(MByte* b, u32 v) { - u8 t[4]; - wr_u32_le(t, v); - return mbuf_append(b, t, 4); -} -static u32 mbuf_u16(MByte* b, u16 v) { - u8 t[2]; - wr_u16_le(t, v); - return mbuf_append(b, t, 2); -} -static u32 mbuf_u64(MByte* b, u64 v) { - u8 t[8]; - wr_u64_le(t, v); - return mbuf_append(b, t, 8); -} -static u32 mbuf_u8(MByte* b, u8 v) { return mbuf_append(b, &v, 1); } -static u32 mbuf_str(MByte* b, const char* s, u32 n) { - u32 off = b->len; - mbuf_reserve(b, b->len + n + 
1u); - if (n) memcpy(b->data + b->len, s, n); - b->data[b->len + n] = 0; - b->len += n + 1u; - return off; -} - -/* ---- imports + dylibs ---- */ - -typedef struct MachImp { - LinkSymId sym; - Sym name; - u32 dylib_ord; /* 1-based ordinal into LC_LOAD_DYLIB list */ - u32 stub_idx; /* 1-based index into __stubs (0 if data import) */ - u32 got_idx; /* 1-based index into __got */ - u32 imports_strx; /* offset into chained-fixups symbol pool */ - u8 is_func; - u8 weak; - /* internal=1 means this entry is an in-image symbol that's referenced - * via GOT_LOAD_PAGE21 / LD64_GOT_LO12_NC (clang emits these for any - * extern global so a single static-link can later become PIC). The - * GOT slot stores the symbol's image-relative vaddr and gets a - * chained-fixup rebase entry (or no entry at all for a weak-undef - * resolving to NULL). No dylib_ord / stub_idx / chained-fixup bind. */ - u8 internal; - u8 pad[1]; - u64 internal_vaddr; /* image-relative target vaddr; meaningful only when - internal=1 */ -} MachImp; - -typedef struct MachDylib { - Sym install; -} MachDylib; - -/* One slot in the synthetic __DATA,__thread_ptrs section per unique TLV - * descriptor referenced via TLVP_LOAD_PAGE21/PAGEOFF12. Modeled after - * MachImp's internal-GOT entries: the slot holds the descriptor address - * (REBASE for internal-to-image descriptors, BIND for dylib-imported - * ones). The descriptor itself is laid out in __DATA,__thread_vars by - * either the input objects (internal) or the providing dylib (imported). 
*/ -typedef struct MachTlv { - LinkSymId sym; /* canonical descriptor LinkSymId */ - u32 tlv_idx; /* 1-based slot index in __thread_ptrs */ - u8 imported; /* 1 == descriptor lives in a dylib (BIND), 0 == internal - (REBASE) */ - u8 pad[3]; - u32 import_idx; /* 1-based MachImp index when imported (for chained-bind - ordinal) */ -} MachTlv; - -/* ---- planned section ---- */ - -typedef struct MSec { - /* Source: either a LinkSection (link_sec_id != 0) or a synthetic - * pre-built byte buffer (data + size). */ - LinkSectionId link_sec_id; - const u8* synth_data; - u32 synth_size; - /* Mach-O placement */ - const char* segname; - const char* sectname; - /* Inline storage for segname/sectname when split from a Mach-O - * `__SEG,__sect`-form LinkSection name. Names from string literals - * (synth sections, derived-from-flags defaults) point at .rodata - * and don't use these. 16 bytes matches the on-disk field width. */ - char segname_buf[16]; - char sectname_buf[16]; - u64 vaddr; - u64 file_offset; - u64 size; - u32 align; - u32 flags; /* S_TYPE | S_ATTR_* */ - u32 reserved1; - u32 reserved2; - u8 segidx; /* 1=__TEXT, 2=__DATA_CONST, 3=__DATA */ - u8 is_zerofill; - u8 pad[6]; -} MSec; - -static void msec_repair_name_ptrs(MSec* m) { - if (m->segname_buf[0]) m->segname = m->segname_buf; - if (m->sectname_buf[0]) m->sectname = m->sectname_buf; -} - -typedef struct MSeg { - const char* name; - u32 maxprot; - u32 initprot; - u64 vmaddr; - u64 vmsize; - u64 fileoff; - u64 filesize; - u32 nsects; /* MSec count in segment — internal layout */ - u32 first_sec; /* first index into MSec[] */ - u32 nouts; /* OutSec count in segment — what hits the file */ - u32 first_out; /* first index into OutSec[] */ -} MSeg; - -/* On-disk section view: one record per (segname, sectname) within a - * segment. Mach-O requires this — emitting one section_64 per input - * MSec yields sibling __TEXT,__text records that violate the spec. 
- * Built from MSec[] after vaddr placement; reloc-apply still uses - * MSec[] for byte-buffer addressing. */ -typedef struct OutSec { - const char* segname; - const char* sectname; - u64 vaddr; - u64 file_offset; - u64 size; - u32 align; - u32 flags; - u32 reserved1; - u32 reserved2; - u8 segidx; - u8 is_zerofill; -} OutSec; - -/* ---- main context ---- */ - -typedef struct MCtx { - LinkImage* img; - Compiler* c; - Heap* h; - Writer* w; - Linker* linker; - const LinkArchDesc* arch; - - /* imports */ - MachImp* imports; - u32 nimports; - u32 nimports_real; /* count of imports with internal=0 (== prefix length; - * collect_imports appends internal=1 entries last) */ - u32 nimport_funcs; - MachDylib* dylibs; - u32 ndylibs; - /* sym->import index, 1-based, 0 = not an import. Sized to LinkSymId space - * + 1. */ - u32* sym_to_imp; - u32 sym_to_imp_size; - - /* sections + segments */ - MSec* secs; - u32 nsecs; - OutSec* outs; - u32 nouts; - MSeg segs[5]; /* PAGEZERO, TEXT, DATA_CONST, DATA, LINKEDIT */ - u32 nsegs; - - /* Synthetic byte buffers, owned. */ - u8* stubs_bytes; - u32 stubs_size; - u8* got_bytes; - u32 got_size; - /* TLV pointer slots — one entry in __DATA,__thread_ptrs per unique - * descriptor referenced via TLVP_LOAD_PAGE21/PAGEOFF12. sym_to_tlv - * maps LinkSymId → 1-based slot index (parallel to sym_to_imp). Slot - * bytes are populated at apply_relocs time once shift_sections has - * pinned descriptor vaddrs. */ - MachTlv* tlv_slots; - u32 ntlv; - u32* sym_to_tlv; - u32 sym_to_tlv_size; - u8* tlv_ptrs_bytes; - u32 tlv_ptrs_size; - u64 tlv_ptrs_vaddr; - /* Vaddr of the first thread-local-storage section - * (__thread_data / __thread_bss). Each TLV descriptor's word 2 - * stores the symbol's offset within this image rather than an - * absolute address — see apply_relocs's S_THREAD_LOCAL_VARIABLES - * ABS64 special case. 
*/ - u64 tls_image_vaddr; - u8 has_tls_image; - - /* Final layout (computed during plan) */ - u64 text_vaddr; - u64 text_filesz; - u64 stubs_vaddr; - u64 got_vaddr; - u64 data_const_vaddr; - u64 data_vaddr; - u64 data_const_filesz; - u64 data_filesz; - u64 data_memsz; - u64 linkedit_vaddr; - u64 linkedit_fileoff; - u32 entry_offset; /* offset of entry within __TEXT segment */ - - u64 headers_size; /* header + loadcmds */ - - /* LINKEDIT contents */ - MByte chained_fixups; - MByte exports_trie; - MByte symtab; /* binary nlist_64 array */ - MByte strtab; - MByte indirect; /* u32 array */ - MByte fn_starts; - MByte data_in_code; - MByte codesig; - - u32 chained_fixups_off; - u32 exports_trie_off; - u32 fn_starts_off; - u32 data_in_code_off; - u32 symtab_off; - u32 indirect_off; - u32 strtab_off; - u32 codesig_off; - u32 codesig_size; - u32 nsyms; - - u8 uuid[16]; -} MCtx; - -/* ---- helpers for finding LinkSymbol vaddr ---- */ - -static LinkSymbol* sym_at(LinkImage* img, LinkSymId id) { - if (id == LINK_SYM_NONE || id > LinkSyms_count(&img->syms)) return NULL; - return LinkSyms_at(&img->syms, id - 1); -} - -/* ---- pass: collect imports ---- */ - -static u32 dylib_ordinal_of(MCtx* x, Sym install) { - for (u32 j = 0; j < x->ndylibs; ++j) - if (x->dylibs[j].install == install) return j + 1u; - return 0; -} - -static void collect_imports(MCtx* x) { - LinkImage* img = x->img; - Heap* h = x->h; - - x->sym_to_imp_size = LinkSyms_count(&img->syms) + 1u; - x->sym_to_imp = - (u32*)h->alloc(h, sizeof(u32) * x->sym_to_imp_size, _Alignof(u32)); - if (!x->sym_to_imp) - compiler_panic(x->c, no_loc(), "link_macho: oom on sym_to_imp"); - memset(x->sym_to_imp, 0, sizeof(u32) * x->sym_to_imp_size); - - u32 cap = 0, cap_d = 0; - for (u32 i = 0; i < LinkSyms_count(&img->syms); ++i) { - LinkSymbol* s = LinkSyms_at(&img->syms, i); - if (!s->imported) continue; - if (s->name == 0) continue; - LinkSymId canon = symhash_get(&img->globals, s->name); - if (canon != LINK_SYM_NONE && canon != 
s->id) continue; - if (VEC_GROW(h, x->imports, cap, x->nimports + 1u)) - compiler_panic(x->c, no_loc(), "link_macho: oom on imports"); - MachImp* mi = &x->imports[x->nimports++]; - memset(mi, 0, sizeof(*mi)); - mi->sym = s->id; - mi->name = s->name; - mi->is_func = (s->kind == SK_FUNC || s->kind == SK_IFUNC) ? 1 : 0; - mi->weak = (s->bind == SB_WEAK) ? 1 : 0; - x->sym_to_imp[s->id] = x->nimports; - } - - /* Back-classify: any CALL26/JUMP26 reloc target -> function. */ - for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) { - LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); - if (!x->arch->is_branch_reloc || !x->arch->is_branch_reloc(r->kind)) - continue; - if (r->target == LINK_SYM_NONE || r->target >= x->sym_to_imp_size) continue; - u32 idx = x->sym_to_imp[r->target]; - if (!idx) { - /* Resolve through canonical. */ - LinkSymbol* tgt = LinkSyms_at(&img->syms, r->target - 1); - if (tgt->name == 0) continue; - LinkSymId canon = symhash_get(&img->globals, tgt->name); - if (canon == LINK_SYM_NONE || canon >= x->sym_to_imp_size) continue; - idx = x->sym_to_imp[canon]; - if (!idx) continue; - /* Stash so future lookups skip this loop. */ - x->sym_to_imp[r->target] = idx; - } - x->imports[idx - 1].is_func = 1; - } - - /* Build dylib ordinal table. Pull soname from the providing DSO. */ - for (u32 i = 0; i < x->nimports; ++i) { - MachImp* mi = &x->imports[i]; - LinkSymbol* s = sym_at(img, mi->sym); - LinkInputId dso_id = s ? 
s->dso_input_id : LINK_INPUT_NONE; - Sym install = 0; - if (dso_id != LINK_INPUT_NONE && x->linker && - dso_id - 1u < LinkInputs_count(&x->linker->inputs)) { - LinkInput* in = LinkInputs_at(&x->linker->inputs, dso_id - 1u); - if (in->kind == LINK_INPUT_DSO_BYTES) install = in->soname; - } - if (install == 0) - install = pool_intern_slice(x->c->global, SLICE_LIT("/usr/lib/libSystem.B.dylib")); - u32 ord = dylib_ordinal_of(x, install); - if (!ord) { - if (VEC_GROW(h, x->dylibs, cap_d, x->ndylibs + 1u)) - compiler_panic(x->c, no_loc(), "link_macho: oom on dylibs"); - x->dylibs[x->ndylibs].install = install; - ++x->ndylibs; - ord = x->ndylibs; - } - mi->dylib_ord = ord; - } - - /* Always include every DSO input's install-name. */ - if (x->linker) { - for (u32 ii = 0; ii < LinkInputs_count(&x->linker->inputs); ++ii) { - LinkInput* in = LinkInputs_at(&x->linker->inputs, ii); - if (in->kind != LINK_INPUT_DSO_BYTES) continue; - if (in->soname == 0) continue; - if (dylib_ordinal_of(x, in->soname)) continue; - if (VEC_GROW(h, x->dylibs, cap_d, x->ndylibs + 1u)) - compiler_panic(x->c, no_loc(), "link_macho: oom on dylibs"); - x->dylibs[x->ndylibs].install = in->soname; - ++x->ndylibs; - } - } - - /* All entries so far are real imports; remember the partition point - * so import/symtab table emit loops can skip the appended internals. */ - x->nimports_real = x->nimports; - - /* Internal GOT pass. clang on Mach-O routes every extern-global - * reference through the GOT (GOT_LOAD_PAGE21 / LD64_GOT_LO12_NC), so - * even a common symbol or weak-undef that ends up resolved within the - * image still needs a __got slot. For each such reloc whose target - * isn't an existing import, materialize a MachImp with internal=1. - * The slot's contents are filled at write time and a chained-fixup - * REBASE entry (or none, for weak undef → NULL) keeps it valid - * post-ASLR. 
*/ - for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) { - LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); - if (!x->arch->is_got_load_reloc || !x->arch->is_got_load_reloc(r->kind)) - continue; - if (r->target == LINK_SYM_NONE || r->target >= x->sym_to_imp_size) continue; - if (x->sym_to_imp[r->target]) continue; - LinkSymbol* t = sym_at(img, r->target); - if (!t) continue; - /* Resolve through canonical so we share a single slot per symbol. */ - LinkSymId canon = r->target; - if (t->name != 0) { - LinkSymId hit = symhash_get(&img->globals, t->name); - if (hit != LINK_SYM_NONE) { - canon = hit; - if (x->sym_to_imp[canon]) { - x->sym_to_imp[r->target] = x->sym_to_imp[canon]; - continue; - } - t = sym_at(img, canon); - if (!t) continue; - } - } - if (VEC_GROW(h, x->imports, cap, x->nimports + 1u)) - compiler_panic(x->c, no_loc(), "link_macho: oom on internal got"); - MachImp* mi = &x->imports[x->nimports++]; - memset(mi, 0, sizeof(*mi)); - mi->sym = canon; - mi->name = t->name; - mi->is_func = (t->kind == SK_FUNC || t->kind == SK_IFUNC) ? 1 : 0; - mi->weak = (t->bind == SB_WEAK) ? 1 : 0; - mi->internal = 1; - /* internal_vaddr is read fresh from the LinkSymbol when the slot - * gets initialized — collect_imports runs before shift_sections - * rebases section vaddrs to Mach-O layout, so capturing here would - * be stale by the time __got bytes are written. */ - mi->internal_vaddr = 0; - x->sym_to_imp[canon] = x->nimports; - if (canon != r->target) x->sym_to_imp[r->target] = x->nimports; - } - - /* Assign stub_idx + got_idx. Internal entries get a slot but no stub: - * the call site (CALL26) on internal funcs goes direct, not via stub. 
*/ - u32 stub_run = 0; - for (u32 i = 0; i < x->nimports; ++i) { - MachImp* mi = &x->imports[i]; - mi->got_idx = i + 1u; - if (mi->is_func && !mi->internal) mi->stub_idx = ++stub_run; - } - x->nimport_funcs = stub_run; -} - -/* ---- pass: collect TLV pointer slots ---- - * - * Mirror of collect_imports' internal-GOT pass, but for TLV descriptors: - * each unique descriptor referenced via ARM64_RELOC_TLVP_LOAD_PAGE21 / - * PAGEOFF12 gets one slot in the synthetic __DATA,__thread_ptrs section. - * The slot's runtime value is the descriptor's address; we patch it at - * apply_relocs time (REBASE for in-image descriptors, BIND for ones in - * a dylib). - * - * Slots are deduplicated by canonical LinkSymId so a single descriptor - * referenced from N call sites shares one __thread_ptrs entry. */ -static void collect_tlv(MCtx* x) { - LinkImage* img = x->img; - Heap* h = x->h; - x->sym_to_tlv_size = LinkSyms_count(&img->syms) + 1u; - x->sym_to_tlv = - (u32*)h->alloc(h, sizeof(u32) * x->sym_to_tlv_size, _Alignof(u32)); - if (!x->sym_to_tlv) - compiler_panic(x->c, no_loc(), "link_macho: oom on sym_to_tlv"); - memset(x->sym_to_tlv, 0, sizeof(u32) * x->sym_to_tlv_size); - - u32 cap = 0; - for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) { - LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); - if (!x->arch->is_tlvp_reloc || !x->arch->is_tlvp_reloc(r->kind)) continue; - if (r->target == LINK_SYM_NONE || r->target >= x->sym_to_tlv_size) continue; - /* Resolve through canonical so multiple per-input duplicate undefs - * collapse onto one __thread_ptrs slot. 
*/ - LinkSymId canon = r->target; - LinkSymbol* t = sym_at(img, r->target); - if (!t) continue; - if (t->name != 0) { - LinkSymId hit = symhash_get(&img->globals, t->name); - if (hit != LINK_SYM_NONE) { - canon = hit; - t = sym_at(img, canon); - if (!t) continue; - } - } - if (x->sym_to_tlv[canon]) { - if (canon != r->target) x->sym_to_tlv[r->target] = x->sym_to_tlv[canon]; - continue; - } - if (VEC_GROW(h, x->tlv_slots, cap, x->ntlv + 1u)) - compiler_panic(x->c, no_loc(), "link_macho: oom on tlv_slots"); - MachTlv* ts = &x->tlv_slots[x->ntlv++]; - memset(ts, 0, sizeof(*ts)); - ts->sym = canon; - ts->tlv_idx = x->ntlv; - ts->imported = t->imported ? 1u : 0u; - /* If the descriptor is imported we route the bind through the - * symbol's MachImp slot — that's where dyld's chained-import index - * comes from. When this loop fires the imp pass has already - * materialized the entry (real imports were processed first); the - * lookup may also have stashed an alias for non-canonical ids. */ - if (ts->imported) { - u32 idx = (canon < x->sym_to_imp_size) ? x->sym_to_imp[canon] : 0u; - if (!idx && t->name != 0) { - LinkSymId hit2 = symhash_get(&img->globals, t->name); - if (hit2 != LINK_SYM_NONE && hit2 < x->sym_to_imp_size) - idx = x->sym_to_imp[hit2]; - } - ts->import_idx = idx; - } - x->sym_to_tlv[canon] = x->ntlv; - if (canon != r->target) x->sym_to_tlv[r->target] = x->ntlv; - } -} - -/* ---- pass: plan Mach-O sections ---- - * - * Walks LinkImage sections. Each non-zero-size LinkSection becomes one - * MSec. Synthetic __stubs and __got are appended at the right segment - * boundaries. Vaddr and file_offset are assigned in a single forward - * pass starting at __TEXT base; __PAGEZERO and __LINKEDIT are special. 
*/ - -static void seg_init(MSeg* s, const char* name, u32 maxp, u32 initp) { - memset(s, 0, sizeof(*s)); - s->name = name; - s->maxprot = maxp; - s->initprot = initp; -} - -static int sec_is_writable(const LinkSection* ls) { - return (ls->flags & SF_WRITE) != 0u; -} -static int sec_is_exec(const LinkSection* ls) { - return (ls->flags & SF_EXEC) != 0u; -} -static int sec_is_zerofill(const LinkSection* ls) { - return ls->sem == SSEM_NOBITS; -} - -static int section_has_abs64_reloc(const LinkImage* img, LinkSectionId id) { - for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) { - const LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); - if (r->link_section_id == id && r->kind == R_ABS64) return 1; - } - return 0; -} - -static int sec_needs_data_const(const LinkImage* img, const LinkSection* ls) { - if (!ls || !ls->size || sec_is_exec(ls) || sec_is_writable(ls) || - sec_is_zerofill(ls)) { - return 0; - } - return section_has_abs64_reloc(img, ls->id); -} - -/* Pick (segname, sectname) for a LinkSection. Comma-form Mach-O names - * round-trip into MSec's inline 16-byte buffers; literal defaults point - * at .rodata strings. Caller passes the MSec for per-section storage — - * a previous version used a shared static buffer which aliased all - * sections to whichever name was set last. */ -static void pick_macho_names(const LinkSection* ls, Compiler* c, MSec* m) { - Slice nm_s = pool_slice(c->global, ls->name); - const char* nm = nm_s.s; - size_t nlen = nm_s.len; - if (nm) { - /* Comma-form: "__SEG,__sect" round-tripped from a Mach-O input. */ - for (size_t i = 0; i < nlen; ++i) { - if (nm[i] == ',') { - u32 seg_n = (u32)(i > 15 ? 15 : i); - memcpy(m->segname_buf, nm, seg_n); - m->segname_buf[seg_n] = 0; - u32 sect_n = (u32)((nlen - i - 1) > 15 ? 15 : (nlen - i - 1)); - memcpy(m->sectname_buf, nm + i + 1, sect_n); - m->sectname_buf[sect_n] = 0; - m->segname = m->segname_buf; - m->sectname = m->sectname_buf; - return; - } - } - } - /* Derive from flags. 
*/ - if (sec_is_exec(ls)) { - m->segname = "__TEXT"; - m->sectname = "__text"; - } else if (sec_is_writable(ls)) { - m->segname = "__DATA"; - m->sectname = sec_is_zerofill(ls) ? "__bss" : "__data"; - } else { - m->segname = "__TEXT"; - m->sectname = "__const"; - } -} - -static void plan_layout(MCtx* x) { - LinkImage* img = x->img; - Heap* h = x->h; - - /* PAGEZERO */ - seg_init(&x->segs[0], "__PAGEZERO", 0, 0); - x->segs[0].vmaddr = 0; - x->segs[0].vmsize = MZ_PAGEZERO; - x->segs[0].fileoff = 0; - x->segs[0].filesize = 0; - x->segs[0].nsects = 0; - x->segs[0].first_sec = 0; - - /* Segments 1..4 */ - seg_init(&x->segs[1], "__TEXT", VM_PROT_READ | VM_PROT_EXECUTE, - VM_PROT_READ | VM_PROT_EXECUTE); - seg_init(&x->segs[2], "__DATA_CONST", VM_PROT_READ | VM_PROT_WRITE, - VM_PROT_READ | VM_PROT_WRITE); - seg_init(&x->segs[3], "__DATA", VM_PROT_READ | VM_PROT_WRITE, - VM_PROT_READ | VM_PROT_WRITE); - seg_init(&x->segs[4], "__LINKEDIT", VM_PROT_READ, VM_PROT_READ); - x->nsegs = 5; - - /* Pre-allocate MSec capacity: every LinkSection + 2 synth (__stubs, - * __got). (LinkSections from the dynamic-link layer — .dynsym / .plt - * etc. — were synthesized by layout_dyn for ELF; we won't have them - * since pie wasn't set on this Linker. Still, oversize by a few.) */ - u32 cap = LinkRelocs_count(&img->relocs) + img->nsections + 4u; - x->secs = (MSec*)h->alloc(h, sizeof(MSec) * cap, _Alignof(MSec)); - if (!x->secs) compiler_panic(x->c, no_loc(), "link_macho: oom on MSec"); - memset(x->secs, 0, sizeof(MSec) * cap); - x->nsecs = 0; - - /* Pass 1: __TEXT segment. Header + loadcmds reserve front. */ - /* We need the exact header_size to set first sec's file_offset. We'll - * compute it later, but reserve a placeholder; for now use 0 and patch - * in pass 4 (offsets get bumped). */ - - u64 text_vaddr = MZ_PAGEZERO; - /* We'll compute headers_size after plan; stash starting vaddr only. 
*/ - x->segs[1].vmaddr = text_vaddr; - x->segs[1].fileoff = 0; - x->text_vaddr = text_vaddr; - - /* Collect: (a) exec sections, (b) read-only allocatable sections. */ - /* (cursor advances per-segment in pass 2; nothing to track here) */ - - /* We don't know the header size yet; walk sections first to enumerate - * MSec entries, then back-fill file_offset/vaddr after we know the - * load-command count. */ - - u32 first_text_sec = x->nsecs; - - for (u32 i = 0; i < img->nsections; ++i) { - LinkSection* ls = &img->sections[i]; - if (!ls->size) continue; - if (sec_is_writable(ls)) continue; - if (sec_is_zerofill(ls)) continue; /* placed in __DATA */ - if (sec_needs_data_const(img, ls)) continue; - MSec* m = &x->secs[x->nsecs++]; - memset(m, 0, sizeof(*m)); - m->link_sec_id = ls->id; - pick_macho_names(ls, x->c, m); - /* Force into __TEXT. */ - if (!slice_eq_cstr(slice_from_cstr(m->segname), "__TEXT")) - m->segname = "__TEXT"; - m->align = ls->align ? ls->align : 1u; - m->size = ls->size; - m->segidx = 1; - m->flags = sec_is_exec(ls) ? 
(0x80000000u /*S_ATTR_PURE_INSTRUCTIONS*/ | - 0x00000400u /*S_ATTR_SOME_INSTRUCTIONS*/) - : 0u; - } - - /* __stubs synthetic */ - if (x->nimport_funcs) { - x->stubs_size = x->nimport_funcs * x->arch->macho_stub_size; - x->stubs_bytes = (u8*)h->alloc(h, x->stubs_size, 4); - if (!x->stubs_bytes) - compiler_panic(x->c, no_loc(), "link_macho: oom on stubs"); - memset(x->stubs_bytes, 0, x->stubs_size); - MSec* m = &x->secs[x->nsecs++]; - memset(m, 0, sizeof(*m)); - m->synth_data = x->stubs_bytes; - m->synth_size = x->stubs_size; - m->segname = "__TEXT"; - m->sectname = "__stubs"; - m->align = 4u; - m->size = x->stubs_size; - m->segidx = 1; - m->flags = 0x80000000u | 0x00000400u | 0x00000008u /*S_SYMBOL_STUBS*/; - m->reserved1 = 0; /* fill in later: indirect-symtab base */ - m->reserved2 = x->arch->macho_stub_size; - } - x->segs[1].nsects = x->nsecs - first_text_sec; - x->segs[1].first_sec = first_text_sec; - - /* __DATA_CONST: __got synth */ - u32 first_dc = x->nsecs; - if (x->nimports) { - x->got_size = x->nimports * MZ_GOT_SIZE; - x->got_bytes = (u8*)h->alloc(h, x->got_size, 8); - if (!x->got_bytes) compiler_panic(x->c, no_loc(), "link_macho: oom on got"); - memset(x->got_bytes, 0, x->got_size); - MSec* m = &x->secs[x->nsecs++]; - memset(m, 0, sizeof(*m)); - m->synth_data = x->got_bytes; - m->synth_size = x->got_size; - m->segname = "__DATA_CONST"; - m->sectname = "__got"; - m->align = 8u; - m->size = x->got_size; - m->segidx = 2; - m->flags = 0x00000006u /*S_NON_LAZY_SYMBOL_POINTERS*/; - m->reserved1 = 0; /* indirect-symtab base */ - } - for (u32 i = 0; i < img->nsections; ++i) { - LinkSection* ls = &img->sections[i]; - if (!sec_needs_data_const(img, ls)) continue; - MSec* m = &x->secs[x->nsecs++]; - memset(m, 0, sizeof(*m)); - m->link_sec_id = ls->id; - pick_macho_names(ls, x->c, m); - m->segname = "__DATA_CONST"; - m->align = ls->align ? 
ls->align : 1u; - m->size = ls->size; - m->segidx = 2; - m->flags = 0; - } - x->segs[2].nsects = x->nsecs - first_dc; - x->segs[2].first_sec = first_dc; - - /* __DATA segment: writable sections + zerofill. */ - u32 first_d = x->nsecs; - for (u32 i = 0; i < img->nsections; ++i) { - LinkSection* ls = &img->sections[i]; - if (!ls->size && !sec_is_zerofill(ls)) continue; - if (!sec_is_writable(ls)) continue; - MSec* m = &x->secs[x->nsecs++]; - memset(m, 0, sizeof(*m)); - m->link_sec_id = ls->id; - pick_macho_names(ls, x->c, m); - if (!slice_eq_cstr(slice_from_cstr(m->segname), "__DATA")) - m->segname = "__DATA"; - m->align = ls->align ? ls->align : 1u; - m->size = ls->size; - m->segidx = 3; - m->is_zerofill = sec_is_zerofill(ls) ? 1 : 0; - m->flags = m->is_zerofill ? 0x00000001u /*S_ZEROFILL*/ : 0; - /* dyld dispatches on the section type byte (low 8 bits of flags). - * __mod_init_func / __mod_term_func sections must carry the - * S_MOD_INIT_FUNC_POINTERS / S_MOD_TERM_FUNC_POINTERS type or dyld - * skips them entirely — leaving constructors unrun at startup. */ - if (slice_eq_cstr(slice_from_cstr(m->sectname), "__mod_init_func")) - m->flags = 0x00000009u /*S_MOD_INIT_FUNC_POINTERS*/; - else if (slice_eq_cstr(slice_from_cstr(m->sectname), "__mod_term_func")) - m->flags = 0x0000000au /*S_MOD_TERM_FUNC_POINTERS*/; - else if (ls->flags & SF_TLS) { - /* TLV sections: dyld dispatches by section type, not name. Map - * __thread_vars → S_THREAD_LOCAL_VARIABLES (descriptor records), - * __thread_data → S_THREAD_LOCAL_REGULAR (initial data), - * __thread_bss → S_THREAD_LOCAL_ZEROFILL (zero-init data). Done - * by sectname so per-TU inputs without a Mach-O ext_type still - * get the right section type. */ - if (slice_eq_cstr(slice_from_cstr(m->sectname), "__thread_vars")) { - m->flags = S_THREAD_LOCAL_VARIABLES; - /* Each descriptor is three pointers (24B) whose first word is - * dyld's _tlv_bootstrap thunk pointer. 
Clang/llvm emit - * __thread_vars with on-disk alignment 1 (relying on layout to - * land it on 8); force 8-alignment here so the descriptor - * pointers fall on 8-byte boundaries — dyld's chained-fixup - * processing assumes that. */ - if (m->align < 8u) m->align = 8u; - } else if (m->is_zerofill) - m->flags = S_THREAD_LOCAL_ZEROFILL; - else - m->flags = S_THREAD_LOCAL_REGULAR; - } - } - /* __thread_ptrs synthetic (TLV pointer slots). Emitted into __DATA - * after the user's TLV input sections so descriptors and their - * pointers share the same segment. Each slot's runtime initial - * value (= TLV descriptor address) is patched during apply_relocs. */ - if (x->ntlv) { - x->tlv_ptrs_size = x->ntlv * MZ_TLVP_SIZE; - x->tlv_ptrs_bytes = (u8*)h->alloc(h, x->tlv_ptrs_size, 8); - if (!x->tlv_ptrs_bytes) - compiler_panic(x->c, no_loc(), "link_macho: oom on tlv_ptrs"); - memset(x->tlv_ptrs_bytes, 0, x->tlv_ptrs_size); - MSec* m = &x->secs[x->nsecs++]; - memset(m, 0, sizeof(*m)); - m->synth_data = x->tlv_ptrs_bytes; - m->synth_size = x->tlv_ptrs_size; - m->segname = "__DATA"; - m->sectname = "__thread_ptrs"; - m->align = 8u; - m->size = x->tlv_ptrs_size; - m->segidx = 3; - m->flags = S_THREAD_LOCAL_VARIABLE_POINTERS; - } - x->segs[3].nsects = x->nsecs - first_d; - x->segs[3].first_sec = first_d; - - /* Group MSecs by (segname, sectname) within each segment so vaddr - * placement keeps same-named runs contiguous. Otherwise Phase B's - * adjacency-based coalescing splits a single Mach-O section into - * multiple OutSecs (e.g. `.text` from an in-memory ObjBuilder and - * `__TEXT,__text` from a Mach-O .o input both map to `__TEXT,__text` - * but arrive in separate link_layout groups, interleaved with other - * sections from each input). Stable insertion sort preserves input - * order within a name, which matters for synth __stubs/__thread_ptrs - * order relative to peers. 
*/ - for (u32 i = 0; i < x->nsegs; ++i) { - MSeg* sg = &x->segs[i]; - if (sg->nsects < 2) continue; - u32 base = sg->first_sec; - u32 n = sg->nsects; - for (u32 a = 1; a < n; ++a) { - MSec key = x->secs[base + a]; - msec_repair_name_ptrs(&key); - u32 j = a; - while (j > 0) { - MSec* prev = &x->secs[base + j - 1]; - /* Ordering compare for stable sort: slices don't order, keep strcmp. */ - int cmp = strcmp(prev->segname, key.segname); /* ordering */ - if (cmp == 0) - cmp = strcmp(prev->sectname, key.sectname); /* ordering */ - if (cmp <= 0) break; - x->secs[base + j] = x->secs[base + j - 1]; - msec_repair_name_ptrs(&x->secs[base + j]); - --j; - } - x->secs[base + j] = key; - msec_repair_name_ptrs(&x->secs[base + j]); - } - } - - /* Phase A: count OutSecs per segment (distinct sectnames) so we can - * size the load commands before placing vaddrs. Phase B builds the - * actual OutSec[] after placement, when vaddrs are final. */ - for (u32 i = 0; i < x->nsegs; ++i) { - MSeg* sg = &x->segs[i]; - u32 cnt = 0; - for (u32 a = sg->first_sec; a < sg->first_sec + sg->nsects; ++a) { - int seen = 0; - for (u32 b = sg->first_sec; b < a; ++b) { - if (slice_eq_cstr(slice_from_cstr(x->secs[a].sectname), - x->secs[b].sectname) && - slice_eq_cstr(slice_from_cstr(x->secs[a].segname), - x->secs[b].segname)) { - seen = 1; - break; - } - } - if (!seen) ++cnt; - } - sg->nouts = cnt; - sg->first_out = 0; /* assigned in Phase B */ - } - - /* Compute load-command count + sizeofcmds, then back-fill section - * offsets. Layout pass 2. */ - u32 nseg_real = 0; - for (u32 i = 0; i < x->nsegs; ++i) { - /* Skip __DATA_CONST or __DATA if no sections (edge case). */ - if (i == 0) { - ++nseg_real; - continue; - } /* PAGEZERO */ - if (i == 4) { - ++nseg_real; - continue; - } /* LINKEDIT always */ - if (x->segs[i].nsects > 0) ++nseg_real; - } - /* Each LC_SEGMENT_64 carries 72 + 80*nouts bytes (one section_64 - * record per coalesced (segname,sectname), not per MSec). 
*/ - u32 sizeofcmds = 0; - for (u32 i = 0; i < x->nsegs; ++i) { - if (i == 0 || i == 4) { - sizeofcmds += MACHO_SEGCMD64_SIZE; /* no sections */ - continue; - } - if (x->segs[i].nsects == 0) continue; - sizeofcmds += MACHO_SEGCMD64_SIZE + x->segs[i].nouts * MACHO_SECT64_SIZE; - } - (void)nseg_real; - /* LC_DYLD_CHAINED_FIXUPS / LC_DYLD_EXPORTS_TRIE */ - sizeofcmds += 16u + 16u; - /* LC_SYMTAB / LC_DYSYMTAB */ - sizeofcmds += MACHO_SYMTAB_CMD_SIZE + MACHO_DYSYMTAB_CMD_SIZE; - /* LC_LOAD_DYLINKER */ - { - u32 ld_size = 12u + (u32)(sizeof("/usr/lib/dyld") - 1u) + 1u; - sizeofcmds += (u32)ALIGN_UP((u64)ld_size, 8u); - } - /* LC_UUID + LC_BUILD_VERSION + LC_MAIN */ - sizeofcmds += 24u + 24u + 24u; - /* LC_LOAD_DYLIB per dylib */ - for (u32 i = 0; i < x->ndylibs; ++i) { - size_t nl = pool_slice(x->c->global, x->dylibs[i].install).len; - u32 sz = 24u + (u32)nl + 1u; - sizeofcmds += (u32)ALIGN_UP((u64)sz, 8u); - } - /* LC_FUNCTION_STARTS / LC_DATA_IN_CODE / LC_CODE_SIGNATURE */ - sizeofcmds += 16u + 16u + 16u; - - x->headers_size = MACHO_HDR64_SIZE + sizeofcmds; - - /* Now place sections in __TEXT, __DATA_CONST, __DATA. */ - u64 vaddr = MZ_PAGEZERO + x->headers_size; - u64 fileoff = x->headers_size; - /* Pad __TEXT sections to natural alignment. */ - for (u32 i = 0; i < x->nsegs; ++i) { - if (i == 0 || i == 4) continue; - MSeg* sg = &x->segs[i]; - if (i > 1) { - /* page-align the start of __DATA_CONST and __DATA */ - vaddr = ALIGN_UP(vaddr, MZ_PAGE); - fileoff = ALIGN_UP(fileoff, MZ_PAGE); - } - sg->vmaddr = (i == 1) ? MZ_PAGEZERO : vaddr; - sg->fileoff = (i == 1) ? 0 : fileoff; - /* __TEXT carries the headers_size + sections. */ - u64 seg_start_v = sg->vmaddr; - u64 seg_start_f = sg->fileoff; - /* For __TEXT, sections begin after the header area. */ - u64 cur_v = (i == 1) ? (seg_start_v + x->headers_size) : seg_start_v; - u64 cur_f = (i == 1) ? 
(seg_start_f + x->headers_size) : seg_start_f; - u64 first_zerofill_v = 0; - int seen_zerofill = 0; - /* Non-zerofill first */ - for (u32 j = 0; j < sg->nsects; ++j) { - MSec* m = &x->secs[sg->first_sec + j]; - if (m->is_zerofill) continue; - cur_v = ALIGN_UP(cur_v, (u64)m->align); - cur_f = ALIGN_UP(cur_f, (u64)m->align); - m->vaddr = cur_v; - m->file_offset = cur_f; - cur_v += m->size; - cur_f += m->size; - } - first_zerofill_v = cur_v; - /* zerofill last (no file bytes) */ - for (u32 j = 0; j < sg->nsects; ++j) { - MSec* m = &x->secs[sg->first_sec + j]; - if (!m->is_zerofill) continue; - cur_v = ALIGN_UP(cur_v, (u64)m->align); - m->vaddr = cur_v; - m->file_offset = 0; - cur_v += m->size; - seen_zerofill = 1; - } - sg->filesize = (i == 1) - ? (cur_f - seg_start_f) - : (first_zerofill_v ? (first_zerofill_v - seg_start_v) - : (cur_v - seg_start_v)); - sg->vmsize = ALIGN_UP(cur_v - seg_start_v, MZ_PAGE); - if (sg->vmsize == 0 && sg->nsects > 0) sg->vmsize = MZ_PAGE; - if (i == 1) { - x->stubs_vaddr = 0; - for (u32 j = 0; j < sg->nsects; ++j) { - MSec* m = &x->secs[sg->first_sec + j]; - if (slice_eq_cstr(slice_from_cstr(m->sectname), "__stubs")) - x->stubs_vaddr = m->vaddr; - } - x->text_filesz = sg->filesize; - } - if (i == 2) { - for (u32 j = 0; j < sg->nsects; ++j) { - MSec* m = &x->secs[sg->first_sec + j]; - if (slice_eq_cstr(slice_from_cstr(m->sectname), "__got")) - x->got_vaddr = m->vaddr; - } - x->data_const_vaddr = sg->vmaddr; - x->data_const_filesz = sg->filesize; - } - if (i == 3) { - for (u32 j = 0; j < sg->nsects; ++j) { - MSec* m = &x->secs[sg->first_sec + j]; - if (slice_eq_cstr(slice_from_cstr(m->sectname), "__thread_ptrs")) - x->tlv_ptrs_vaddr = m->vaddr; - /* TLS storage image base: min vaddr across __thread_data and - * __thread_bss sections. __thread_vars is excluded — it holds - * the descriptors, not the data that maps into the per-thread - * block. 
*/ - if ((slice_eq_cstr(slice_from_cstr(m->sectname), "__thread_data") || - slice_eq_cstr(slice_from_cstr(m->sectname), "__thread_bss")) && - (!x->has_tls_image || m->vaddr < x->tls_image_vaddr)) { - x->tls_image_vaddr = m->vaddr; - x->has_tls_image = 1; - } - } - x->data_vaddr = sg->vmaddr; - x->data_filesz = sg->filesize; - x->data_memsz = sg->vmsize; - } - vaddr = sg->vmaddr + sg->vmsize; - /* Mach-O segments are mapped in page units. If a segment's memory - * image extends past its initialized file bytes (for example - * __DATA,__bss), the following segment's fileoff must not reuse those - * pages or the kernel can map later file contents into the zero-fill - * tail. */ - fileoff = sg->fileoff + ((sg->vmsize > ALIGN_UP(sg->filesize, MZ_PAGE)) - ? sg->vmsize - : sg->filesize); - (void)seen_zerofill; - } - /* LINKEDIT placeholder; size is filled after blob assembly. */ - vaddr = ALIGN_UP(vaddr, MZ_PAGE); - fileoff = ALIGN_UP(fileoff, MZ_PAGE); - x->segs[4].vmaddr = vaddr; - x->segs[4].fileoff = fileoff; - x->linkedit_vaddr = vaddr; - x->linkedit_fileoff = fileoff; - - /* Encode __stubs bytes now that vaddrs are settled. Internal-GOT - * entries have stub_idx=0 (direct CALL26, no stub) and must be - * skipped so the (stub_idx - 1u) arithmetic doesn't wrap. */ - for (u32 i = 0; i < x->nimports; ++i) { - MachImp* mi = &x->imports[i]; - if (!mi->is_func || !mi->stub_idx) continue; - u64 stub_v = - x->stubs_vaddr + (mi->stub_idx - 1u) * x->arch->macho_stub_size; - u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE; - x->arch->emit_macho_stub( - x->stubs_bytes + (mi->stub_idx - 1u) * x->arch->macho_stub_size, stub_v, - got_v); - } - - /* Phase B: build OutSec[] now that all MSec vaddrs are final. Walk - * MSecs sorted by (segidx, vaddr) and coalesce adjacent same-name - * runs. Mirrors link_elf.c's OutShdr build at link_elf.c:879. 
*/ - { - u32* order = - (u32*)h->alloc(h, sizeof(u32) * (x->nsecs + 1u), _Alignof(u32)); - if (!order && x->nsecs) - compiler_panic(x->c, no_loc(), "link_macho: oom on outsec sort"); - for (u32 i = 0; i < x->nsecs; ++i) order[i] = i; - /* Insertion sort — section count is small. */ - for (u32 i = 1; i < x->nsecs; ++i) { - u32 cur = order[i]; - MSec* a = &x->secs[cur]; - u32 j = i; - while (j > 0) { - MSec* b = &x->secs[order[j - 1]]; - if ((b->segidx < a->segidx) || - (b->segidx == a->segidx && b->vaddr <= a->vaddr)) - break; - order[j] = order[j - 1]; - --j; - } - order[j] = cur; - } - u32 cap = x->nsecs + 1u; - x->outs = (OutSec*)h->alloc(h, sizeof(OutSec) * cap, _Alignof(OutSec)); - if (!x->outs) compiler_panic(x->c, no_loc(), "link_macho: oom on OutSec"); - memset(x->outs, 0, sizeof(OutSec) * cap); - x->nouts = 0; - for (u32 i = 0; i < x->nsecs; ++i) { - MSec* m = &x->secs[order[i]]; - OutSec* tail = x->nouts ? &x->outs[x->nouts - 1] : NULL; - int merge = tail && tail->segidx == m->segidx && - slice_eq_cstr(slice_from_cstr(tail->sectname), - m->sectname) && - slice_eq_cstr(slice_from_cstr(tail->segname), m->segname); - if (merge) { - if (tail->flags != m->flags || tail->is_zerofill != m->is_zerofill) - compiler_panic( - x->c, no_loc(), - "link_macho: coalesce mismatch on %.*s,%.*s (flags/zerofill)", - SLICE_ARG(slice_from_cstr(m->segname)), - SLICE_ARG(slice_from_cstr(m->sectname))); - u64 end = m->vaddr + m->size; - u64 prev_end = tail->vaddr + tail->size; - if (end > prev_end) tail->size = end - tail->vaddr; - if (m->align > tail->align) tail->align = m->align; - } else { - OutSec* o = &x->outs[x->nouts++]; - o->segname = m->segname; - o->sectname = m->sectname; - o->vaddr = m->vaddr; - o->file_offset = m->file_offset; - o->size = m->size; - o->align = m->align; - o->flags = m->flags; - o->reserved1 = m->reserved1; - o->reserved2 = m->reserved2; - o->segidx = m->segidx; - o->is_zerofill = m->is_zerofill; - } - } - h->free(h, order, sizeof(u32) * (x->nsecs + 
1u));
-    /* Recompute per-segment OutSec span; Phase A's count was for
-     * sizeofcmds sizing — recompute it here as the source of truth and
-     * assert agreement. */
-    for (u32 i = 0; i < x->nsegs; ++i) {
-      x->segs[i].first_out = 0;
-    }
-    u32 prev_nouts[5];
-    for (u32 i = 0; i < x->nsegs; ++i) prev_nouts[i] = x->segs[i].nouts;
-    for (u32 i = 0; i < x->nsegs; ++i) x->segs[i].nouts = 0;
-    for (u32 i = 0; i < x->nouts; ++i) {
-      u8 sx = x->outs[i].segidx;
-      if (x->segs[sx].nouts == 0) x->segs[sx].first_out = i;
-      ++x->segs[sx].nouts;
-    }
-    for (u32 i = 0; i < x->nsegs; ++i) {
-      if (prev_nouts[i] != x->segs[i].nouts)
-        compiler_panic(x->c, no_loc(),
-                       "link_macho: OutSec count drift seg %u (%u vs %u)",
-                       (u32)i, prev_nouts[i], x->segs[i].nouts);
-    }
-  }
-}
-
-/* ---- pass: shift LinkImage into final vaddrs/file_offsets ----
- *
- * The sections in img->sections are still in their original
- * link_layout coordinates. Map each LinkSection -> its MSec and copy
- * the final vaddr/file_offset so reloc-apply walks correctly.
- *
- * For every MSec that is backed by a LinkSection and whose coordinates
- * changed, this rewrites the LinkSection's vaddr/file_offset, then
- * rebases by the same delta (a) every reloc whose link_section_id
- * matches and (b) every defined, non-SK_ABS symbol whose section_id
- * matches. Each moved section costs one full pass over img->relocs and
- * one over img->syms. */
-
-static void shift_sections(MCtx* x) {
-  LinkImage* img = x->img;
-  /* No lookup table is built: walk the MSecs directly and skip the ones
-   * that have no backing LinkSection (link_sec_id == 0). */
-  for (u32 i = 0; i < x->nsecs; ++i) {
-    MSec* m = &x->secs[i];
-    if (!m->link_sec_id) continue;
-    /* link_sec_id is 1-based: it selects slot (id - 1) of img->sections. */
-    LinkSection* ls = &img->sections[m->link_sec_id - 1u];
-    /* Snapshot the old and new coordinates; everything below is rebased
-     * by (new - old). Relocs and symbols are matched by section id, not
-     * by address range. */
-    u64 old_v = ls->vaddr;
-    u64 old_f = ls->file_offset;
-    u64 new_v = m->vaddr;
-    u64 new_f = m->file_offset;
-    if (old_v == new_v && old_f == new_f) continue; /* nothing moved */
-    /* Update the LinkSection itself. */
-    ls->vaddr = new_v;
-    ls->file_offset = new_f;
-    /* Update relocs that target this section: both the write address and
-     * the write file offset keep their distance from the section start. */
-    for (u32 ri = 0; ri < LinkRelocs_count(&img->relocs); ++ri) {
-      LinkRelocApply* r = LinkRelocs_at(&img->relocs, ri);
-      if (r->link_section_id != ls->id) continue;
-      r->write_vaddr = new_v + (r->write_vaddr - old_v);
-      r->write_file_offset = new_f + (r->write_file_offset - old_f);
-    }
-    /* Update LinkSyms that belong to this LinkSection. Match by
-     * section_id rather than vaddr range — multiple input sections
-     * may share the same pre-shift vaddr (each bucket in
-     * link_layout starts at offset 0). */
-    for (u32 si = 0; si < LinkSyms_count(&img->syms); ++si) {
-      LinkSymbol* s = LinkSyms_at(&img->syms, si);
-      if (!s->defined) continue;
-      if (s->kind == SK_ABS) continue;
-      if (s->section_id != ls->id) continue;
-      s->vaddr = new_v + (s->vaddr - old_v);
-    }
-  }
-}
-
-/* ---- pass: apply relocations + collect chained-fixup sites ----
- *
- * Reloc dispatch:
- *   target=imported func + CALL26/JUMP26 -> S = stub vaddr
- *   target=import + GOT_LOAD_PAGE21/PAGEOFF12 -> S = got slot vaddr
- *   target=import + ABS64 -> write 0; collect bind site
- *   target=internal + ABS64 -> write target VA; collect rebase site
- *   everything else -> standard apply
- *
- * Patch sites for chained fixups are 8-byte slots; for ABS32 we do not
- * support fixups (no chained-fixup format for 32-bit pointers in
- * standard arm64 — would need DYLD_CHAINED_PTR_32). Internal R_ABS32
- * still works (no slide adjustment is wrong technically, but for
- * compile-time-known offsets it suffices). 
- */
-/* One pointer slot that needs a chained fixup. Sites are appended by
- * apply_relocs and consumed by the chained-fixups assembler. */
-typedef struct FixSite {
-  u8 segidx;  /* 2 = __DATA_CONST, 3 = __DATA (apply_relocs stores the
-               * owning MSec's segidx for reloc-derived sites) */
-  u8 is_bind; /* 0 = rebase, 1 = bind */
-  u8 pad[2];
-  u32 import_idx;    /* 1-based import index for binds, 0 for rebases */
-  u64 vaddr;         /* absolute VA of the slot */
-  u64 rebase_target; /* unslid target VA; only used for rebases */
-} FixSite;
-/* Growable array of FixSite: `a` holds `n` used entries out of `cap`
- * allocated ones; storage is released through `heap`. */
-typedef struct FixList {
-  Heap* heap;
-  FixSite* a;
-  u32 n;
-  u32 cap;
-} FixList;
-/* Put fl into the empty state (no storage) and remember h for later
- * growth and release. */
-static void fix_init(FixList* fl, Heap* h) {
-  fl->heap = h;
-  fl->a = NULL;
-  fl->n = 0;
-  fl->cap = 0;
-}
-static void fix_fini(FixList* fl) { /* release storage, back to empty */
-  if (fl->a) fl->heap->free(fl->heap, fl->a, sizeof(*fl->a) * fl->cap);
-  fl->a = NULL;
-  fl->n = fl->cap = 0;
-}
-static void fix_push(FixList* fl, const FixSite* s) { /* append copy of *s */
-  /* NOTE(review): when VEC_GROW returns nonzero (presumably allocation
-   * failure — confirm) the site is dropped with no diagnostic, so the
-   * slot would silently get no fixup. */
-  if (VEC_GROW(fl->heap, fl->a, fl->cap, fl->n + 1u)) return;
-  fl->a[fl->n++] = *s;
-}
-
-/* find MSec covering an absolute vaddr: linear scan that returns the
- * first MSec whose [vaddr, vaddr + size) contains v, or NULL when none
- * does. Zero-sized sections can never match. */
-static MSec* msec_for_vaddr(MCtx* x, u64 v) {
-  for (u32 i = 0; i < x->nsecs; ++i) {
-    MSec* m = &x->secs[i];
-    if (v >= m->vaddr && v < m->vaddr + m->size) return m;
-  }
-  return NULL;
-}
-/* Return the start of the byte buffer that holds m's contents (the
- * buffer base, not a pointer to the section itself), or NULL when no
- * buffer is available. For non-synthetic MSecs this indexes
- * img->sections with link_sec_id - 1, so link_sec_id must be nonzero. */
-static u8* bytes_for_section(MCtx* x, MSec* m, LinkImage* img) {
-  if (m->synth_data) {
-    /* Synthetic — caller reads/writes via x->stubs_bytes / x->got_bytes.
-     * Any other synthetic buffer is not handed out here (NULL). */
-    if (m->synth_data == x->stubs_bytes) return x->stubs_bytes;
-    if (m->synth_data == x->got_bytes) return x->got_bytes;
-    return NULL;
-  }
-  /* Backed by a LinkSection: find the LinkSegment buffer that section
-   * sits in (link_layout.c stored input section bytes there). */
-  LinkSection* ls = &img->sections[m->link_sec_id - 1u];
-  u32 segid = ls->segment_id;
-  if (segid == LINK_SEG_NONE) return NULL;
-  return img->segment_bytes[segid - 1u]; /* segment ids are 1-based */
-}
-
-/* Map the LinkSection that backs a write_vaddr to an MSec, then to the
- * underlying byte buffer.
- *
- * Returns a pointer to the first byte the reloc r patches, or NULL when
- * r has no section, no MSec carries that section's id, or the section
- * has no byte buffer. *out_msec (if out_msec is non-NULL) is written
- * only on success. */
-static u8* patch_ptr(MCtx* x, LinkImage* img, const LinkRelocApply* r,
-                     MSec** out_msec) {
-  /* Look up via the LinkSection. After shift_sections the section
-   * vaddr is the Mach-O vaddr; the corresponding MSec backs it. */
-  if (r->link_section_id == LINK_SEC_NONE) return NULL;
-  LinkSection* ls = &img->sections[r->link_section_id - 1u];
-  /* Find the MSec by link_sec_id. */
-  MSec* m = NULL;
-  for (u32 i = 0; i < x->nsecs; ++i) {
-    if (x->secs[i].link_sec_id == ls->id) {
-      m = &x->secs[i];
-      break;
-    }
-  }
-  if (!m) return NULL;
-  /* The LinkSegment's bytes are valid (not shifted), but the offset
-   * within them is the original input_offset. Use input_offset for
-   * the byte offset, since the LinkSegment buffer wasn't reshuffled. */
-  /* link_layout.c set ls->file_offset = seg.file_offset + input_offset
-   * originally. ls->vaddr similarly. After our shift, they're new.
-   * The byte offset within the segment buffer is still input_offset. */
-  u8* base = bytes_for_section(x, m, img);
-  if (!base) return NULL;
-  u32 within_section = (u32)(r->write_vaddr - m->vaddr);
-  /* ls->file_offset may have been rewritten by the shift pass, so it
-   * cannot be used to derive the position inside the segment buffer.
-   * The buffer position is taken from ls->input_offset instead:
-   * byte offset = input_offset + (write_vaddr - section vaddr),
-   * truncated to 32 bits. */
-  u32 in_off = (u32)(ls->input_offset + within_section);
-  if (out_msec) *out_msec = m;
-  return base + in_off;
-}
-
-/* Symbol-relative resolved-address S, accounting for imports.
- *
- * Returns 1 when the symbol is imported (*out_S stays 0 and
- * *out_imp_idx is its import index, 0 if none was found) and 0
- * otherwise. For a non-imported symbol *out_S is its vaddr and
- * *out_imp_idx is nonzero only if it has an import-table entry. For
- * LINK_SYM_NONE or an unknown id both outputs are 0. */
-static int sym_S(MCtx* x, LinkImage* img, LinkSymId id, u64* out_S,
-                 int* out_imp_idx) {
-  *out_S = 0;
-  *out_imp_idx = 0;
-  if (id == LINK_SYM_NONE) return 0;
-  LinkSymbol* s = sym_at(img, id);
-  if (!s) return 0;
-  /* Look up the import index — real imports plus internal-GOT entries
-   * the collect_imports pass materialized for GOT-routed internal refs.
-   * If this id has no entry, retry with the id that img->globals maps
-   * the symbol's name to. */
-  u32 idx = 0;
-  if (id < x->sym_to_imp_size) idx = x->sym_to_imp[id];
-  if (!idx && s->name != 0) {
-    LinkSymId canon = symhash_get(&img->globals, s->name);
-    if (canon != LINK_SYM_NONE && canon < x->sym_to_imp_size)
-      idx = x->sym_to_imp[canon];
-  }
-  if (s->imported) {
-    *out_imp_idx = (int)idx;
-    return 1;
-  }
-  /* Internal symbol that has a GOT slot — surface the import index so
-   * the GOT_LOAD reloc paths in apply_relocs find it, but also expose
-   * S=vaddr so non-GOT relocs (CALL26 etc.) still apply directly. */
-  *out_imp_idx = (int)idx;
-  *out_S = s->vaddr;
-  return 0;
-}
-/* Patch every reloc in x->img and append to fl the bind/rebase sites
- * that need a chained fixup; afterwards add one site per __got slot and
- * per __thread_ptrs slot. Relocs with no target symbol, or whose patch
- * location cannot be resolved by patch_ptr, are skipped. */
-static void apply_relocs(MCtx* x, FixList* fl) {
-  LinkImage* img = x->img;
-  for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) {
-    LinkRelocApply* r = LinkRelocs_at(&img->relocs, i);
-    if (r->target == LINK_SYM_NONE) continue;
-    MSec* msec = NULL;
-    u8* P_bytes = patch_ptr(x, img, r, &msec);
-    if (!P_bytes) continue;
-    u64 P = r->write_vaddr;
-
-    u64 S;
-    int imp_idx;
-    int is_imp = sym_S(x, img, r->target, &S, &imp_idx);
-
-    /* TLVP relocs route through a __thread_ptrs slot regardless of
-     * whether the descriptor target is in-image or imported. Resolved
-     * before the import / internal split because an imported TLV
-     * descriptor doesn't use the __got slot (its address lives in
-     * __thread_ptrs with its own chained bind). */
-    if (x->arch->is_tlvp_reloc && x->arch->is_tlvp_reloc(r->kind)) {
-      u32 tlv_idx =
-          (r->target < x->sym_to_tlv_size) ? x->sym_to_tlv[r->target] : 0u;
-      if (!tlv_idx)
-        compiler_panic(x->c, no_loc(),
-                       "link_macho: TLVP reloc has no __thread_ptrs slot");
-      u64 slot_v = x->tlv_ptrs_vaddr + (tlv_idx - 1u) * MZ_TLVP_SIZE;
-      link_reloc_apply(x->c, r->kind, P_bytes, slot_v, r->addend, P);
-      continue;
-    }
-
-    if (is_imp) {
-      /* mi stays NULL when the import has no table entry (imp_idx == 0). */
-      MachImp* mi = (imp_idx > 0) ? &x->imports[imp_idx - 1] : NULL;
-      if (x->arch->is_branch_reloc && x->arch->is_branch_reloc(r->kind)) {
-        if (!mi || !mi->stub_idx)
-          compiler_panic(x->c, no_loc(),
-                         "link_macho: import has no stub for branch");
-        u64 stub_v =
-            x->stubs_vaddr + (mi->stub_idx - 1u) * x->arch->macho_stub_size;
-        link_reloc_apply(x->c, r->kind, P_bytes, stub_v, r->addend, P);
-        continue;
-      }
-      if (x->arch->is_got_load_reloc && x->arch->is_got_load_reloc(r->kind)) {
-        if (!mi)
-          compiler_panic(x->c, no_loc(),
-                         "link_macho: GOT reloc for unknown import");
-        u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE;
-        link_reloc_apply(x->c, r->kind, P_bytes, got_v, r->addend, P);
-        continue;
-      }
-      if (x->arch->is_direct_page_reloc &&
-          x->arch->is_direct_page_reloc(r->kind)) {
-        /* Direct page/lo12 against an import: route through __got. */
-        if (!mi)
-          compiler_panic(x->c, no_loc(),
-                         "link_macho: PAGE/LO12 against unknown import");
-        u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE;
-        link_reloc_apply(x->c, r->kind, P_bytes, got_v, r->addend, P);
-        continue;
-      }
-      if (r->kind == R_ABS64) {
-        /* Direct 8-byte absolute against an import: bind the slot.
-         * imp_idx is recorded as-is, so it may be 0 here. */
-        wr_u64_le(P_bytes, 0);
-        FixSite fs = {(u8)msec->segidx, 1, {0}, (u32)imp_idx, P, 0};
-        fix_push(fl, &fs);
-        continue;
-      }
-      compiler_panic(x->c, no_loc(),
-                     "link_macho: unhandled reloc kind %u against imported "
-                     "symbol",
-                     (u32)r->kind);
-    }
-
-    /* Internal relocs. */
-    if (r->kind == R_ABS64) {
-      /* Special case: ABS64 reloc inside a TLV descriptor record
-       * (__thread_vars section) targeting in-image TLS storage. This
-       * is the descriptor's word-2 "offset" field — dyld interprets it
-       * as the per-thread offset of the storage within the TLS image,
-       * NOT as an absolute address. Apple's ld writes the literal
-       * offset and emits no chained-fixup entry; replicate that so the
-       * chain skips over this slot (chained_fixups already does the
-       * right thing: no fixsite -> no chain link). */
-      if (msec && (msec->flags & SECTION_TYPE) == S_THREAD_LOCAL_VARIABLES &&
-          x->has_tls_image) {
-        u64 offset = (S + (u64)r->addend) - x->tls_image_vaddr;
-        wr_u64_le(P_bytes, offset);
-        continue;
-      }
-      /* Rebase site: write the unslid target and record it for the chain. */
-      wr_u64_le(P_bytes, S + (u64)r->addend);
-      FixSite fs = {(u8)msec->segidx, 0, {0}, 0, P, S + (u64)r->addend};
-      fix_push(fl, &fs);
-      continue;
-    }
-    /* Internal symbol routed through __got (clang emits GOT_LOAD_PAGE21
-     * for any extern global, even if the def is in-image). imp_idx
-     * was populated by collect_imports' internal-GOT pass; redirect
-     * the page/lo12 reloc to the GOT slot's vaddr. */
-    if (imp_idx > 0 && x->arch->is_got_load_reloc &&
-        x->arch->is_got_load_reloc(r->kind)) {
-      MachImp* mi = &x->imports[imp_idx - 1];
-      u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE;
-      link_reloc_apply(x->c, r->kind, P_bytes, got_v, r->addend, P);
-      continue;
-    }
-    /* Generic apply. */
-    link_reloc_apply(x->c, r->kind, P_bytes, S, r->addend, P);
-  }
-
-  /* Per-slot chained fixup. Real imports → bind (dyld resolves at
-   * load). Internal GOT entries → rebase pointing at the symbol's
-   * image-relative vaddr; a target vaddr of 0 (weak undef → NULL) gets
-   * no fixup, just a literal zero slot — chained fixups treat 0 as a
-   * gap and won't disturb it. */
-  for (u32 i = 0; i < x->nimports; ++i) {
-    MachImp* mi = &x->imports[i];
-    u64 slot_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE;
-    if (mi->internal) {
-      /* Re-read the symbol's final vaddr now that shift_sections has
-       * rebased every defined symbol into the Mach-O image layout
-       * (collect_imports snapshotted too early). */
-      LinkSymbol* s = sym_at(img, mi->sym);
-      u64 tgt_v = s ? s->vaddr : 0;
-      u8* slot = x->got_bytes + (mi->got_idx - 1u) * MZ_GOT_SIZE;
-      wr_u64_le(slot, tgt_v);
-      if (tgt_v == 0) continue; /* weak-undef → NULL */
-      FixSite fs = {2u, 0, {0}, 0, slot_v, tgt_v};
-      fix_push(fl, &fs);
-    } else {
-      /* clear slot bytes (already zero) — dyld writes via chain.
-       * The bind's import index is this entry's position + 1. */
-      FixSite fs = {2u, 1, {0}, i + 1u, slot_v, 0};
-      fix_push(fl, &fs);
-    }
-  }
-
-  /* Per-slot TLV pointer fixups. Mirror of the __got loop above: each
-   * __thread_ptrs slot points at the descriptor record. When the
-   * descriptor is in-image (internal) we REBASE to its final vaddr; when
-   * it lives in a dylib we BIND through the descriptor's MachImp. The
-   * slot itself lives in __DATA (segidx=3), distinct from __got's
-   * __DATA_CONST (segidx=2). */
-  for (u32 i = 0; i < x->ntlv; ++i) {
-    MachTlv* ts = &x->tlv_slots[i];
-    u64 slot_v = x->tlv_ptrs_vaddr + (ts->tlv_idx - 1u) * MZ_TLVP_SIZE;
-    u8* slot = x->tlv_ptrs_bytes + (ts->tlv_idx - 1u) * MZ_TLVP_SIZE;
-    if (ts->imported) {
-      if (!ts->import_idx)
-        compiler_panic(x->c, no_loc(),
-                       "link_macho: imported TLV without matching import slot");
-      wr_u64_le(slot, 0);
-      FixSite fs = {3u, 1, {0}, ts->import_idx, slot_v, 0};
-      fix_push(fl, &fs);
-    } else {
-      LinkSymbol* s = sym_at(img, ts->sym);
-      u64 tgt_v = s ? s->vaddr : 0;
-      wr_u64_le(slot, tgt_v);
-      if (tgt_v == 0) continue; /* weak-undef descriptor → NULL */
-      FixSite fs = {3u, 0, {0}, 0, slot_v, tgt_v};
-      fix_push(fl, &fs);
-    }
-  }
-}
-
-/* ---- chained fixups blob assembler ----
- *
- * For each segment that has fixups, build a dyld_chained_starts_in_segment
- * with one chain per page (MZ_PAGE). Within a page, sort sites by
- * offset, encode each as DYLD_CHAINED_PTR_64, and link via the `next`
- * field (4-byte units, 0 = end of chain). 
- */
-/* Per-page chain bookkeeping record.
- * NOTE(review): nothing in the code visible here reads or writes this
- * type — confirm whether it is still used elsewhere. */
-typedef struct PageChain {
-  u32 first_offset_in_page; /* relative to page start */
-  u32 nsites;
-  u32 first_site_idx; /* into a per-segment site array */
-} PageChain;
-/* Three-way comparison of two FixSite records by vaddr: returns -1, 0
- * or 1. Takes void pointers, so it has a qsort-style signature. */
-static int site_cmp_by_vaddr(const void* a, const void* b) {
-  const FixSite* x = a;
-  const FixSite* y = b;
-  if (x->vaddr < y->vaddr) return -1;
-  if (x->vaddr > y->vaddr) return 1;
-  return 0;
-}
-
-/* tiny insertion sort to avoid pulling qsort: sorts a[0..n) in place
- * by ascending vaddr. Entries with equal vaddr keep their relative
- * order because only strictly greater neighbours are shifted. */
-static void sort_sites(FixSite* a, u32 n) {
-  for (u32 i = 1; i < n; ++i) {
-    FixSite tmp = a[i];
-    u32 j = i;
-    while (j > 0 && site_cmp_by_vaddr(&a[j - 1], &tmp) > 0) {
-      a[j] = a[j - 1];
-      --j;
-    }
-    a[j] = tmp;
-  }
-}
-/* Encode one chained pointer and store it at slot with wr_u64_le.
- * Bind:   ord_or_target_lo is the ordinal (masked to 24 bits);
- *         high_or_target_hi is ignored; the addend is always 0.
- * Rebase: high_or_target_hi:ord_or_target_lo form the 64-bit target,
- *         which is then masked to 36 bits; high8 is always 0.
- * next4 is masked to 12 bits in both cases. */
-static void emit_pointer(u8* slot, int is_bind, u32 ord_or_target_lo,
-                         u32 high_or_target_hi, u32 next4) {
-  /* DYLD_CHAINED_PTR_64:
-   *   bind  : ordinal:24, addend:8, reserved:19, next:12, bind:1=1
-   *   rebase: target:36 (vmaddr), high8:8, reserved:7, next:12, bind:1=0
-   */
-  u64 v = 0;
-  if (is_bind) {
-    u64 ordinal = (u64)ord_or_target_lo & 0xffffffull; /* 24 bits */
-    u64 addend = 0;
-    u64 next = (u64)next4 & 0xfffull;
-    v = ordinal | (addend << 24) | (0ull /* reserved */ << 32) | (next << 51) |
-        ((u64)1 << 63);
-  } else {
-    /* rebase: target is full vmaddr; we get hi:lo split. */
-    u64 target = ((u64)high_or_target_hi << 32) | (u64)ord_or_target_lo;
-    target &= ((u64)1 << 36) - 1u; /* 36 bits */
-    u64 high8 = 0;
-    u64 next = (u64)next4 & 0xfffull;
-    v = target | (high8 << 36) | (0ull /* reserved */ << 44) | (next << 51) |
-        ((u64)0 << 63);
-  }
-  wr_u64_le(slot, v);
-}
-/* Assemble the chained-fixups blob into x->chained_fixups and write the
- * chain pointers into the section byte buffers. Sorts fl->a in place by
- * vaddr. Panics if a site's slot cannot be located in any buffer, if a
- * bind's import index is outside 1..nimports_real, or if a real import
- * has no name or an out-of-range dylib ordinal. */
-static void build_chained_fixups(MCtx* x, FixList* fl) {
-  Heap* h = x->h;
-  MByte* out = &x->chained_fixups;
-  mbuf_init(out, h);
-
-  /* Header — the seven u32 fields written below (28 bytes of fields;
-   * NOTE(review): this comment previously said "32 B", confirm whether
-   * the alignment call that follows pads it):
-   *   uint32 fixups_version (=0)
-   *   uint32 starts_offset
-   *   uint32 imports_offset
-   *   uint32 symbols_offset
-   *   uint32 imports_count
-   *   uint32 imports_format (=1)
-   *   uint32 symbols_format (=0)
-   */
-  u32 hdr_pos = mbuf_u32(out, 0); /* fixups_version */
-  (void)hdr_pos;
-  u32 starts_offset_pos = mbuf_u32(out, 0);
-  u32 imports_offset_pos = mbuf_u32(out, 0);
-  u32 symbols_offset_pos = mbuf_u32(out, 0);
-  mbuf_u32(out, x->nimports_real);
-  mbuf_u32(out, DYLD_CHAINED_IMPORT);
-  mbuf_u32(out, 0); /* symbols uncompressed */
-  /* Align before the starts table. NOTE(review): the original note says
-   * dyld expects 8-byte alignment of the starts table, but the argument
-   * passed here is 4 — confirm mbuf_align's units and the alignment
-   * actually required. */
-  mbuf_align(out, 4);
-
-  /* dyld_chained_starts_in_image:
-   *   uint32 seg_count
-   *   uint32 seg_info_offset[seg_count]
-   *
-   * seg_count must equal mach-O segment count (5).
-   * seg_info_offset[i] = 0 means no fixups in that segment.
-   */
-  u32 starts_off = out->len;
-  wr_u32_le(out->data + starts_offset_pos, starts_off);
-  mbuf_u32(out, x->nsegs);
-  /* Reserve seg_info_offset[]. */
-  u32 seg_info_offsets_pos = out->len;
-  for (u32 i = 0; i < x->nsegs; ++i) mbuf_u32(out, 0);
-
-  /* Sort fixsites by vaddr globally. */
-  sort_sites(fl->a, fl->n);
-
-  /* Per segment, emit dyld_chained_starts_in_segment when fixups present. */
-  for (u32 si = 0; si < x->nsegs; ++si) {
-    /* count sites in this segment; `first` is the index of the first one */
-    u32 first = (u32)-1, count = 0;
-    for (u32 k = 0; k < fl->n; ++k) {
-      if (fl->a[k].segidx == si) {
-        if (first == (u32)-1) first = k;
-        ++count;
-      }
-    }
-    if (!count) continue;
-    /* Align the start of this struct (same mbuf_align(out, 4) call as
-     * above; presumably 4-byte alignment — confirm). */
-    mbuf_align(out, 4);
-    u32 sis_off = out->len;
-    /* Patch seg_info_offset[si] to (sis_off - starts_off). */
-    wr_u32_le(out->data + seg_info_offsets_pos + si * 4u, sis_off - starts_off);
-
-    /* Compute page count for this segment. */
-    u64 seg_va = x->segs[si].vmaddr;
-    u64 seg_size = x->segs[si].vmsize ? x->segs[si].vmsize : MZ_PAGE;
-    u32 page_count = (u32)((seg_size + MZ_PAGE - 1u) / MZ_PAGE);
-
-    /* dyld_chained_starts_in_segment:
-     *   uint32 size
-     *   uint16 page_size
-     *   uint16 pointer_format
-     *   uint64 segment_offset (offset of segment's first byte from
-     *          mach_header)
-     *   uint32 max_valid_pointer (0 for 64-bit)
-     *   uint16 page_count
-     *   uint16 page_start[page_count] (0xFFFF = no fixups in page)
-     */
-    u32 sis_size_pos = mbuf_u32(out, 0); /* fill below */
-    mbuf_u16(out, (u16)MZ_PAGE);
-    mbuf_u16(out, (u16)DYLD_CHAINED_PTR_64);
-    /* NOTE(review): the value written for segment_offset is the
-     * segment's file offset; confirm that this equals the offset from
-     * mach_header that the field is described as holding. */
-    mbuf_u64(out, (u64)x->segs[si].fileoff); /* segment file offset */
-    mbuf_u32(out, 0);
-    mbuf_u16(out, (u16)page_count); /* truncated to 16 bits */
-    u32 page_starts_pos = out->len;
-    for (u32 p = 0; p < page_count; ++p) mbuf_u16(out, 0xFFFFu);
-    /* Provisional value only: it is overwritten by the next statement. */
-    u32 sis_size = out->len - sis_size_pos + 4u;
-    /* The `size` field is the size of *this* struct, measured from
-     * sis_off through the end of page_start[]. */
-    sis_size = out->len - sis_off;
-    wr_u32_le(out->data + sis_size_pos, sis_size);
-
-    /* Now: walk sites in this segment, group by page, write
-     * page_start[i] = offset_in_page of first site, and chain via
-     * next-field in the actual segment's bytes. */
-    /* Sites are sorted globally; the loop treats fl->a[first .. first +
-     * count) as this segment's sites, i.e. it relies on one segment's
-     * sites being contiguous after the sort. */
-    u32 cur = first;
-    while (cur < first + count) {
-      u32 page_idx = (u32)((fl->a[cur].vaddr - seg_va) / MZ_PAGE);
-      u32 offset_in_page = (u32)((fl->a[cur].vaddr - seg_va) % MZ_PAGE);
-      wr_u16_le(out->data + page_starts_pos + page_idx * 2u,
-                (u16)offset_in_page);
-      /* Walk this page's chain: advance next_in_page to the last site
-       * whose vaddr is still inside page page_idx. */
-      u32 next_in_page = cur;
-      while (next_in_page + 1 < first + count) {
-        u64 nv = fl->a[next_in_page + 1].vaddr;
-        if (nv >= seg_va + (u64)(page_idx + 1) * MZ_PAGE) break;
-        ++next_in_page;
-      }
-      /* Encode chain pointers. next4 is the distance to the following
-       * site in 4-byte units, or 0 for the last site of the page. */
-      for (u32 k = cur; k <= next_in_page; ++k) {
-        FixSite* s = &fl->a[k];
-        u32 next4 = 0;
-        if (k < next_in_page) {
-          u64 dist = fl->a[k + 1].vaddr - s->vaddr;
-          next4 = (u32)(dist / 4u);
-        }
-        /* Find segment bytes. Synthetic pointer sections have private
-         * buffers; file-backed sections can live in any segment, including
-         * pointer-bearing read-only constants in __TEXT. */
-        u8* slot = NULL;
-        if (s->segidx == 2 && x->got_bytes && s->vaddr >= x->got_vaddr &&
-            s->vaddr < x->got_vaddr + x->got_size) {
-          /* __DATA_CONST: __got slot. */
-          slot = x->got_bytes + (s->vaddr - x->got_vaddr);
-        } else if (x->tlv_ptrs_bytes && s->vaddr >= x->tlv_ptrs_vaddr &&
-                   s->vaddr < x->tlv_ptrs_vaddr + x->tlv_ptrs_size) {
-          slot = x->tlv_ptrs_bytes + (s->vaddr - x->tlv_ptrs_vaddr);
-        } else {
-          MSec* m = msec_for_vaddr(x, s->vaddr);
-          if (m && m->link_sec_id) {
-            u8* base = bytes_for_section(x, m, x->img);
-            if (base) {
-              LinkSection* ls = &x->img->sections[m->link_sec_id - 1u];
-              u32 in_off = (u32)(ls->input_offset + (s->vaddr - m->vaddr));
-              slot = base + in_off;
-            }
-          }
-        }
-        if (!slot)
-          compiler_panic(x->c, no_loc(),
-                         "link_macho: chained-fixup slot for vaddr 0x%llx not "
-                         "in any segment buffer",
-                         (unsigned long long)s->vaddr);
-        if (s->is_bind) {
-          /* ordinal is import index (1-based) - 1; chained-import format
-           * uses 0-based. */
-          if (s->import_idx == 0 || s->import_idx > x->nimports_real) {
-            compiler_panic(
-                x->c, no_loc(),
-                "link_macho: chained bind for vaddr 0x%llx uses import index "
-                "%u outside real import table size %u",
-                (unsigned long long)s->vaddr, (unsigned)s->import_idx,
-                (unsigned)x->nimports_real);
-          }
-          u32 ord = s->import_idx - 1u;
-          emit_pointer(slot, 1, ord, 0, next4);
-        } else {
-          /* rebase target = unslid vmaddr */
-          u32 lo = (u32)(s->rebase_target & 0xffffffffu);
-          u32 hi = (u32)(s->rebase_target >> 32);
-          emit_pointer(slot, 0, lo, hi, next4);
-        }
-      }
-      cur = next_in_page + 1u;
-    }
-  }
-
-  /* Imports table: one dyld_chained_import (4B) per real import.
-   * Layout: lib_ordinal:8, weak:1, name_offset:23. Internal-GOT
-   * entries are not bound by dyld so they're omitted here. */
-  mbuf_align(out, 4);
-  u32 imports_off = out->len;
-  wr_u32_le(out->data + imports_offset_pos, imports_off);
-  /* The symbol pool starts right after the reserved import records;
-   * name offsets are measured from there. */
-  u32 symbols_off = imports_off + x->nimports_real * 4u;
-  /* Reserve imports area. */
-  for (u32 i = 0; i < x->nimports_real; ++i) mbuf_u32(out, 0);
-  /* Emit symbols (each NUL-terminated). Set name_offset on each import. */
-  wr_u32_le(out->data + symbols_offset_pos, out->len);
-  /* Leading NUL for offset 0. */
-  mbuf_u8(out, 0);
-  for (u32 i = 0; i < x->nimports_real; ++i) {
-    MachImp* mi = &x->imports[i];
-    Slice nm_s = pool_slice(x->c->global, mi->name);
-    const char* nm = nm_s.s;
-    size_t nl = nm_s.len;
-    if (!nm || !nl || mi->dylib_ord == 0 || mi->dylib_ord > x->ndylibs) {
-      compiler_panic(x->c, no_loc(),
-                     "link_macho: invalid chained import %u "
-                     "(name=%u dylib_ord=%u ndylibs=%u)",
-                     (unsigned)i, (unsigned)mi->name,
-                     (unsigned)mi->dylib_ord, (unsigned)x->ndylibs);
-    }
-    u32 off = out->len - symbols_off;
-    mbuf_str(out, nm, (u32)nl);
-    /* Patch the import slot: ordinal in bits 0-7, weak flag in bit 8,
-     * name offset (masked to 23 bits) from bit 9 up. */
-    u32 packed = ((u32)mi->dylib_ord & 0xffu) |
-                 ((u32)(mi->weak ? 1u : 0u) << 8) | ((off & 0x7fffffu) << 9);
-    wr_u32_le(out->data + imports_off + i * 4u, packed);
-  }
-  (void)symbols_off;
-}
-
-/* ---- exports trie ----
- *
- * Minimal trie: one node carrying a single export "_main" with the
- * entry symbol's VA-relative offset. This is enough for dyld; binaries
- * with a real exports trie include more data but we don't need it. */
-/* Append v to out as unsigned LEB128: 7 bits per byte, low-order group
- * first, with 0x80 set on every byte except the last. v == 0 emits a
- * single 0x00 byte. */
-static void uleb128(MByte* out, u64 v) {
-  do {
-    u8 byte = v & 0x7fu;
-    v >>= 7;
-    if (v) byte |= 0x80u;
-    mbuf_u8(out, byte);
-  } while (v);
-}
-
-static void build_exports_trie(MCtx* x) {
-  /* Format:
-   *   node = (terminal_size: uleb128) (export_data)? (children_count: u8)
-   *          (children: [(label NUL) (offset uleb128)]*)
-   *
-   * We emit a trie with a single leaf at "_main" with offset
-   * entry_offset (from __TEXT base).
-   *
-   * Easiest: single root node with children_count=1, child label = "_main",
-   * child offset points to a leaf node.
-   */
-  MByte* out = &x->exports_trie;
-  mbuf_init(out, x->h);
-
-  LinkImage* img = x->img;
-  LinkSymbol* esym = sym_at(img, img->entry_sym);
-  if (!esym || !esym->defined) {
-    /* No entry — emit a single empty terminal trie. */
-    mbuf_u8(out, 0); /* terminal_size 0 */
-    mbuf_u8(out, 0); /* children 0 */
-    return;
-  }
-  Slice nm_s = pool_slice(x->c->global, esym->name);
-  const char* nm = nm_s.s;
-  size_t nl = nm_s.len;
-  if (!nm || nl == 0) {
-    mbuf_u8(out, 0);
-    mbuf_u8(out, 0);
-    return;
-  }
-  /* leaf node: terminal_size = sizeof(uleb(flags)+uleb(offset))
-   *   flags = 0 (regular export); offset = vaddr - __TEXT.vmaddr */
-  u64 entry_off = esym->vaddr - x->text_vaddr;
-
-  /* Compute leaf-node bytes length: uleb(flags=0) + uleb(offset). */
-  u32 flags = 0;
-  u32 leaf_payload_len;
-  {
-    /* count uleb bytes for flags=0 -> 1 byte */
-    u32 a = 1;
-    /* count uleb bytes for entry_off */
-    u32 b = 0;
-    u64 v = entry_off;
-    do {
-      ++b;
-      v >>= 7;
-    } while (v);
-    leaf_payload_len = a + b;
-  }
-  /* Layout: root node first, then leaf. 
The root node's child entry - * carries the absolute offset of the leaf within the trie. */ - - /* root: terminal_size=0, children_count=1, "_main"\0, child_offset= - * (leaf-position uleb). */ - /* We'll back-patch child_offset after we know the leaf position. */ - mbuf_u8(out, 0); /* root terminal size */ - mbuf_u8(out, 1); /* children_count */ - mbuf_str(out, nm, (u32)nl); - /* child offset: 5 bytes max for uleb128(u32). Reserve and patch. */ - u32 child_off_pos = out->len; - /* Reserve 5 bytes. */ - for (u32 i = 0; i < 5; ++i) mbuf_u8(out, 0); - /* leaf node */ - u32 leaf_pos = out->len; - /* terminal_size byte then payload */ - mbuf_u8(out, (u8)leaf_payload_len); - uleb128(out, flags); - uleb128(out, entry_off); - mbuf_u8(out, 0); /* children_count */ - - /* Patch child_offset uleb. */ - u32 v = leaf_pos; - for (u32 i = 0; i < 5; ++i) { - u8 b = (u8)(v & 0x7fu); - v >>= 7; - if (v) b |= 0x80u; - out->data[child_off_pos + i] = b; - if (!v && i < 4) { - /* Remaining bytes need to be 0x00 — but we already wrote zeros; - * we need a continuation-zero so the consumer sees 5 bytes. Set - * top bit on lower bytes to indicate continuation, last byte = 0. */ - /* Actually: ULEB needs proper termination. Force final byte to - * 0 with no continuation by setting bit-7=0 on the last - * non-zero byte and also forcing remaining bytes to be 0x80 - * extension or trim. Simpler: set last byte explicitly. */ - out->data[child_off_pos + i] = (u8)(out->data[child_off_pos + i] & 0x7fu); - for (u32 j = i + 1; j < 5; ++j) out->data[child_off_pos + j] = 0x80; - out->data[child_off_pos + 4] = 0x00; - break; - } - } - /* Pad trie to 8 bytes. 
*/ - mbuf_align(out, 8); -} - -/* ---- symtab + strtab + indirect symtab ---- */ - -typedef struct NlistRec { - u32 strx; - u8 type; - u8 sect; /* 1-based section index (Mach-O) */ - u16 desc; - u64 value; -} NlistRec; - -static void build_symtab(MCtx* x) { - Heap* h = x->h; - LinkImage* img = x->img; - mbuf_init(&x->symtab, h); - mbuf_init(&x->strtab, h); - mbuf_init(&x->indirect, h); - - /* strtab leading NUL */ - mbuf_u8(&x->strtab, 0); - - /* Approach: - * - Add one local nlist per defined LinkSymbol (locals + non-imported - * externs) — but to keep things simple we only emit external defined - * syms (mainly _main), plus all imports as N_UNDF|N_EXT. - * - * Mach-O dyld requires the symtab order: locals first, ext-defs next, - * undef last (matched by LC_DYSYMTAB ranges). - */ - - /* Pass A: defined externals. */ - u32 n_local = 0; - u32 n_extdef = 0; - u32 n_undef = 0; - - /* For now we emit only externals + imports. No locals. */ - /* extdef pass */ - for (u32 i = 0; i < LinkSyms_count(&img->syms); ++i) { - LinkSymbol* s = LinkSyms_at(&img->syms, i); - if (!s->defined) continue; - if (s->bind != SB_GLOBAL && s->bind != SB_WEAK) continue; - if (s->name == 0) continue; - if (s->kind == SK_ABS) continue; /* skip abs externs */ - /* Locate which OutSec contains this vaddr to figure out n_sect. - * n_sect is the 1-based index into the flat section_64 table the - * file actually contains (post-coalesce), matching what we emit - * in emit_load_command_segment. 
*/ - u8 n_sect = 0; - for (u32 k = 0; k < x->nouts; ++k) { - OutSec* o = &x->outs[k]; - if (s->vaddr >= o->vaddr && s->vaddr < o->vaddr + o->size) { - n_sect = (u8)(k + 1u); - break; - } - if (s->vaddr == o->vaddr + o->size) { - n_sect = (u8)(k + 1u); - break; - } - } - Slice nm_s = pool_slice(x->c->global, s->name); - const char* nm = nm_s.s; - size_t nl = nm_s.len; - u32 strx = x->strtab.len; - if (nm && nl) mbuf_str(&x->strtab, nm, (u32)nl); - - u8 t[16]; - u8 nt = N_SECT | N_EXT; - if (s->bind == SB_WEAK) { - /* N_WEAK_DEF in n_desc (not a flag in n_type) */ - } - wr_u32_le(t + 0, strx); - t[4] = nt; - t[5] = n_sect; - wr_u16_le(t + 6, s->bind == SB_WEAK ? N_WEAK_DEF : 0); - wr_u64_le(t + 8, s->vaddr); - mbuf_append(&x->symtab, t, 16); - ++n_extdef; - } - - /* undef imports — real imports only. Internal-GOT entries don't get - * N_UNDF nlist records since they're defined in the image. */ - u32 imp_first_symtab_idx = n_extdef; - for (u32 i = 0; i < x->nimports_real; ++i) { - MachImp* mi = &x->imports[i]; - Slice nm_s = pool_slice(x->c->global, mi->name); - const char* nm = nm_s.s; - size_t nl = nm_s.len; - u32 strx = x->strtab.len; - if (nm && nl) mbuf_str(&x->strtab, nm, (u32)nl); - - u8 t[16]; - wr_u32_le(t + 0, strx); - t[4] = N_UNDF | N_EXT; - t[5] = 0; - /* n_desc carries dylib ordinal in high byte (REFERENCED_DYNAMICALLY etc.) - */ - u16 desc = (u16)(((u16)mi->dylib_ord & 0xff) << 8); - if (mi->weak) desc |= N_WEAK_REF; - wr_u16_le(t + 6, desc); - wr_u64_le(t + 8, 0); - mbuf_append(&x->symtab, t, 16); - ++n_undef; - } - - /* indirect symtab: one entry per __stubs slot, then one per __got - * slot. Internal-GOT slots use INDIRECT_SYMBOL_LOCAL (0x80000000) - * since they have no nlist entry. */ - u32 indirect_start = 0; - /* Patch reserved1 of each synth OutSec. __stubs and __got are each - * singleton OutSecs (synth sections never coalesce with user input), - * so a sectname match identifies them unambiguously. 
*/ - for (u32 i = 0; i < x->nouts; ++i) { - OutSec* o = &x->outs[i]; - if (slice_eq_cstr(slice_from_cstr(o->sectname), "__stubs") && o->size) { - o->reserved1 = indirect_start; - for (u32 k = 0; k < x->nimports; ++k) { - MachImp* mi = &x->imports[k]; - if (!mi->stub_idx) continue; - u32 sym_idx = imp_first_symtab_idx + k; - mbuf_u32(&x->indirect, sym_idx); - ++indirect_start; - } - } - } - for (u32 i = 0; i < x->nouts; ++i) { - OutSec* o = &x->outs[i]; - if (slice_eq_cstr(slice_from_cstr(o->sectname), "__got") && o->size) { - o->reserved1 = indirect_start; - for (u32 k = 0; k < x->nimports; ++k) { - MachImp* mi = &x->imports[k]; - u32 sym_idx = mi->internal ? 0x80000000u /* INDIRECT_SYMBOL_LOCAL */ - : (imp_first_symtab_idx + k); - mbuf_u32(&x->indirect, sym_idx); - ++indirect_start; - } - } - } - - x->nsyms = n_local + n_extdef + n_undef; - (void)n_local; - (void)imp_first_symtab_idx; -} - -/* ---- LINKEDIT layout assembly ---- - * - * Place blobs in the order Apple prefers: - * chained_fixups, exports_trie, fn_starts, data_in_code, - * symtab, indirect, strtab, codesig - */ - -static void layout_linkedit(MCtx* x) { - /* fn_starts and data_in_code are both empty. 
*/ - mbuf_init(&x->fn_starts, x->h); - mbuf_init(&x->data_in_code, x->h); - mbuf_init(&x->codesig, x->h); - - u64 cur = x->linkedit_fileoff; - /* chained fixups */ - cur = ALIGN_UP(cur, 8u); - x->chained_fixups_off = (u32)cur; - cur += x->chained_fixups.len; - /* exports trie */ - cur = ALIGN_UP(cur, 8u); - x->exports_trie_off = (u32)cur; - cur += x->exports_trie.len; - /* function starts (empty placeholder, but allocate one byte) */ - cur = ALIGN_UP(cur, 8u); - x->fn_starts_off = (u32)cur; - /* data in code */ - cur = ALIGN_UP(cur, 8u); - x->data_in_code_off = (u32)cur; - /* symtab */ - cur = ALIGN_UP(cur, 8u); - x->symtab_off = (u32)cur; - cur += x->symtab.len; - /* indirect symtab */ - cur = ALIGN_UP(cur, 4u); - x->indirect_off = (u32)cur; - cur += x->indirect.len; - /* strtab */ - cur = ALIGN_UP(cur, 8u); - x->strtab_off = (u32)cur; - cur += x->strtab.len; - /* code signature: end-aligned to 16 */ - cur = ALIGN_UP(cur, 16u); - x->codesig_off = (u32)cur; - - /* Linkedit segment file_size includes everything up to (but not yet - * including) codesig. Codesig is computed below. */ - u64 le_size = cur - x->linkedit_fileoff; - /* Set linkedit segment size; will be increased after codesig. */ - x->segs[4].filesize = le_size; - x->segs[4].vmsize = ALIGN_UP(le_size, MZ_PAGE); - if (!x->segs[4].vmsize) x->segs[4].vmsize = MZ_PAGE; -} - -/* ---- ad-hoc code signature (CodeDirectory + SuperBlob) ---- - * - * Produces a minimal embedded SuperBlob with a single CodeDirectory. - * The CD is sha256-hashed over CS_PAGE_SIZE_LOG2 = 4096-byte pages of - * the file (excluding the codesig itself). The kernel verifies the - * CD's hash chain on exec. 
- * - * Output format (in big-endian for SuperBlob/CodeDirectory headers): - * [SuperBlob] - * u32 magic (0xfade0cc0) - * u32 length - * u32 count (=1) - * [Slot] - * u32 type (=0 CSSLOT_CODEDIRECTORY) - * u32 offset (=20) -- relative to start of SuperBlob - * [CodeDirectory] - * u32 magic (0xfade0c02) - * u32 length (bytes including all hashes) - * u32 version (>=0x20400 for execSeg fields) - * u32 flags (=0 ad-hoc — actually flags must include 0x2 - * (kSecCodeSignatureAdhoc)) u32 hashOffset (offset of first slot hash) u32 - * identOffset (offset of identifier string) u32 nSpecialSlots (=0) u32 - * nCodeSlots u32 codeLimit (file bytes covered) u8 hashSize (=32) u8 - * hashType (=2 sha256) u8 platform (=0) u8 pageSize (=12 for 4096) u32 - * spare2 (=0) u32 scatterOffset (=0) u32 teamOffset (=0) u32 spare3 (=0) - * u64 codeLimit64 (=0) - * u64 execSegBase (=__TEXT.fileoff) - * u64 execSegLimit (=__TEXT.filesize) - * u64 execSegFlags (=1 main binary) - * [identifier bytes "a.out\0"] - * [codeslot hashes nCodeSlots * 32 B] - * - * Hashes computed AFTER everything else is final — including the codesig - * blob's own offset in the file (the hash range stops just before - * codeLimit). */ - -static void wr_u64_be(u8* p, u64 v) { - for (u32 i = 0; i < 8; ++i) p[7 - i] = (u8)(v >> (i * 8)); -} - -/* Build the codesig blob with placeholder hashes; size is precise so - * file layout is final after this. */ -static void build_codesig_skeleton(MCtx* x, u32 code_limit, const char* ident) { - u32 code_page = 1u << CS_PAGE_SIZE_LOG2; /* 4096 */ - u32 nslots = (code_limit + code_page - 1u) / code_page; - - /* CodeDirectory size: - * header 88 bytes through execSegFlags - * identifier (ident_len + 1) - * hashes (nslots * 32) - */ - u32 ident_len = (u32)slice_from_cstr(ident).len + 1u; - u32 cd_hdr = 88u; - u32 cd_size = cd_hdr + ident_len + nslots * CS_SHA256_LEN; - /* SuperBlob: 12 hdr + 8 slot + cd. 
*/ - u32 sb_size = 12u + 8u + cd_size; - - MByte* out = &x->codesig; - mbuf_init(out, x->h); - mbuf_reserve(out, sb_size); - memset(out->data, 0, sb_size); - out->len = sb_size; - - u8* sb = out->data; - /* SuperBlob header */ - wr_u32_be(sb + 0, CS_MAGIC_EMBEDDED_SIGNATURE); - wr_u32_be(sb + 4, sb_size); - wr_u32_be(sb + 8, 1); /* count */ - /* slot 0: type=CSSLOT_CODEDIRECTORY, offset=20 */ - wr_u32_be(sb + 12, CSSLOT_CODEDIRECTORY); - wr_u32_be(sb + 16, 20u); - - /* CodeDirectory */ - u8* cd = sb + 20; - wr_u32_be(cd + 0, CS_MAGIC_CODEDIRECTORY); - wr_u32_be(cd + 4, cd_size); - wr_u32_be(cd + 8, 0x20400u); /* version with execSeg */ - wr_u32_be(cd + 12, 0x2u); /* flags = adhoc */ - wr_u32_be(cd + 16, cd_hdr + ident_len); /* hashOffset */ - wr_u32_be(cd + 20, cd_hdr); /* identOffset */ - wr_u32_be(cd + 24, 0); /* nSpecialSlots */ - wr_u32_be(cd + 28, nslots); - wr_u32_be(cd + 32, code_limit); - cd[36] = (u8)CS_SHA256_LEN; - cd[37] = (u8)CS_HASHTYPE_SHA256; - cd[38] = 0; /* platform */ - cd[39] = (u8)CS_PAGE_SIZE_LOG2; - wr_u32_be(cd + 40, 0); /* spare2 */ - wr_u32_be(cd + 44, 0); /* scatterOffset */ - wr_u32_be(cd + 48, 0); /* teamOffset */ - wr_u32_be(cd + 52, 0); /* spare3 */ - wr_u64_be(cd + 56, 0); /* codeLimit64 */ - wr_u64_be(cd + 64, x->segs[1].fileoff); /* execSegBase */ - wr_u64_be(cd + 72, x->segs[1].filesize); /* execSegLimit */ - wr_u64_be(cd + 80, CS_EXECSEG_MAIN_BINARY); - - /* identifier */ - memcpy(cd + cd_hdr, ident, ident_len); - - x->codesig_size = sb_size; -} - -static void compute_codesig(MCtx* x, const u8* full_file, u32 file_len_excl_cs, - const char* ident) { - u32 code_page = 1u << CS_PAGE_SIZE_LOG2; - u32 nslots = (file_len_excl_cs + code_page - 1u) / code_page; - u32 ident_len = (u32)slice_from_cstr(ident).len + 1u; - u8* cd = x->codesig.data + 12 + 8; - u8* hashes = cd + 88u + ident_len; - - for (u32 i = 0; i < nslots; ++i) { - u32 off = i * code_page; - u32 take = (off + code_page <= file_len_excl_cs) ? 
code_page - : (file_len_excl_cs - off); - Sha256 s; - sha256_init(&s); - sha256_update(&s, full_file + off, take); - /* Pages shorter than code_page get the standard SHA over the - * partial bytes — Apple's tools do exactly this (no zero padding - * on the tail). */ - sha256_final(&s, hashes + i * CS_SHA256_LEN); - } -} - -/* ---- final emission ---- */ - -static void emit_load_command_segment(MByte* lc, MCtx* x, u32 segidx) { - MSeg* sg = &x->segs[segidx]; - u32 seg_cmd_size = MACHO_SEGCMD64_SIZE + sg->nouts * MACHO_SECT64_SIZE; - u32 base = lc->len; - mbuf_u32(lc, LC_SEGMENT_64); - mbuf_u32(lc, seg_cmd_size); - /* segname: 16 bytes zero-padded */ - u8 nm[16]; - memset(nm, 0, 16); - size_t nlen = slice_from_cstr(sg->name).len; - if (nlen > 16) nlen = 16; - memcpy(nm, sg->name, nlen); - mbuf_append(lc, nm, 16); - mbuf_u64(lc, sg->vmaddr); - mbuf_u64(lc, sg->vmsize); - mbuf_u64(lc, sg->fileoff); - mbuf_u64(lc, sg->filesize); - mbuf_u32(lc, sg->maxprot); - mbuf_u32(lc, sg->initprot); - mbuf_u32(lc, sg->nouts); - mbuf_u32(lc, 0); /* flags */ - - for (u32 j = 0; j < sg->nouts; ++j) { - OutSec* o = &x->outs[sg->first_out + j]; - u8 sname[16], gname[16]; - memset(sname, 0, 16); - memset(gname, 0, 16); - size_t sl = o->sectname ? slice_from_cstr(o->sectname).len : 0; - if (sl > 16) sl = 16; - if (sl) memcpy(sname, o->sectname, sl); - size_t gl = slice_from_cstr(sg->name).len; /* segname must match */ - if (gl > 16) gl = 16; - memcpy(gname, sg->name, gl); - mbuf_append(lc, sname, 16); - mbuf_append(lc, gname, 16); - mbuf_u64(lc, o->vaddr); - mbuf_u64(lc, o->size); - mbuf_u32(lc, (u32)o->file_offset); - /* align is power of 2; encode as log2. */ - u32 a = o->align ? 
o->align : 1u; - u32 al = 0; - while ((1u << al) < a) ++al; - mbuf_u32(lc, al); - mbuf_u32(lc, 0); /* reloff */ - mbuf_u32(lc, 0); /* nreloc */ - mbuf_u32(lc, o->flags); - mbuf_u32(lc, o->reserved1); - mbuf_u32(lc, o->reserved2); - mbuf_u32(lc, 0); /* reserved3 */ - } - (void)base; -} - -void link_emit_macho(LinkImage* img, Writer* w); - -void link_emit_macho(LinkImage* img, Writer* w) { - MCtx x; - memset(&x, 0, sizeof(x)); - x.img = img; - x.c = img->c; - x.h = img->heap; - x.w = w; - x.linker = img->linker; - x.arch = link_arch_desc_for(img->c); - - if (!x.arch || !x.arch->macho_cputype || !x.arch->emit_macho_stub || - !x.arch->macho_stub_size) - compiler_panic(x.c, no_loc(), - "link_emit_macho: no Mach-O descriptor for target"); - if (img->entry_sym == LINK_SYM_NONE) - compiler_panic(x.c, no_loc(), "link_emit_macho: no resolved entry"); - - collect_imports(&x); - collect_tlv(&x); - plan_layout(&x); - shift_sections(&x); - - /* entry offset within __TEXT segment. */ - LinkSymbol* esym = sym_at(img, img->entry_sym); - if (!esym || !esym->defined) - compiler_panic(x.c, no_loc(), "link_emit_macho: entry symbol undefined"); - if (esym->vaddr < x.text_vaddr) - compiler_panic(x.c, no_loc(), - "link_emit_macho: entry symbol below __TEXT base"); - x.entry_offset = (u32)(esym->vaddr - x.text_vaddr); - - /* image-id UUID. */ - u8 image_id[LINK_IMAGE_ID_BYTES]; - link_image_id_compute(img, image_id); - memcpy(x.uuid, image_id, 16); - - /* Reloc apply collects fixsites. */ - FixList fl; - fix_init(&fl, x.h); - apply_relocs(&x, &fl); - - /* Build LINKEDIT contents. */ - build_chained_fixups(&x, &fl); - build_exports_trie(&x); - build_symtab(&x); - layout_linkedit(&x); - - /* Compute code-sig skeleton sized to file bytes excluding sig. */ - u32 code_limit = x.codesig_off; - build_codesig_skeleton(&x, code_limit, "a.out"); - /* Now extend linkedit segment to include codesig. 
*/ - u64 le_size = (u64)x.codesig_off + (u64)x.codesig_size - x.linkedit_fileoff; - x.segs[4].filesize = le_size; - x.segs[4].vmsize = ALIGN_UP(le_size, MZ_PAGE); - - /* Build load commands buffer. */ - MByte lc; - mbuf_init(&lc, x.h); - - /* LC_SEGMENT_64 for each segment with sections (and PAGEZERO/LINKEDIT). */ - emit_load_command_segment(&lc, &x, 0); /* PAGEZERO */ - emit_load_command_segment(&lc, &x, 1); /* TEXT */ - if (x.segs[2].nsects > 0) - emit_load_command_segment(&lc, &x, 2); /* DATA_CONST */ - if (x.segs[3].nsects > 0) emit_load_command_segment(&lc, &x, 3); /* DATA */ - emit_load_command_segment(&lc, &x, 4); /* LINKEDIT */ - - /* LC_DYLD_CHAINED_FIXUPS (linkedit_data_command: 16B) */ - mbuf_u32(&lc, LC_DYLD_CHAINED_FIXUPS); - mbuf_u32(&lc, 16); - mbuf_u32(&lc, x.chained_fixups_off); - mbuf_u32(&lc, x.chained_fixups.len); - - /* LC_DYLD_EXPORTS_TRIE */ - mbuf_u32(&lc, LC_DYLD_EXPORTS_TRIE); - mbuf_u32(&lc, 16); - mbuf_u32(&lc, x.exports_trie_off); - mbuf_u32(&lc, x.exports_trie.len); - - /* LC_SYMTAB */ - mbuf_u32(&lc, LC_SYMTAB); - mbuf_u32(&lc, MACHO_SYMTAB_CMD_SIZE); - mbuf_u32(&lc, x.symtab_off); - mbuf_u32(&lc, x.nsyms); - mbuf_u32(&lc, x.strtab_off); - mbuf_u32(&lc, x.strtab.len); - - /* LC_DYSYMTAB */ - /* nlocal=0, nextdef=#defined-globals, nundef=#imports. We tracked - * those during build_symtab; recompute by inspecting strtab... easier - * to recount: defined globals are total - imports. */ - u32 nlocal = 0; - u32 nundef = x.nimports_real; - u32 nextdef = (x.nsyms > nundef) ? 
x.nsyms - nundef - nlocal : 0; - mbuf_u32(&lc, LC_DYSYMTAB); - mbuf_u32(&lc, MACHO_DYSYMTAB_CMD_SIZE); - mbuf_u32(&lc, 0); /* ilocalsym */ - mbuf_u32(&lc, nlocal); - mbuf_u32(&lc, nlocal); - mbuf_u32(&lc, nextdef); - mbuf_u32(&lc, nlocal + nextdef); - mbuf_u32(&lc, nundef); - mbuf_u32(&lc, 0); - mbuf_u32(&lc, 0); /* tocoff, ntoc */ - mbuf_u32(&lc, 0); - mbuf_u32(&lc, 0); /* modtaboff, nmodtab */ - mbuf_u32(&lc, 0); - mbuf_u32(&lc, 0); /* extrefsymoff, nextrefsyms */ - mbuf_u32(&lc, x.indirect_off); - mbuf_u32(&lc, x.indirect.len / 4u); - mbuf_u32(&lc, 0); - mbuf_u32(&lc, 0); /* extreloff, nextrel */ - mbuf_u32(&lc, 0); - mbuf_u32(&lc, 0); /* locreloff, nlocrel */ - - /* LC_LOAD_DYLINKER */ - { - const char* dyld = "/usr/lib/dyld"; - u32 dyld_len = (u32)slice_from_cstr(dyld).len; - u32 cmd_size = (u32)ALIGN_UP((u64)(12u + dyld_len + 1u), 8u); - mbuf_u32(&lc, LC_LOAD_DYLINKER); - mbuf_u32(&lc, cmd_size); - mbuf_u32(&lc, 12u); /* name offset within cmd */ - u32 wrote = mbuf_str(&lc, dyld, dyld_len); - (void)wrote; - /* Pad to cmd_size. */ - while (lc.len < (u32)((u64)mbuf_align(&lc, 1) + 0)) { - /* no-op */ - break; - } - /* Re-align to cmd_size. */ - u32 want = (u32)(lc.len); - /* Walk back: lc grew by 12 + (strlen+1). Pad to cmd_size. */ - u32 cmd_start_back = lc.len - (12u + dyld_len + 1u); - u32 pad_needed = cmd_size - (lc.len - cmd_start_back); - while (pad_needed-- > 0) mbuf_u8(&lc, 0); - (void)want; - } - - /* LC_UUID */ - mbuf_u32(&lc, LC_UUID); - mbuf_u32(&lc, 24); - mbuf_append(&lc, x.uuid, 16); - - /* LC_BUILD_VERSION */ - mbuf_u32(&lc, LC_BUILD_VERSION); - mbuf_u32(&lc, 24); - mbuf_u32(&lc, 1); /* PLATFORM_MACOS */ - mbuf_u32(&lc, (12u << 16) | 0); /* minos 12.0.0 */ - mbuf_u32(&lc, (12u << 16) | 0); /* sdk 12.0.0 */ - mbuf_u32(&lc, 0); /* ntools */ - - /* LC_MAIN — entryoff is offset within __TEXT segment from its file - * start (0). 
*/ - mbuf_u32(&lc, LC_MAIN); - mbuf_u32(&lc, 24); - mbuf_u64(&lc, (u64)x.entry_offset); /* entryoff = vaddr - __TEXT.vmaddr */ - mbuf_u64(&lc, 0); /* stacksize */ - - /* LC_LOAD_DYLIB per dylib. */ - for (u32 i = 0; i < x.ndylibs; ++i) { - Slice nm_s = pool_slice(x.c->global, x.dylibs[i].install); - const char* nm = nm_s.s; - size_t nl = nm_s.len; - u32 cmd_size = (u32)ALIGN_UP((u64)(24u + (u32)nl + 1u), 8u); - u32 cmd_start = lc.len; - mbuf_u32(&lc, LC_LOAD_DYLIB); - mbuf_u32(&lc, cmd_size); - mbuf_u32(&lc, 24u); /* name offset */ - mbuf_u32(&lc, 0); /* timestamp */ - mbuf_u32(&lc, (1u << 16)); /* current_version 1.0 */ - mbuf_u32(&lc, (1u << 16)); /* compat_version 1.0 */ - mbuf_str(&lc, nm ? nm : "", (u32)nl); - while (lc.len - cmd_start < cmd_size) mbuf_u8(&lc, 0); - } - - /* LC_FUNCTION_STARTS / LC_DATA_IN_CODE — empty. */ - mbuf_u32(&lc, LC_FUNCTION_STARTS_C); - mbuf_u32(&lc, 16); - mbuf_u32(&lc, x.fn_starts_off); - mbuf_u32(&lc, 0); - - mbuf_u32(&lc, LC_DATA_IN_CODE_C); - mbuf_u32(&lc, 16); - mbuf_u32(&lc, x.data_in_code_off); - mbuf_u32(&lc, 0); - - /* LC_CODE_SIGNATURE */ - mbuf_u32(&lc, LC_CODE_SIGNATURE_C); - mbuf_u32(&lc, 16); - mbuf_u32(&lc, x.codesig_off); - mbuf_u32(&lc, x.codesig_size); - - /* Sanity: lc.len + MACHO_HDR64_SIZE must equal headers_size we - * predicted in plan_layout. If not, we mis-sized — panic. */ - if ((u64)lc.len + MACHO_HDR64_SIZE != x.headers_size) { - compiler_panic(x.c, no_loc(), - "link_macho: load-cmd size mismatch: predicted %llu got %u", - (unsigned long long)(x.headers_size - MACHO_HDR64_SIZE), - lc.len); - } - - /* ---- now stream the file ---- */ - /* The Writer in cfree allows seek; we'll write a flat buffer first - * (so we can hash it for codesig) and flush at the end. 
*/ - MByte file; - mbuf_init(&file, x.h); - - /* mach_header_64 */ - u32 ncmds = 0; - /* Recount: PAGEZERO + TEXT + maybe DATA_CONST + maybe DATA + LINKEDIT - * + chained + exports_trie + symtab + dysymtab + dyld + uuid + - * build_version + main + nDylibs + fn_starts + data_in_code + - * codesig. */ - ncmds += 2; /* PAGEZERO + TEXT */ - if (x.segs[2].nsects > 0) ncmds++; - if (x.segs[3].nsects > 0) ncmds++; - ncmds++; /* LINKEDIT */ - ncmds += 11 + x.ndylibs; - /* (chained, exports_trie, symtab, dysymtab, dyld, uuid, build_version, - * main, fn_starts, data_in_code, codesig) = 11 */ - - mbuf_u32(&file, MH_MAGIC_64); - mbuf_u32(&file, x.arch->macho_cputype); - mbuf_u32(&file, x.arch->macho_cpusubtype); - mbuf_u32(&file, MH_EXECUTE); - mbuf_u32(&file, ncmds); - mbuf_u32(&file, lc.len); - { - u32 mh_flags = MH_DYLDLINK | MH_TWOLEVEL | MH_NOUNDEFS | MH_PIE; - /* dyld scans __thread_vars and allocates a pthread_key for each - * descriptor only when this flag is set; without it the descriptor's - * thunk pointer is silently patched to _tlv_bootstrap_error. Apple's - * ld sets it whenever the image contains S_THREAD_LOCAL_* sections. */ - if (x.ntlv) mh_flags |= MH_HAS_TLV_DESCRIPTORS; - mbuf_u32(&file, mh_flags); - } - mbuf_u32(&file, 0); /* reserved */ - mbuf_append(&file, lc.data, lc.len); - - /* Pad to first section's file offset. */ - /* __TEXT first section begins at headers_size; we wrote header+lc = - * headers_size, so no pad needed. Then each MSec's file_offset - * tells us where to write its bytes. */ - - /* Now emit segment payload bytes per MSec. */ - for (u32 i = 0; i < x.nsecs; ++i) { - MSec* m = &x.secs[i]; - if (m->is_zerofill || m->size == 0) continue; - /* Pad up to m->file_offset. */ - while (file.len < m->file_offset) mbuf_u8(&file, 0); - if (m->synth_data) { - mbuf_append(&file, m->synth_data, m->synth_size); - } else { - LinkSection* ls = &img->sections[m->link_sec_id - 1u]; - u32 segid = ls->segment_id; - u8* base = - (segid != LINK_SEG_NONE) ? 
img->segment_bytes[segid - 1u] : NULL; - if (base && ls->size) { - mbuf_append(&file, base + ls->input_offset, (u32)ls->size); - } else if (ls->size) { - for (u64 k = 0; k < ls->size; ++k) mbuf_u8(&file, 0); - } - } - } - - /* Pad to LINKEDIT start. */ - while (file.len < x.linkedit_fileoff) mbuf_u8(&file, 0); - - /* LINKEDIT contents in declared order. */ - while (file.len < x.chained_fixups_off) mbuf_u8(&file, 0); - mbuf_append(&file, x.chained_fixups.data, x.chained_fixups.len); - while (file.len < x.exports_trie_off) mbuf_u8(&file, 0); - mbuf_append(&file, x.exports_trie.data, x.exports_trie.len); - while (file.len < x.fn_starts_off) mbuf_u8(&file, 0); - /* fn_starts is empty */ - while (file.len < x.data_in_code_off) mbuf_u8(&file, 0); - /* empty */ - while (file.len < x.symtab_off) mbuf_u8(&file, 0); - mbuf_append(&file, x.symtab.data, x.symtab.len); - while (file.len < x.indirect_off) mbuf_u8(&file, 0); - mbuf_append(&file, x.indirect.data, x.indirect.len); - while (file.len < x.strtab_off) mbuf_u8(&file, 0); - mbuf_append(&file, x.strtab.data, x.strtab.len); - while (file.len < x.codesig_off) mbuf_u8(&file, 0); - - /* Compute codesig hashes over file bytes [0, codesig_off). */ - /* The codesig blob currently has zero hashes; hash now. */ - compute_codesig(&x, file.data, x.codesig_off, "a.out"); - /* Append codesig. */ - mbuf_append(&file, x.codesig.data, x.codesig.len); - - /* Stream out. */ - cfree_writer_seek(w, 0); - cfree_writer_write(w, file.data, file.len); - - /* Cleanup. 
*/ - fix_fini(&fl); - mbuf_fini(&lc); - mbuf_fini(&file); - mbuf_fini(&x.chained_fixups); - mbuf_fini(&x.exports_trie); - mbuf_fini(&x.symtab); - mbuf_fini(&x.strtab); - mbuf_fini(&x.indirect); - mbuf_fini(&x.fn_starts); - mbuf_fini(&x.data_in_code); - mbuf_fini(&x.codesig); - if (x.imports) x.h->free(x.h, x.imports, 0); /* VEC_GROW: cap unknown */ - if (x.dylibs) x.h->free(x.h, x.dylibs, 0); - if (x.sym_to_imp) - x.h->free(x.h, x.sym_to_imp, sizeof(u32) * x.sym_to_imp_size); - if (x.secs) x.h->free(x.h, x.secs, 0); - if (x.stubs_bytes) x.h->free(x.h, x.stubs_bytes, x.stubs_size); - if (x.got_bytes) x.h->free(x.h, x.got_bytes, x.got_size); - if (x.tlv_ptrs_bytes) x.h->free(x.h, x.tlv_ptrs_bytes, x.tlv_ptrs_size); - if (x.tlv_slots) x.h->free(x.h, x.tlv_slots, 0); - if (x.sym_to_tlv) - x.h->free(x.h, x.sym_to_tlv, sizeof(u32) * x.sym_to_tlv_size); -} diff --git a/src/obj/coff_archive.c b/src/obj/coff/archive.c diff --git a/src/obj/coff.h b/src/obj/coff/coff.h diff --git a/src/obj/coff/emit.c b/src/obj/coff/emit.c @@ -0,0 +1,732 @@ +/* PE/COFF relocatable .obj writer. Walks a finalized ObjBuilder and + * emits a 64-bit little-endian relocatable object via the supplied + * Writer. Counterpart to emit_elf / emit_macho. + * + * Layout strategy: + * 1. plan COFF sections (one per kept obj section), assigning + * Characteristics, alignment, raw size, and per-section reloc + * counts; + * 2. build the symbol table (synthesized per-section static symbols + * with section-definition aux records, plus file symbols and + * every ObjSym kept after sweep); + * 3. build per-section relocation records via the per-arch + * translator (arch_for_compiler(c)->coff->reloc_to); + * 4. assign file offsets: + * file header | section headers | (bytes + relocs)* | symtab | strtab + * 5. write the file in that order. + * + * 64-bit little-endian only — IMAGE_FILE_MACHINE_AMD64 (x86_64) and + * IMAGE_FILE_MACHINE_ARM64 (aarch64). Big-endian / ptr_size != 8 panic + * at entry. 
+ * + * Section name mapping policy: we pass the cfree Section.name through + * verbatim to the COFF Name field. Callers / readers are expected to + * have stored COFF-shaped names (".text", ".rdata", ".tls$", etc.) at + * the obj layer; emit_coff does not rewrite ELF-style spellings like + * ".rodata" -> ".rdata". Names longer than 8 bytes spill into the + * string table with the "/<decimal-offset>" encoding. + * + * Addend handling: COFF stores the addend inline in the patched bytes + * (there is no addend field in IMAGE_RELOCATION). The ObjBuilder + * caller is responsible for having written the addend into the section + * bytes already — matching how MSVC / mingw emit. A nonzero + * Reloc::addend with has_explicit_addend set is rejected here as a + * known v1 limitation. */ + +#include <string.h> + +#include "core/arena.h" +#include "core/buf.h" +#include "core/heap.h" +#include "core/pool.h" +#include "core/slice.h" +#include "core/util.h" +#include "obj/coff/coff.h" +#include "obj/format.h" + +static SrcLoc no_loc(void) { + SrcLoc l = {0, 0, 0}; + return l; +} + +static int coff_rel32_absorbs_minus4(CfreeArchKind arch, RelocKind kind, + i64 addend) { + if (arch != CFREE_ARCH_X86_64 || addend != -4) return 0; + switch (kind) { + case R_PC32: + case R_REL32: + case R_PLT32: + case R_X64_PLT32: + case R_X64_GOTPCREL: + case R_X64_GOTPCRELX: + case R_X64_REX_GOTPCRELX: + return 1; + default: + return 0; + } +} + +/* ---- per-COFF-section plan record ---- */ + +typedef struct CSec { + /* IMAGE_SECTION_HEADER fields (little-endian-encoded at write time). */ + char name8[8]; /* Name field bytes; "/N" form if long name */ + u32 virtual_size; /* nonzero for NOBITS (bss size) */ + u32 size_of_raw_data; /* zero for NOBITS */ + u32 pointer_to_raw_data; + u32 pointer_to_relocations; + u16 number_of_relocations; + u32 characteristics; /* IMAGE_SCN_* | ALIGN nibble */ + + /* Planning state. 
*/ + u32 align; /* in bytes, power of two */ + u32 obj_sec; /* originating ObjSecId */ + int is_nobits; + const Buf* obj_bytes; /* NULL when nobits */ + u8* reloc_bytes; /* arena-allocated, nreloc * 10 bytes */ + ObjGroupId group_id; /* OBJ_GROUP_NONE if not in a group */ +} CSec; + +/* ---- emit ---- */ + +static u32 log2_align(u32 a) { + u32 r = 0; + while ((1u << r) < a) ++r; + return r; +} + +/* Map cfree section flags/sem to IMAGE_SCN_* Characteristics, leaving + * the alignment nibble for the caller to OR in. */ +static u32 sec_characteristics(const Section* s, int in_group) { + u32 r = 0; + int is_bss = (s->kind == SEC_BSS) || (s->sem == SSEM_NOBITS); + if (s->flags & SF_EXEC) { + r |= IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_EXECUTE; + } else if (is_bss) { + r |= IMAGE_SCN_CNT_UNINITIALIZED_DATA; + } else if (s->flags & SF_WRITE) { + r |= IMAGE_SCN_CNT_INITIALIZED_DATA; + } else if (s->flags & SF_ALLOC) { + /* Read-only allocated data (.rdata). */ + r |= IMAGE_SCN_CNT_INITIALIZED_DATA; + } + if (s->flags & SF_ALLOC) r |= IMAGE_SCN_MEM_READ; + if (s->flags & SF_WRITE) r |= IMAGE_SCN_MEM_WRITE; + if (in_group) r |= IMAGE_SCN_LNK_COMDAT; + /* When a reader stashed format-specific flag bits on a COFF-origin + * section, OR them back in here. ext_type carries the raw + * Characteristics value (or zero if no override); ext_flags is a + * sibling bag for any bits the canonical mapping above would lose. */ + if (s->ext_kind == OBJ_EXT_COFF) { + if (s->ext_type) { + /* Preserve the raw characteristics verbatim — overrides the + * canonical mapping. Keeps round-trip byte-stable for sections + * carrying CNT_INFO / LNK_REMOVE / MEM_DISCARDABLE / etc. */ + r = s->ext_type & ~IMAGE_SCN_ALIGN_MASK; + } + r |= s->ext_flags; + } + return r; +} + +/* Append `len` bytes of `s` followed by a single NUL to `b`, returning + * the offset at which `s` was placed. 
Dedupe linearly — strtabs are
+ * small enough that this is fine without a hash table, and the
+ * dedupe matches what binutils / llvm-objcopy emit. A hit may be the
+ * tail of a longer entry: any position whose next `len` bytes equal `s`
+ * and are followed by a NUL is reused. A zero-length `s` always maps to
+ * offset 0. Mirror of the helper in elf_emit. */
+static u32 strtab_add(Buf* b, const char* s, u32 len) {
+  u32 end;
+  u8 nul = 0;
+
+  if (len == 0) return 0;
+  end = buf_pos(b);
+
+  if (end > len) {
+    u8 inline_buf[256];
+    int on_heap = end > sizeof inline_buf;
+    u8* flat = on_heap ? (u8*)b->heap->alloc(b->heap, end, 1) : inline_buf;
+    /* A failed scratch allocation only costs us the dedupe; the string
+     * is still appended below. */
+    if (flat) {
+      /* Start past the 4-byte size-prefix placeholder. */
+      u32 pos = COFF_STRTAB_SIZE_FIELD_BYTES;
+      u32 hit = 0;
+      int found = 0;
+      buf_flatten(b, flat);
+      while (pos + len < end) {
+        if (flat[pos + len] == 0 && memcmp(flat + pos, s, len) == 0) {
+          hit = pos;
+          found = 1;
+          break;
+        }
+        ++pos;
+      }
+      if (on_heap) b->heap->free(b->heap, flat, end);
+      if (found) return hit;
+    }
+  }
+
+  buf_write(b, s, len);
+  buf_write(b, &nul, 1);
+  return end;
+}
+
+/* Fill the 8-byte section Name field `out`. Names of at most 8 bytes
+ * are copied as-is and NUL-padded. Longer names are interned in
+ * `strtab` and `out` receives "/<decimal-offset>", NUL-padded.
+ * Only the first 8 characters of that text are kept, so an offset that
+ * needs more than 7 digits is cut short rather than rejected. */
+static void encode_name8(char out[8], const char* name, u32 nlen, Buf* strtab) {
+  char text[16];
+  char rev[12];
+  int ntext = 0;
+  int nrev = 0;
+  u32 rest;
+
+  memset(out, 0, 8);
+  if (nlen <= 8) {
+    if (nlen) memcpy(out, name, nlen);
+    return;
+  }
+
+  rest = strtab_add(strtab, name, nlen);
+  /* Collect the decimal digits least-significant first; the do/while
+   * yields a single '0' for offset 0. */
+  do {
+    rev[nrev++] = (char)('0' + (rest % 10u));
+    rest /= 10u;
+  } while (rest);
+
+  text[ntext++] = '/';
+  while (nrev > 0 && ntext < (int)sizeof text) text[ntext++] = rev[--nrev];
+  memcpy(out, text, (size_t)(ntext > 8 ? 8 : ntext));
+}
+
+/* Write one 18-byte IMAGE_SYMBOL record into `dst`.
Zeroes == 0 together with a
+ * nonzero Offset selects the long-name form; anything else copies the
+ * 8 ShortName bytes verbatim. */
+static void wr_sym(u8* dst, const char ShortName[8], u32 Zeroes, u32 Offset,
+                   u32 Value, i16 SectionNumber, u16 Type, u8 StorageClass,
+                   u8 NumberOfAuxSymbols) {
+  if (Zeroes == 0 && Offset != 0) {
+    /* LongName form: 4 zero bytes then 4-byte LE strtab offset. */
+    memset(dst, 0, 4);
+    wr_u32_le(dst + 4, Offset);
+  } else {
+    memcpy(dst, ShortName, 8);
+  }
+  wr_u32_le(dst + 8, Value);
+  wr_u16_le(dst + 12, (u16)SectionNumber);
+  wr_u16_le(dst + 14, Type);
+  dst[16] = StorageClass;
+  dst[17] = NumberOfAuxSymbols;
+}
+
+/* Write a section-definition aux record (18 bytes). The three bytes
+ * after Selection are zeroed. */
+static void wr_aux_secdef(u8* dst, u32 Length, u16 NumberOfRelocations,
+                          u16 NumberOfLinenumbers, u32 CheckSum, u16 Number,
+                          u8 Selection) {
+  wr_u32_le(dst + 0, Length);
+  wr_u16_le(dst + 4, NumberOfRelocations);
+  wr_u16_le(dst + 6, NumberOfLinenumbers);
+  wr_u32_le(dst + 8, CheckSum);
+  wr_u16_le(dst + 12, Number);
+  dst[14] = Selection;
+  dst[15] = 0;
+  dst[16] = 0;
+  dst[17] = 0;
+}
+
+/* Write a weak-externals aux record (18 bytes); bytes 8..17 are zero. */
+static void wr_aux_weak(u8* dst, u32 TagIndex, u32 Characteristics) {
+  wr_u32_le(dst + 0, TagIndex);
+  wr_u32_le(dst + 4, Characteristics);
+  memset(dst + 8, 0, 10);
+}
+
+/* Look up the pool-interned string for a Sym. Returns "" with
+ * *len_out == 0 when the pool has no bytes for `n`. */
+static const char* sym_to_str(Compiler* c, Sym n, u32* len_out) {
+  Slice sl = pool_slice(c->global, n);
+  const char* s = sl.s;
+  if (!s) {
+    *len_out = 0;
+    return "";
+  }
+  *len_out = (u32)sl.len;
+  return s;
+}
+
+/* Number of 18-byte aux records needed to carry a `.file` path of
+ * `file_len` bytes; an empty path still takes one (all-zero) record. */
+static u32 coff_file_aux_count(u32 file_len) {
+  return file_len ? (file_len + COFF_AUX_FILE_SIZE - 1u) / COFF_AUX_FILE_SIZE
+                  : 1u;
+}
+
+/* Serialize `ob` as a COFF relocatable object into `w`.
+ *
+ * Passes: (1) plan one CSec per kept section, (2) record per-section
+ * reloc counts, (3) build the symbol table (section symbols first, then
+ * file / regular symbols), (4) encode per-section relocation tables,
+ * (5) assign file offsets, (6) write header, section headers, section
+ * bytes + relocs, symbol table and string table.
+ *
+ * Panics (via compiler_panic) on: an arch without COFF ops, big-endian
+ * or non-8-byte-pointer targets, more than 65535 relocs in a section,
+ * SK_IFUNC symbols, a `.file` path needing more than 255 aux records,
+ * relocs without a symbol, unsupported explicit addends, unsupported
+ * relocation kinds, and a failed scratch allocation. */
+void emit_coff(Compiler* c, ObjBuilder* ob, Writer* w) {
+  Heap* h = (Heap*)c->ctx->heap;
+
+  /* Tombstone sweep — see obj_sweep_dead. */
+  obj_sweep_dead(ob);
+
+  /* ---- target validation ----------------------------------------- */
+  const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_COFF);
+  const ObjCoffArchOps* coff =
+      fmt && fmt->coff_arch ? fmt->coff_arch(c->target.arch) : NULL;
+  if (!coff || !coff->reloc_to) {
+    compiler_panic(c, no_loc(), "emit_coff: unsupported target arch %u",
+                   (u32)c->target.arch);
+  }
+  u16 machine = coff->machine;
+  u32 (*reloc_to)(u32) = coff->reloc_to;
+  if (c->target.big_endian) {
+    compiler_panic(c, no_loc(), "emit_coff: big-endian COFF not supported");
+  }
+  if (c->target.ptr_size != 8) {
+    compiler_panic(c, no_loc(), "emit_coff: ptr_size %u (expected 8)",
+                   (u32)c->target.ptr_size);
+  }
+
+  /* ---- pass 1: plan sections ------------------------------------- */
+  u32 nobjsec = obj_section_count(ob);
+  CSec* secs = arena_zarray(c->scratch, CSec, nobjsec ? nobjsec : 1);
+  u32* obj_to_coff = arena_zarray(c->scratch, u32, nobjsec ? nobjsec : 1);
+  u32 nsecs = 0;
+
+  /* String table — leading 4-byte size placeholder. Real strings start
+   * at offset 4. */
+  Buf strtab;
+  buf_init(&strtab, h);
+  {
+    u8 zero4[COFF_STRTAB_SIZE_FIELD_BYTES] = {0, 0, 0, 0};
+    buf_write(&strtab, zero4, COFF_STRTAB_SIZE_FIELD_BYTES);
+  }
+
+  for (u32 i = 1; i < nobjsec; ++i) {
+    const Section* s = obj_section_get(ob, i);
+    if (s->removed) continue;
+    /* Skip ELF-style synthetic sections (a reader from another format
+     * may have surfaced them) — COFF stores symtab/strtab/relocs
+     * out-of-band, not as named sections. */
+    if (s->sem == SSEM_SYMTAB || s->sem == SSEM_STRTAB || s->sem == SSEM_RELA ||
+        s->sem == SSEM_REL || s->sem == SSEM_GROUP) {
+      continue;
+    }
+
+    CSec* cs = &secs[nsecs];
+    u32 nlen;
+    const char* nm = sym_to_str(c, s->name, &nlen);
+    encode_name8(cs->name8, nm, nlen, &strtab);
+
+    cs->obj_sec = i;
+    cs->group_id = s->group_id;
+    cs->align = s->align ? s->align : 1;
+
+    int in_group = (s->group_id != OBJ_GROUP_NONE);
+    u32 ch = sec_characteristics(s, in_group);
+    /* Alignment lives in bits 20..23. Cap at log2(8192)=13 -> nibble
+     * value 14 (IMAGE_SCN_ALIGN_8192BYTES). */
+    u32 lg = log2_align(cs->align);
+    if (lg > 13) lg = 13;
+    ch &= ~IMAGE_SCN_ALIGN_MASK;
+    ch |= IMAGE_SCN_ALIGN_FROM_LOG2(lg);
+    cs->characteristics = ch;
+
+    if (s->sem == SSEM_NOBITS || s->kind == SEC_BSS) {
+      cs->is_nobits = 1;
+      cs->virtual_size = s->bss_size;
+      cs->size_of_raw_data = 0;
+      cs->obj_bytes = NULL;
+    } else {
+      cs->is_nobits = 0;
+      cs->virtual_size = 0;
+      cs->size_of_raw_data = s->bytes.total;
+      cs->obj_bytes = &s->bytes;
+    }
+
+    obj_to_coff[i] = nsecs + 1; /* 1-based; matches SectionNumber. */
+    nsecs++;
+  }
+
+  /* ---- pass 2: count and assign per-section reloc counts --------- */
+  /* COFF stores NumberOfRelocations as u16; sections with > 65535
+   * relocs use the IMAGE_SCN_LNK_NRELOC_OVFL extension which we don't
+   * implement in v1. Panic if any single section exceeds the limit. */
+  u32 total_relocs = obj_reloc_total(ob);
+  for (u32 ci = 0; ci < nsecs; ++ci) {
+    CSec* cs = &secs[ci];
+    u32 nr = obj_reloc_count(ob, cs->obj_sec);
+    if (nr > 0xFFFFu) {
+      compiler_panic(c, no_loc(),
+                     "emit_coff: section %u has %u relocs (max 65535)",
+                     (u32)cs->obj_sec, nr);
+    }
+    cs->number_of_relocations = (u16)nr;
+  }
+
+  /* ---- pass 3: build the symbol table ---------------------------- */
+  /* Count ObjSyms (incl. tombstoned — we'll skip those when emitting)
+   * and, for live SK_FILE symbols, the aux records their path needs.
+   * A `.file` symbol takes 1 + naux records, which the flat
+   * "2 per ObjSym" budget below would not cover for paths longer than
+   * one aux record. */
+  u32 nobjsym = 0;
+  u32 nfile_aux = 0;
+  {
+    ObjSymIter* it = obj_symiter_new(ob);
+    ObjSymEntry e;
+    while (obj_symiter_next(it, &e)) {
+      ++nobjsym;
+      if (e.sym->removed || e.sym->kind != SK_FILE) continue;
+      u32 flen;
+      (void)sym_to_str(c, e.sym->name, &flen);
+      u32 need = coff_file_aux_count(flen);
+      /* NumberOfAuxSymbols is a single byte on the wire. */
+      if (need > 0xFFu) {
+        compiler_panic(c, no_loc(),
+                       "emit_coff: file symbol path needs %u aux records "
+                       "(max 255)",
+                       need);
+      }
+      nfile_aux += need;
+    }
+    obj_symiter_free(it);
+  }
+
+  /* Upper bound on symbol-table records (including aux slots):
+   *   - 2 records per section symbol (primary + 1 aux secdef)
+   *   - 2 records per ObjSym (primary + up to 1 weak aux)
+   *   - the aux records counted above for `.file` symbols
+   *   - +4 spare for safety
+   * Worst case is generous; we trim by tracking nrecords as we emit. */
+  u32 max_records = 2u * nsecs + 2u * nobjsym + nfile_aux + 4u;
+  u8* symtab =
+      (u8*)arena_zarray(c->scratch, u8, (size_t)COFF_SYMBOL_SIZE * max_records);
+  u32 nrecords = 0;
+
+  /* obj_id -> COFF symbol index (including aux slots). Index 0 is
+   * reserved as "none" in our internal map (a real COFF symbol may
+   * legitimately live at index 0, but no ObjSym ever maps there since
+   * we never put OBJ_SYM_NONE through).
+   * NOTE(review): the array is sized from the iterated symbol count but
+   * indexed by ObjSym id; this assumes ids are dense — confirm. */
+  u32* sym_to_coff = arena_zarray(c->scratch, u32, nobjsym + 2);
+
+  /* Section symbols first — one STATIC per kept obj section, each
+   * followed by a SECTION DEFINITION aux. Reloc-against-section in
+   * other tools' output uses these; emitting them unconditionally
+   * matches what clang / mingw emit and gives readers a stable target. */
+  u32* secsym_index = arena_zarray(c->scratch, u32, nsecs + 1);
+  for (u32 ci = 0; ci < nsecs; ++ci) {
+    CSec* cs = &secs[ci];
+    char short_name[8];
+    /* The section symbol reuses the section header's 8 Name bytes, so a
+     * long-named section's symbol carries the same "/N" text. */
+    memcpy(short_name, cs->name8, 8);
+
+    u8* slot = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE;
+    wr_sym(slot, short_name, /*Zeroes*/ 1, /*Offset*/ 0,
+           /*Value*/ 0,
+           /*SectionNumber*/ (i16)(ci + 1),
+           /*Type*/ IMAGE_SYM_TYPE_NULL,
+           /*StorageClass*/ IMAGE_SYM_CLASS_STATIC,
+           /*NumberOfAuxSymbols*/ 1);
+    secsym_index[ci] = nrecords;
+    nrecords++;
+
+    /* Section-definition aux. COMDAT members of a live group get
+     * SELECT_ANY regardless of the group's flags; everything else gets
+     * Selection 0. The associated-section Number is left at 0 (cfree
+     * does not produce associative-COMDAT chains today). */
+    u8 selection = 0;
+    if (cs->group_id != OBJ_GROUP_NONE) {
+      const ObjGroup* g = obj_group_get(ob, cs->group_id);
+      if (g && !g->removed) {
+        selection = (u8)IMAGE_COMDAT_SELECT_ANY;
+      }
+    }
+    u8* aux = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE;
+    wr_aux_secdef(aux, /*Length*/ cs->size_of_raw_data,
+                  /*NumberOfRelocations*/ cs->number_of_relocations,
+                  /*NumberOfLinenumbers*/ 0,
+                  /*CheckSum*/ 0,
+                  /*Number*/ 0,
+                  /*Selection*/ selection);
+    nrecords++;
+  }
+
+  /* File / regular symbols. */
+  {
+    ObjSymIter* it = obj_symiter_new(ob);
+    ObjSymEntry e;
+    while (obj_symiter_next(it, &e)) {
+      const ObjSym* s = e.sym;
+      if (s->removed) continue;
+      if (s->kind == SK_IFUNC) {
+        compiler_panic(c, no_loc(),
+                       "emit_coff: SK_IFUNC has no PE/COFF representation");
+      }
+      /* Don't re-emit SK_SECTION symbols — section symbols are
+       * synthesized above. Map any input-side SK_SECTION onto the
+       * already-emitted one. */
+      if (s->kind == SK_SECTION) {
+        if (s->section_id && s->section_id < nobjsec) {
+          u32 ci = obj_to_coff[s->section_id];
+          if (ci) sym_to_coff[e.id] = secsym_index[ci - 1];
+        }
+        continue;
+      }
+
+      u32 nlen;
+      const char* nm = sym_to_str(c, s->name, &nlen);
+
+      if (s->kind == SK_FILE) {
+        /* File symbol: name ".file" (short), section IMAGE_SYM_DEBUG,
+         * storage class FILE, followed by aux records carrying the
+         * NUL-padded file path (18 bytes per aux). The count pass above
+         * already rejected paths needing more than 255 aux records. */
+        u32 file_len = nlen;
+        u32 naux = coff_file_aux_count(file_len);
+        char short_name[8] = {'.', 'f', 'i', 'l', 'e', 0, 0, 0};
+        u8* slot = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE;
+        wr_sym(slot, short_name, 1, 0, /*Value*/ 0,
+               /*SectionNumber*/ (i16)IMAGE_SYM_DEBUG,
+               /*Type*/ IMAGE_SYM_TYPE_NULL,
+               /*StorageClass*/ IMAGE_SYM_CLASS_FILE,
+               /*NumberOfAuxSymbols*/ (u8)naux);
+        sym_to_coff[e.id] = nrecords;
+        nrecords++;
+        for (u32 a = 0; a < naux; ++a) {
+          u8* aux = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE;
+          memset(aux, 0, COFF_AUX_FILE_SIZE);
+          u32 off = a * COFF_AUX_FILE_SIZE;
+          u32 copy = file_len > off ? file_len - off : 0;
+          if (copy > COFF_AUX_FILE_SIZE) copy = COFF_AUX_FILE_SIZE;
+          if (copy) memcpy(aux, nm + off, copy);
+          nrecords++;
+        }
+        continue;
+      }
+
+      /* Regular symbol. */
+      char short_name[8];
+      u32 zeroes = 1, offset = 0;
+      memset(short_name, 0, 8);
+      if (nlen <= 8) {
+        if (nlen) memcpy(short_name, nm, nlen);
+      } else {
+        zeroes = 0;
+        offset = strtab_add(&strtab, nm, nlen);
+      }
+
+      i16 section_number = 0;
+      u32 value = 0;
+      u8 storage_class = IMAGE_SYM_CLASS_NULL;
+      u16 type = IMAGE_SYM_TYPE_NULL;
+      u8 naux = 0;
+      int emit_weak_aux = 0;
+
+      switch (s->kind) {
+        case SK_ABS:
+          section_number = (i16)IMAGE_SYM_ABSOLUTE;
+          value = (u32)s->value;
+          break;
+        case SK_COMMON:
+          /* COFF lacks a per-common alignment field; encode size in
+           * Value with SectionNumber=UNDEFINED and rely on the linker
+           * to pick a default alignment. (cfree's frontend uses
+           * COMMON only via __attribute__((common)) which is rare on
+           * PE/COFF targets.) */
+          section_number = (i16)IMAGE_SYM_UNDEFINED;
+          value = (u32)s->size;
+          break;
+        default:
+          if (s->section_id == OBJ_SEC_NONE) {
+            section_number = (i16)IMAGE_SYM_UNDEFINED;
+            value = 0;
+          } else if (s->section_id < nobjsec && obj_to_coff[s->section_id]) {
+            section_number = (i16)obj_to_coff[s->section_id];
+            value = (u32)s->value;
+          } else {
+            /* Section was skipped or out of range: emit as undefined. */
+            section_number = (i16)IMAGE_SYM_UNDEFINED;
+            value = 0;
+          }
+          break;
+      }
+
+      if (s->kind == SK_FUNC) type = (u16)COFF_SYM_TYPE_FUNCTION;
+
+      switch (s->bind) {
+        case SB_LOCAL:
+          storage_class = IMAGE_SYM_CLASS_STATIC;
+          break;
+        case SB_GLOBAL:
+          storage_class = IMAGE_SYM_CLASS_EXTERNAL;
+          break;
+        case SB_WEAK:
+          /* Weak symbols get storage class WEAK_EXTERNAL plus one
+           * WeakExternal aux record. cfree's obj layer doesn't carry a
+           * separate fallback symbol today, so the aux is written with
+           * TagIndex=0. That value names symbol-table record 0, not
+           * this symbol.
+           * NOTE(review): confirm that consumers treat TagIndex=0 as
+           * "weak, no fallback" (the ELF STB_WEAK equivalent). */
+          storage_class = IMAGE_SYM_CLASS_WEAK_EXTERNAL;
+          emit_weak_aux = 1;
+          naux = 1;
+          break;
+        default:
+          storage_class = IMAGE_SYM_CLASS_STATIC;
+          break;
+      }
+
+      u8* slot = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE;
+      wr_sym(slot, short_name, zeroes, offset, value, section_number, type,
+             storage_class, naux);
+      sym_to_coff[e.id] = nrecords;
+      nrecords++;
+      if (emit_weak_aux) {
+        u8* aux = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE;
+        wr_aux_weak(aux, /*TagIndex*/ 0,
+                    /*Characteristics*/ IMAGE_WEAK_EXTERN_SEARCH_LIBRARY);
+        nrecords++;
+      }
+    }
+    obj_symiter_free(it);
+  }
+
+  /* ---- pass 4: build per-section relocation tables --------------- */
+  for (u32 ci = 0; ci < nsecs; ++ci) {
+    CSec* cs = &secs[ci];
+    u32 nr = cs->number_of_relocations;
+    if (!nr) continue;
+    u8* buf = (u8*)arena_alloc(c->scratch, (size_t)COFF_RELOC_SIZE * nr,
+                               _Alignof(u32));
+    u32 j = 0;
+    for (u32 ri = 0; ri < total_relocs; ++ri) {
+      const Reloc* r = obj_reloc_at(ob, ri);
+      if (r->removed) continue;
+      if (r->section_id != cs->obj_sec) continue;
+      if (r->sym == OBJ_SYM_NONE) {
+        compiler_panic(c, no_loc(),
+                       "emit_coff: reloc without symbol not supported "
+                       "(sec=%u offset=%u kind=%u)",
+                       (u32)r->section_id, (u32)r->offset, (u32)r->kind);
+      }
+      if (r->has_explicit_addend && r->addend != 0 &&
+          !coff_rel32_absorbs_minus4(c->target.arch, (RelocKind)r->kind,
+                                     r->addend)) {
+        /* v1 limitation: COFF carries the addend in the patched bytes,
+         * and we don't currently mutate the obj's section bytes to
+         * encode a separate explicit addend. cfree's MCEmitter writes
+         * the addend inline for COFF targets, so this branch only
+         * fires for inputs synthesized by external tools. */
+        compiler_panic(c, no_loc(),
+                       "emit_coff: explicit nonzero addend not supported "
+                       "(sec=%u offset=%u kind=%u addend=%lld)",
+                       (u32)r->section_id, (u32)r->offset, (u32)r->kind,
+                       (long long)r->addend);
+      }
+      u32 wire = reloc_to(r->kind);
+      /* Both arch translators use 0 (IMAGE_REL_*_ABSOLUTE) as the
+       * unsupported-input sentinel; treat that as a panic unless the
+       * input really is R_NONE. */
+      if (wire == 0 && r->kind != R_NONE) {
+        compiler_panic(c, no_loc(),
+                       "emit_coff: unsupported relocation kind %u for arch %u",
+                       (u32)r->kind, (u32)c->target.arch);
+      }
+      u32 sym_idx = sym_to_coff[r->sym];
+      u8* slot = buf + (size_t)j * COFF_RELOC_SIZE;
+      wr_u32_le(slot + 0, r->offset);
+      wr_u32_le(slot + 4, sym_idx);
+      wr_u16_le(slot + 8, (u16)wire);
+      ++j;
+    }
+    cs->reloc_bytes = buf;
+    /* If a tombstoned reloc was skipped between count and emit, j may
+     * be less than nr; trust the emitted count for the wire field, and
+     * patch the section-definition aux record (the record right after
+     * the section symbol; NumberOfRelocations sits at byte 4) so both
+     * copies of the count agree. */
+    if (j != nr) {
+      cs->number_of_relocations = (u16)j;
+      wr_u16_le(
+          symtab + (size_t)(secsym_index[ci] + 1u) * COFF_SYMBOL_SIZE + 4,
+          (u16)j);
+    }
+  }
+
+  /* ---- pass 5: assign file offsets ------------------------------- */
+  /* Layout:
+   *   [file header] [section headers] [per-section: bytes, relocs]*
+   *   [symbol table] [string table] */
+  u64 cur =
+      (u64)COFF_FILE_HEADER_SIZE + (u64)COFF_SECTION_HEADER_SIZE * (u64)nsecs;
+
+  for (u32 ci = 0; ci < nsecs; ++ci) {
+    CSec* cs = &secs[ci];
+    /* Raw data offset. NOBITS contributes nothing on disk. */
+    if (cs->is_nobits || cs->size_of_raw_data == 0) {
+      cs->pointer_to_raw_data = 0;
+    } else {
+      cur = ALIGN_UP(cur, (u64)cs->align);
+      cs->pointer_to_raw_data = (u32)cur;
+      cur += cs->size_of_raw_data;
+    }
+    /* Reloc table. COFF doesn't mandate alignment for the reloc array,
+     * but llvm and binutils emit them naturally byte-packed; we 4-align
+     * for tidiness. */
+    if (cs->number_of_relocations) {
+      cur = ALIGN_UP(cur, (u64)4);
+      cs->pointer_to_relocations = (u32)cur;
+      cur += (u64)cs->number_of_relocations * COFF_RELOC_SIZE;
+    } else {
+      cs->pointer_to_relocations = 0;
+    }
+  }
+
+  cur = ALIGN_UP(cur, (u64)4);
+  u64 symtab_off = cur;
+  cur += (u64)nrecords * COFF_SYMBOL_SIZE;
+
+  /* String table starts immediately after the symtab. Its 4-byte size
+   * prefix (inclusive of the prefix itself) is patched into the
+   * flattened copy just before it is written in pass 6. */
+  u32 strtab_size = buf_pos(&strtab);
+  u64 strtab_off = cur;
+  cur += strtab_size;
+
+  /* ---- pass 6: write the file ------------------------------------ */
+  cfree_writer_seek(w, 0);
+
+  /* IMAGE_FILE_HEADER */
+  coff_wr_u16(w, machine);
+  coff_wr_u16(w, (u16)nsecs);
+  coff_wr_u32(w, 0); /* TimeDateStamp: reproducible */
+  coff_wr_u32(w, (u32)symtab_off);
+  coff_wr_u32(w, nrecords);
+  coff_wr_u16(w, 0); /* SizeOfOptionalHeader: 0 for .obj */
+  coff_wr_u16(w, IMAGE_FILE_LARGE_ADDRESS_AWARE);
+
+  /* Section headers — one 40-byte block immediately after the file
+   * header. */
+  for (u32 ci = 0; ci < nsecs; ++ci) {
+    const CSec* cs = &secs[ci];
+    cfree_writer_write(w, cs->name8, 8);
+    coff_wr_u32(w, cs->virtual_size);
+    coff_wr_u32(w, 0); /* VirtualAddress: 0 for .obj */
+    coff_wr_u32(w, cs->size_of_raw_data);
+    coff_wr_u32(w, cs->pointer_to_raw_data);
+    coff_wr_u32(w, cs->pointer_to_relocations);
+    coff_wr_u32(w, 0); /* PointerToLinenumbers: 0 */
+    coff_wr_u16(w, cs->number_of_relocations);
+    coff_wr_u16(w, 0); /* NumberOfLinenumbers: 0 */
+    coff_wr_u32(w, cs->characteristics);
+  }
+
+  /* Section bytes + relocs (interleaved). */
+  for (u32 ci = 0; ci < nsecs; ++ci) {
+    const CSec* cs = &secs[ci];
+    if (!cs->is_nobits && cs->size_of_raw_data && cs->obj_bytes) {
+      cfree_writer_seek(w, cs->pointer_to_raw_data);
+      u32 sz = cs->obj_bytes->total;
+      u8* tmp = (u8*)h->alloc(h, sz ? sz : 1, 1);
+      /* strtab_add treats a NULL from this allocator as possible, so
+       * don't hand one to buf_flatten. */
+      if (!tmp) {
+        compiler_panic(c, no_loc(),
+                       "emit_coff: out of memory flattening section %u "
+                       "(%u bytes)",
+                       (u32)cs->obj_sec, sz);
+      }
+      if (sz) buf_flatten(cs->obj_bytes, tmp);
+      cfree_writer_write(w, tmp, sz);
+      h->free(h, tmp, sz ? sz : 1);
+    }
+    if (cs->number_of_relocations && cs->reloc_bytes) {
+      cfree_writer_seek(w, cs->pointer_to_relocations);
+      cfree_writer_write(w, cs->reloc_bytes,
+                         (size_t)cs->number_of_relocations * COFF_RELOC_SIZE);
+    }
+  }
+
+  /* Symbol table. */
+  cfree_writer_seek(w, symtab_off);
+  cfree_writer_write(w, symtab, (size_t)nrecords * COFF_SYMBOL_SIZE);
+
+  /* String table: 4-byte total size (inclusive) followed by the body.
+   * `strtab` was initialized with 4 placeholder zero bytes; rewrite
+   * them with the real size before flushing. */
+  {
+    u8* flat = (u8*)arena_alloc(c->scratch, strtab_size ? strtab_size : 1, 1);
+    if (strtab_size) buf_flatten(&strtab, flat);
+    /* Patch the 4-byte size prefix in place. */
+    if (strtab_size >= COFF_STRTAB_SIZE_FIELD_BYTES) {
+      wr_u32_le(flat, strtab_size);
+    }
+    cfree_writer_seek(w, strtab_off);
+    cfree_writer_write(w, flat, strtab_size);
+  }
+  buf_fini(&strtab);
+}
diff --git a/src/obj/coff/link.c b/src/obj/coff/link.c
@@ -0,0 +1,1731 @@
+/* link_emit_coff: write a PE32+ executable image to the
+ * caller-provided Writer.
+ *
+ * Phase 3.1 deliverable per doc/WINDOWS.md: skeleton + base-reloc
+ * handling for the four standard PE sections. Import-table synthesis
+ * (.idata / IAT) lands in Phase 3.2; per-arch IAT stub bytes in 3.3;
+ * TLS directory in 3.5; debug directory in 3.6 — those code paths
+ * panic loudly here so the strict-by-default posture surfaces them.
+ *
+ * File layout (in write order):
+ *
+ *   [DOS stub IMAGE_DOS_HEADER]      -- 64 bytes; e_lfanew=0x40
+ *   [PE signature "PE\0\0"]          -- 4 bytes
+ *   [IMAGE_FILE_HEADER]              -- 20 bytes
+ *   [IMAGE_OPTIONAL_HEADER64]        -- 240 bytes (PE32+)
+ *   [IMAGE_SECTION_HEADER * nsec]    -- 40 bytes each
+ *   [pad to FileAlignment]
+ *   [.text bytes, padded to FileAlignment]
+ *   [.rdata bytes, padded to FileAlignment]
+ *   [.data bytes, padded to FileAlignment]
+ *   [.reloc bytes, padded to FileAlignment]
+ *
+ * The list above names only the four classic sections. The CoffBucket
+ * enum further down also defines .idata and .tls buckets (between
+ * .rdata/.data and .data/.bss respectively); presumably they are written
+ * in that same order when present.
+ *
+ * .bss is uninitialized — it has a section header (with VirtualSize)
+ * but no file bytes and PointerToRawData=0.
+ *
+ * RVAs follow SectionAlignment (0x1000); FileAlignment is 0x200; the
+ * first section starts at RVA 0x1000 (right after the headers map).
+ * ImageBase is the Win64 convention 0x140000000.
+ *
+ * Reloc strategy. The link layout pass has already placed every kept
+ * input section into img->sections / img->segments under the ELF/Mach-O
+ * coordinate system (image-relative vaddrs, often packed by permission
+ * bucket). COFF wants a different packing — the standard PE
+ * sections at SectionAlignment-aligned RVAs — so this writer re-derives
+ * per-input-section vaddrs from scratch and shifts each LinkSection /
+ * symbol / LinkRelocApply by its section's per-section delta before
+ * applying relocations. link_emit_macho takes the same tack for its
+ * __DATA_CONST splits; the ELF writer leaves vaddrs alone because the
+ * link layout already matches ELF's PT_LOAD shape.
*/
+
+#include "link/link.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "core/core.h"
+#include "core/heap.h"
+#include "core/pool.h"
+#include "core/slice.h"
+#include "core/util.h"
+#include "core/vec.h"
+#include "link/link_internal.h"
+#include "obj/coff/coff.h"
+#include "obj/format.h"
+
+/* ---- .idata layout constants ----
+ *
+ * Per doc/WINDOWS.md §3.2: the .idata content is a concatenation of an
+ * IMAGE_IMPORT_DESCRIPTOR table (NULL-terminated), one ILT per DLL
+ * (each NULL-terminated u64 array), one IAT per DLL (same shape),
+ * a hint/name table, and a DLL-name string pool. Each block is
+ * pointer-sized aligned within the section. AArch64 import thunks use
+ * PAGEOFFSET_12L for 64-bit ILT/IAT slots, so those sub-blocks must be
+ * 8-byte aligned. */
+#define PE_IDATA_BLOCK_ALIGN 8u
+/* Hint field on IMAGE_IMPORT_BY_NAME records. cfree never has a real
+ * hint (the OS loader doesn't need one to do the bsearch on the DLL's
+ * export name table), so 0 is the canonical "no hint" value. */
+#define PE_IMPORT_HINT_NONE 0u
+
+/* All-zero SrcLoc for panics that have no source position to report. */
+static SrcLoc no_loc(void) {
+  SrcLoc l = {0, 0, 0};
+  return l;
+}
+
+/* ---- PE/Win64 layout constants ----
+ *
+ * Centralised here so the wire-format numbers in this TU stay named
+ * (and the magic-numbers rule in CLAUDE.md is honoured). Values match
+ * the PE/COFF spec + Win64 conventions; mingw-w64's ld defaults agree.
*/
+#define PE_IMAGE_BASE LINK_PE_IMAGE_BASE
+#define PE_SECTION_ALIGNMENT 0x1000u /* RVA granularity of sections */
+#define PE_FILE_ALIGNMENT 0x200u     /* on-disk padding granularity */
+#define PE_FIRST_SECTION_RVA 0x1000u /* first section, right after headers */
+#define PE_DOS_E_LFANEW 0x40u        /* e_lfanew: offset of the PE signature */
+#define PE_NUM_DATA_DIRS COFF_NUM_DATA_DIRECTORIES
+#define PE_OPT_HDR_SIZE COFF_OPT_HDR64_SIZE
+#define PE_LINKER_MAJOR 0u
+#define PE_LINKER_MINOR 1u
+#define PE_OS_MAJOR 6u /* Windows Vista+ — mingw default */
+#define PE_OS_MINOR 0u
+#define PE_SUBSYS_MAJOR 6u
+#define PE_SUBSYS_MINOR 0u
+#define PE_STACK_RESERVE 0x100000ULL
+#define PE_STACK_COMMIT 0x1000ULL
+#define PE_HEAP_RESERVE 0x100000ULL
+#define PE_HEAP_COMMIT 0x1000ULL
+/* DllCharacteristics bits OR-ed together for the optional header. */
+#define PE_DLL_CHARS \
+  (IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA | \
+   IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE | \
+   IMAGE_DLLCHARACTERISTICS_NX_COMPAT | \
+   IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE)
+
+/* PE32+ DOS-stub-to-PE-signature offsets (manual, since we marshal
+ * field-by-field rather than memcpy'ing the packed struct). */
+#define PE_DOS_HDR_SIZE COFF_DOS_HEADER_SIZE
+#define PE_SIG_SIZE 4u
+#define PE_FILE_HDR_SIZE COFF_FILE_HEADER_SIZE
+#define PE_SECTION_HDR_SIZE COFF_SECTION_HEADER_SIZE
+
+/* Standard PE output buckets, plus .idata (import directory) and
+ * .reloc — both synthesised here rather than copied from input
+ * sections. Order matters: it's the on-image RVA order.
+ * COFF_NBUCKETS is the bucket count and sizes the per-bucket arrays
+ * passed around as `CoffSection out[COFF_NBUCKETS]`. */
+typedef enum CoffBucket {
+  COFF_BUCKET_TEXT = 0,
+  COFF_BUCKET_RDATA = 1,
+  COFF_BUCKET_IDATA = 2,
+  COFF_BUCKET_DATA = 3,
+  COFF_BUCKET_TLS = 4,
+  COFF_BUCKET_BSS = 5,
+  COFF_BUCKET_RELOC = 6,
+  COFF_NBUCKETS = 7,
+} CoffBucket;
+
+/* IMAGE_TLS_DIRECTORY64 wire size: u64*4 + u32*2 = 40 bytes. */
+#define COFF_TLS_DIRECTORY64_SIZE 40u
+/* Byte offsets of the four u64 VA fields within IMAGE_TLS_DIRECTORY64
+ * — they need base relocations so ASLR can fix them up.
*/
+#define COFF_TLSDIR_OFF_START_ADDR 0u
+#define COFF_TLSDIR_OFF_END_ADDR 8u
+#define COFF_TLSDIR_OFF_INDEX_ADDR 16u
+#define COFF_TLSDIR_OFF_CALLBACKS 24u
+
+/* One output bucket of the PE image: its section-header fields plus the
+ * byte buffer that backs it. */
+typedef struct CoffSection {
+  const char* name;    /* short ASCII; <= 8 bytes including NUL pad */
+  u32 characteristics;
+  u8* bytes;           /* NULL for .bss / .reloc-before-build */
+  u32 size;            /* VirtualSize (real bytes; for .bss, mem size) */
+  u32 size_raw;        /* SizeOfRawData (file size, FileAlignment-padded) */
+  u32 rva;             /* VirtualAddress in image */
+  u32 file_offset;     /* PointerToRawData; 0 for .bss */
+  u8 in_image;         /* 1 if this bucket is emitted as a section */
+  u8 has_file_bytes;   /* 0 for .bss */
+  u8 pad[2];
+} CoffSection;
+
+/* ---- byte writer helpers ---- */
+
+/* Emit `n` zero bytes to `w`, at most 256 per write call. */
+static void coff_write_zeroes(Writer* w, u64 n) {
+  static const u8 zero_block[256] = {0};
+  u64 left;
+  for (left = n; left != 0;) {
+    u64 chunk = left < sizeof(zero_block) ? left : sizeof(zero_block);
+    cfree_writer_write(w, zero_block, (size_t)chunk);
+    left -= chunk;
+  }
+}
+
+/* Pick the output bucket for a kept LinkSection. Checks run in priority
+ * order: executable sections go to .text; SF_TLS sections go to the
+ * dedicated .tls bucket so SECREL relocations from TLS access code
+ * resolve against the merged TLS image rather than .data; SSEM_NOBITS
+ * goes to .bss; writable goes to .data; whatever is left is .rdata. */
+static CoffBucket coff_bucket_for(const LinkSection* ls) {
+  CoffBucket bucket = COFF_BUCKET_RDATA;
+  if (ls->flags & SF_EXEC) {
+    bucket = COFF_BUCKET_TEXT;
+  } else if (ls->flags & SF_TLS) {
+    bucket = COFF_BUCKET_TLS;
+  } else if (ls->sem == SSEM_NOBITS) {
+    bucket = COFF_BUCKET_BSS;
+  } else if (ls->flags & SF_WRITE) {
+    bucket = COFF_BUCKET_DATA;
+  }
+  return bucket;
+}
+
+/* True for relocation kinds that need an entry in .reloc so the OS
+ * loader can patch the site after ASLR picks a runtime ImageBase.
+ * PC-relative fixups don't need base-relocs — the displacement is
+ * load-invariant.
Only the two absolute kinds, R_ABS64 and R_ABS32, qualify. */
+static int coff_reloc_needs_base_reloc(RelocKind k) {
+  switch (k) {
+    case R_ABS64:
+    case R_ABS32:
+      return 1;
+    default:
+      return 0;
+  }
+}
+
+/* Find the first LinkSection whose closed range [vaddr, vaddr+size]
+ * contains the image-relative address `v`; NULL when none does. The
+ * upper bound is inclusive, so an address one past a section's last
+ * byte still resolves to that section. Used to attribute symbol vaddrs
+ * to a containing section so per-section vaddr deltas can be applied
+ * after the PE relayout. */
+static const LinkSection* coff_section_at(const LinkImage* img, u64 v) {
+  u32 idx = 0;
+  while (idx < img->nsections) {
+    const LinkSection* cand = &img->sections[idx];
+    int below = v < cand->vaddr;
+    int above = v > cand->vaddr + cand->size;
+    if (!below && !above) return cand;
+    ++idx;
+  }
+  return NULL;
+}
+
+/* Per-input-section relocation record, indexed by `LinkSection.id - 1`.
+ * Populated by coff_build_buckets and consumed by every later pass that
+ * translates input-coordinate offsets (the world img->sections /
+ * img->relocs live in) into PE-coordinate ones (where the writer plants
+ * bytes). The delta is cached so callers do not recompute it for every
+ * LinkRelocApply that points at the section.
+ * NOTE(review): coff_symbol_final_va adds the bucket's rva on top of
+ * new_rva, which suggests new_rva is bucket-relative rather than
+ * image-relative as the field comment says — confirm. */
+typedef struct CoffSecMap {
+  u32 new_rva;      /* image-relative RVA after PE relayout */
+  u32 new_file_off; /* file offset of the patched byte */
+  i64 delta;        /* new_rva - old_vaddr */
+  u8 bucket;
+  u8 pad[3];
+} CoffSecMap;
+
+/* TLS directory placement state. Populated when at least one SF_TLS
+ * section survives dead-strip; consumed by the optional-header writer,
+ * the .reloc builder (base-relocs for the four absolute VA fields),
+ * and the .rdata emit pass that writes the final 40-byte record.
*/
+typedef struct CoffTlsLayout {
+  int present;             /* 1 iff at least one TLS section was kept */
+  u32 dir_rdata_off;       /* byte offset of the IMAGE_TLS_DIRECTORY64
+                            * within .rdata */
+  u32 tls_size;            /* size of the merged .tls bucket */
+  LinkSymId tls_index_sym; /* resolved _tls_index LinkSymbol */
+  LinkSymId callbacks_sym; /* __xl_a if found, else the __xl_c lookup
+                            * result (may be LINK_SYM_NONE) */
+  u64 callbacks_addend;    /* 8 with __xl_a (mingw points past the
+                            * leading NULL sentinel), else 0 */
+} CoffTlsLayout;
+
+/* Linear search of img->syms for a symbol whose interned name equals
+ * `name`. Returns its 1-based LinkSymId, or LINK_SYM_NONE when no
+ * symbol carries that name. */
+static LinkSymId coff_find_sym(LinkImage* img, const char* name) {
+  Sym wanted = pool_intern_slice(img->c->global, slice_from_cstr(name));
+  u32 total = LinkSyms_count(&img->syms);
+  u32 idx = 0;
+  while (idx < total) {
+    const LinkSymbol* cand = LinkSyms_at(&img->syms, idx);
+    ++idx; /* idx is now the 1-based id of `cand` */
+    if (cand->name == wanted) return (LinkSymId)idx;
+  }
+  return LINK_SYM_NONE;
+}
+
+/* Locate _tls_index by name in the resolved symbol table. mingw's
+ * libmingwex defines it (as part of tlsmcrt); without a CRT the link
+ * fails here with a clear message rather than producing a TLS
+ * directory pointing at a stale address.
(This helper only performs the
+ * lookup and returns LINK_SYM_NONE on a miss; coff_plan_tls_layout is
+ * the caller that raises the panic.) */
+static LinkSymId coff_find_tls_index_sym(LinkImage* img) {
+  return coff_find_sym(img, "_tls_index");
+}
+
+/* Resolve the LinkSection a symbol belongs to.
+ *
+ * The six CRT boundary symbols listed in the table are pinned by name
+ * to their `.CRT$X??` section when such a section exists. Otherwise the
+ * symbol's own section_id is used if it is in range, and as a last
+ * resort the section covering the symbol's vaddr (may be NULL). */
+static const LinkSection* coff_symbol_section(const LinkImage* img,
+                                              const LinkSymbol* s) {
+  static const struct {
+    const char* sym;
+    const char* sec;
+  } crt_pins[] = {
+      {"__xd_a", ".CRT$XDA"}, {"__xd_z", ".CRT$XDZ"}, {"__xl_a", ".CRT$XLA"},
+      {"__xl_c", ".CRT$XLC"}, {"__xl_d", ".CRT$XLD"}, {"__xl_z", ".CRT$XLZ"},
+  };
+
+  if (s->name) {
+    Slice sym_name = pool_slice(img->c->global, s->name);
+    const char* pinned = NULL;
+    if (sym_name.s && sym_name.len == 6) {
+      size_t k;
+      for (k = 0; k < sizeof crt_pins / sizeof crt_pins[0]; ++k) {
+        if (memcmp(sym_name.s, crt_pins[k].sym, 6) == 0) {
+          pinned = crt_pins[k].sec;
+          break;
+        }
+      }
+    }
+    if (pinned) {
+      u32 idx;
+      for (idx = 0; idx < img->nsections; ++idx) {
+        const LinkSection* cand = &img->sections[idx];
+        if (!cand->name) continue;
+        if (slice_eq_cstr(pool_slice(img->c->global, cand->name), pinned))
+          return cand;
+      }
+    }
+  }
+
+  if (s->section_id != LINK_SEC_NONE && s->section_id <= img->nsections)
+    return &img->sections[s->section_id - 1];
+  return coff_section_at(img, s->vaddr);
+}
+
+/* Final virtual address of symbol `id` in the PE image: ImageBase plus
+ * the bucket's RVA, the section's mapped RVA, and the symbol's offset
+ * within its section. `what` is only used to name the symbol in panic
+ * messages. Panics when the symbol is undefined or absolute, or when no
+ * containing section can be found. */
+static u64 coff_symbol_final_va(const LinkImage* img,
+                                const CoffSection out[COFF_NBUCKETS],
+                                const CoffSecMap* map, LinkSymId id,
+                                const char* what) {
+  const LinkSymbol* sym = LinkSyms_at(&img->syms, id - 1);
+  const LinkSection* home;
+  const CoffSecMap* placed;
+  u64 within;
+
+  if (!sym->defined || sym->kind == SK_ABS) {
+    compiler_panic(img->c, no_loc(),
+                   "link_emit_coff: `%.*s` is not a defined section-bound "
+                   "symbol",
+                   SLICE_ARG(slice_from_cstr(what)));
+  }
+  home = coff_symbol_section(img, sym);
+  if (!home) {
+    compiler_panic(img->c, no_loc(),
+                   "link_emit_coff: `%.*s` has no containing section",
+                   SLICE_ARG(slice_from_cstr(what)));
+  }
+
+  placed = &map[home->id - 1];
+  within = sym->vaddr - home->vaddr;
+  return PE_IMAGE_BASE + (u64)out[placed->bucket].rva + (u64)placed->new_rva +
+         within;
+}
+
+/* Reserve 40 bytes at the
tail of the .rdata bucket for the
+ * IMAGE_TLS_DIRECTORY64 record, growing the buffer if `*rdata_cap` is
+ * too small, and remember where it sits. The reserved bytes (and any
+ * alignment padding before them) are zeroed; coff_emit_tls_dir fills
+ * them in once final RVAs are known.
+ *
+ * `tls` is always reset first; when the .tls bucket is empty it stays
+ * all-zero (present == 0) and nothing else happens. Panics when a .tls
+ * bucket exists but no `_tls_index` symbol is defined. */
+static void coff_plan_tls_layout(LinkImage* img, CoffSection out[COFF_NBUCKETS],
+                                 u32* rdata_cap, CoffTlsLayout* tls) {
+  CoffSection* rdata = &out[COFF_BUCKET_RDATA];
+  LinkSymId cb_sym;
+  u64 cb_addend;
+  u32 dir_off;
+  u32 new_size;
+
+  memset(tls, 0, sizeof(*tls));
+  if (out[COFF_BUCKET_TLS].size == 0) return;
+
+  tls->present = 1;
+  tls->tls_size = out[COFF_BUCKET_TLS].size;
+  tls->tls_index_sym = coff_find_tls_index_sym(img);
+  if (tls->tls_index_sym == LINK_SYM_NONE) {
+    compiler_panic(img->c, no_loc(),
+                   "link_emit_coff: .tls section requires `_tls_index` "
+                   "(provided by mingw libmingwex / tlsmcrt.o) — none of "
+                   "the linked inputs define it");
+  }
+
+  /* Callback array: prefer __xl_a and skip its first 8-byte slot; fall
+   * back to __xl_c with no skip. The fallback lookup may itself yield
+   * LINK_SYM_NONE. */
+  cb_sym = coff_find_sym(img, "__xl_a");
+  cb_addend = 8;
+  if (cb_sym == LINK_SYM_NONE) {
+    cb_sym = coff_find_sym(img, "__xl_c");
+    cb_addend = 0;
+  }
+  tls->callbacks_sym = cb_sym;
+  tls->callbacks_addend = cb_addend;
+
+  /* IMAGE_TLS_DIRECTORY64 needs 8-byte alignment for its u64 fields;
+   * round the .rdata size up before reserving the 40-byte record. */
+  dir_off = (u32)ALIGN_UP((u64)rdata->size, 8ull);
+  new_size = dir_off + COFF_TLS_DIRECTORY64_SIZE;
+  if (new_size > *rdata_cap) {
+    /* NOTE(review): the VEC_GROW result is discarded — confirm it
+     * cannot fail, or that failure is reported elsewhere. */
+    (void)VEC_GROW(img->heap, rdata->bytes, *rdata_cap, new_size);
+  }
+
+  /* One contiguous clear covers both the alignment padding (if any)
+   * and the directory slot itself. */
+  memset(rdata->bytes + rdata->size, 0, new_size - rdata->size);
+  tls->dir_rdata_off = dir_off;
+  rdata->size = new_size;
+}
+
+/* Write the IMAGE_TLS_DIRECTORY64 bytes once all bucket RVAs are
+ * final.
Each u64 VA field gets ImageBase + RVA; the base-reloc pass + * will emit IMAGE_REL_BASED_DIR64 entries so ASLR keeps them valid. */ +static void coff_emit_tls_dir(const LinkImage* img, + const CoffSection out[COFF_NBUCKETS], + const CoffSecMap* map, const CoffTlsLayout* tls) { + if (!tls->present) return; + u64 tls_start = PE_IMAGE_BASE + (u64)out[COFF_BUCKET_TLS].rva; + u64 tls_end = tls_start + (u64)tls->tls_size; + u64 idx_vaddr = + coff_symbol_final_va(img, out, map, tls->tls_index_sym, "_tls_index"); + const char* callbacks_name = tls->callbacks_addend ? "__xl_a" : "__xl_c"; + u64 callbacks_vaddr = + tls->callbacks_sym + ? coff_symbol_final_va(img, out, map, tls->callbacks_sym, + callbacks_name) + + tls->callbacks_addend + : 0; + + u8* p = out[COFF_BUCKET_RDATA].bytes + tls->dir_rdata_off; + wr_u64_le(p + COFF_TLSDIR_OFF_START_ADDR, tls_start); + wr_u64_le(p + COFF_TLSDIR_OFF_END_ADDR, tls_end); + wr_u64_le(p + COFF_TLSDIR_OFF_INDEX_ADDR, idx_vaddr); + wr_u64_le(p + COFF_TLSDIR_OFF_CALLBACKS, callbacks_vaddr); + wr_u32_le(p + 32, 0); /* SizeOfZeroFill */ + wr_u32_le(p + 36, 0); /* Characteristics */ +} + +static void coff_define_tls_used(LinkImage* img, + const CoffSection out[COFF_NBUCKETS], + const CoffTlsLayout* tls) { + if (!tls->present) return; + if (!img->linker) return; + link_emit_boundary_sym(img->linker, img, "_tls_used", + PE_IMAGE_BASE + (u64)out[COFF_BUCKET_RDATA].rva + + (u64)tls->dir_rdata_off); +} + +/* ---- import-table synthesis (Phase 3.2) --------------------------- + * + * Per doc/WINDOWS.md §3.2: every LinkSymbol with `imported = 1` gets + * routed through an IAT slot synthesized in `.idata`. Function + * imports additionally receive a small per-arch stub in `.text` + * (`ff 25 disp32` on x64 / `adrp;ldr;br` on aa64) so a direct CALL26 + * or PC32 against the symbol lands on a stub that indirects through + * the IAT. 
Data imports skip the stub — the symbol's final vaddr is + * just the IAT slot vaddr, and code-gen emits a `mov rax, [slot]` + * sequence the same way it would for any other GOT-style load. + * + * cfree's COFF code-gen uses direct symbol references; there is no + * separate `__imp_<name>` LinkSymbol consulted at link time. The + * IAT-slot rewrite happens entirely by overriding the imported + * symbol's vaddr in apply_all_relocs. */ + +typedef struct CoffImport { + LinkSymId sym; /* canonical LinkSymId from img->syms */ + u32 dll_idx; /* index into CoffImportTable.dlls */ + u32 stub_off; /* offset in .text bucket (functions only) */ + u32 iat_off; /* offset in .idata IAT block */ + u32 ilt_off; /* offset in .idata ILT block */ + u32 hint_off; /* offset in .idata hint/name table */ + u8 is_func; + u8 pad[3]; +} CoffImport; + +typedef struct CoffImportDll { + Sym soname; + u32 first; /* index of first import in CoffImportTable.imports */ + u32 count; + u32 ilt_off; /* offset of this DLL's ILT block in .idata */ + u32 iat_off; /* offset of this DLL's IAT block in .idata */ + u32 name_off; /* offset of DLL name string in .idata */ +} CoffImportDll; + +typedef struct CoffImportTable { + CoffImport* imports; + u32 nimports; + u32 imports_cap; /* heap-allocation size for cleanup */ + u32 nfunc_imports; /* subset of nimports that needs a .text stub */ + CoffImportDll* dlls; + u32 ndlls; + u32 dlls_cap; /* heap-allocation size for cleanup */ + /* Offsets within .idata of the five sub-blocks. Filled in by + * coff_plan_idata_layout once nimports / ndlls is known. */ + u32 desc_off; /* always 0 — descriptors come first */ + u32 desc_size; + u32 ilt_base; + u32 ilt_total; + u32 iat_base; + u32 iat_total; + u32 hint_base; + u32 hint_total; + u32 name_base; + u32 name_total; + u32 idata_size; + /* Stub region in .text bucket. Stubs are appended after every + * input .text section has been bucketed. 
stub_text_off is the + * bucket-local offset of the first stub; per-import stub offsets + * are stored in CoffImport.stub_off. */ + u32 stub_text_off; + u32 stub_total; +} CoffImportTable; + +/* Sort comparator: imports grouped by DLL slot, stable on input + * order within a DLL (sort is stable enough via secondary key). */ +static int coff_import_cmp(const void* a, const void* b) { + const CoffImport* ia = (const CoffImport*)a; + const CoffImport* ib = (const CoffImport*)b; + if (ia->dll_idx < ib->dll_idx) return -1; + if (ia->dll_idx > ib->dll_idx) return 1; + /* Secondary: LinkSymId so the order is reproducible. */ + if (ia->sym < ib->sym) return -1; + if (ia->sym > ib->sym) return 1; + return 0; +} + +static const char* coff_import_lookup_name(Compiler* c, const LinkSymbol* s, + size_t* nlen_out) { + Slice nm_s = s->name ? pool_slice(c->global, s->name) : SLICE_NULL; + const char* nm = nm_s.s; + size_t nlen = nm_s.len; + static const char kImpPrefix[] = "__imp_"; + const size_t kImpPrefixLen = sizeof(kImpPrefix) - 1u; + if (nm && nlen > kImpPrefixLen && + memcmp(nm, kImpPrefix, kImpPrefixLen) == 0) { + nm += kImpPrefixLen; + nlen -= kImpPrefixLen; + } + if (nlen_out) *nlen_out = nlen; + return nm; +} + +/* True iff the import classifies as function-like. Mirrors the ELF + * `sym_is_func_import` heuristic: if the canonical kind is known + * we trust it, otherwise we default to function (which matches the + * COFF code-gen contract — direct calls are by far the common case + * and a data import wrongly stubbed would still fail loudly via the + * IAT-routed call). 
*/ +static int coff_import_is_func(Compiler* c, const LinkSymbol* s) { + if (s->name) { + Slice nm_s = pool_slice(c->global, s->name); + const char* nm = nm_s.s; + size_t nlen = nm_s.len; + if (nm && nlen > 6u && memcmp(nm, "__imp_", 6u) == 0) return 0; + } + if (s->kind == SK_FUNC || s->kind == SK_IFUNC) return 1; + if (s->kind == SK_OBJ) return 0; + /* SK_UNDEF / SK_NOTYPE: assume function (the common case). */ + return 1; +} + +/* Walk LinkSyms, collect imports, group by DLL soname. Returns 1 if + * any imports were collected, 0 otherwise (caller skips the entire + * .idata path). */ +static int coff_collect_imports(LinkImage* img, CoffImportTable* it) { + Heap* heap = img->heap; + Compiler* c = img->c; + Linker* l = img->linker; + u32 nsyms = LinkSyms_count(&img->syms); + u32 imp_cap = 0; + u32 dll_cap = 0; + u32 i; + + memset(it, 0, sizeof(*it)); + if (!l) return 0; + for (i = 0; i < nsyms; ++i) { + LinkSymbol* s = LinkSyms_at(&img->syms, i); + LinkInput* in; + u32 dll_idx = (u32)-1; + u32 d; + if (!s->imported) continue; + if (s->name == 0) continue; + if (s->dso_input_id == LINK_INPUT_NONE) { + compiler_panic(c, no_loc(), + "link_emit_coff: imported symbol has no providing DSO"); + } + /* img->globals only carries defined globals/weaks; imported undefs + * never land there. Dedup by name: skip if any earlier slot + * already collected this name. */ + { + int dup = 0; + for (u32 k = 0; k < it->nimports; ++k) { + LinkSymbol* prev = LinkSyms_at(&img->syms, it->imports[k].sym - 1); + if (prev->name == s->name) { + dup = 1; + break; + } + } + if (dup) continue; + } + if (s->dso_input_id - 1u >= LinkInputs_count(&l->inputs)) { + compiler_panic(c, no_loc(), + "link_emit_coff: import dso_input_id out of range"); + } + in = LinkInputs_at(&l->inputs, s->dso_input_id - 1u); + if (in->soname == 0) { + compiler_panic(c, no_loc(), + "link_emit_coff: providing DSO has no soname; cannot " + "emit IMAGE_IMPORT_DESCRIPTOR.Name"); + } + /* Find-or-add the DLL slot. 
*/ + for (d = 0; d < it->ndlls; ++d) { + if (it->dlls[d].soname == in->soname) { + dll_idx = d; + break; + } + } + if (dll_idx == (u32)-1) { + if (VEC_GROW(heap, it->dlls, dll_cap, it->ndlls + 1u)) + compiler_panic(c, no_loc(), "link_emit_coff: oom on import dlls"); + dll_idx = it->ndlls++; + memset(&it->dlls[dll_idx], 0, sizeof(it->dlls[dll_idx])); + it->dlls[dll_idx].soname = in->soname; + } + if (VEC_GROW(heap, it->imports, imp_cap, it->nimports + 1u)) + compiler_panic(c, no_loc(), "link_emit_coff: oom on imports"); + memset(&it->imports[it->nimports], 0, sizeof(it->imports[it->nimports])); + it->imports[it->nimports].sym = s->id; + it->imports[it->nimports].dll_idx = dll_idx; + it->imports[it->nimports].is_func = (u8)coff_import_is_func(c, s); + if (it->imports[it->nimports].is_func) ++it->nfunc_imports; + ++it->nimports; + it->dlls[dll_idx].count++; + } + if (it->nimports == 0) return 0; + /* Re-bucket the imports array so each DLL's run is contiguous. */ + qsort(it->imports, it->nimports, sizeof(*it->imports), coff_import_cmp); + /* Fix up CoffImportDll.first now that imports[] is sorted. */ + { + u32 cur = 0; + for (u32 d = 0; d < it->ndlls; ++d) { + it->dlls[d].first = cur; + cur += it->dlls[d].count; + } + } + it->imports_cap = imp_cap; + it->dlls_cap = dll_cap; + return 1; +} + +static void coff_imports_free(LinkImage* img, CoffImportTable* it) { + Heap* heap = img->heap; + if (it->imports) { + heap->free(heap, it->imports, + (size_t)it->imports_cap * sizeof(*it->imports)); + } + if (it->dlls) { + heap->free(heap, it->dlls, (size_t)it->dlls_cap * sizeof(*it->dlls)); + } +} + +/* Compute every per-block / per-import offset inside .idata and the + * total .idata size in bytes. Also assigns per-import hint/name and + * dll-name offsets so the descriptor table can reference them by RVA + * later (RVAs need the bucket's final RVA, added in coff_emit_idata). 
*/ +static void coff_plan_idata_layout(LinkImage* img, CoffImportTable* it) { + Compiler* c = img->c; + u32 off; + + /* Block 1: import descriptors (one per DLL + zero terminator). */ + it->desc_off = 0; + it->desc_size = (it->ndlls + 1u) * COFF_IMPORT_DESCRIPTOR_SIZE; + off = (u32)ALIGN_UP((u64)it->desc_size, (u64)PE_IDATA_BLOCK_ALIGN); + + /* Block 2: ILTs. Per DLL: count entries + 1 (terminator), 8 B each. */ + it->ilt_base = off; + for (u32 d = 0; d < it->ndlls; ++d) { + it->dlls[d].ilt_off = off; + /* Per-import: assign ilt_off within this DLL's block. */ + for (u32 k = 0; k < it->dlls[d].count; ++k) { + it->imports[it->dlls[d].first + k].ilt_off = + off + k * (u32)COFF_THUNK_DATA64_SIZE; + } + off += (it->dlls[d].count + 1u) * (u32)COFF_THUNK_DATA64_SIZE; + } + it->ilt_total = off - it->ilt_base; + off = (u32)ALIGN_UP((u64)off, (u64)PE_IDATA_BLOCK_ALIGN); + + /* Block 3: IATs (same shape as ILTs). */ + it->iat_base = off; + for (u32 d = 0; d < it->ndlls; ++d) { + it->dlls[d].iat_off = off; + for (u32 k = 0; k < it->dlls[d].count; ++k) { + it->imports[it->dlls[d].first + k].iat_off = + off + k * (u32)COFF_THUNK_DATA64_SIZE; + } + off += (it->dlls[d].count + 1u) * (u32)COFF_THUNK_DATA64_SIZE; + } + it->iat_total = off - it->iat_base; + off = (u32)ALIGN_UP((u64)off, (u64)PE_IDATA_BLOCK_ALIGN); + + /* Block 4: hint/name records. Each: u16 hint + NUL-term name + + * 1-byte pad if the resulting size is odd (PE/COFF spec). */ + it->hint_base = off; + for (u32 i = 0; i < it->nimports; ++i) { + LinkSymbol* s = LinkSyms_at(&img->syms, it->imports[i].sym - 1); + size_t nlen = 0; + const char* nm = coff_import_lookup_name(c, s, &nlen); + if (!nm || nlen == 0) + compiler_panic(c, no_loc(), + "link_emit_coff: imported symbol has empty name"); + it->imports[i].hint_off = off; + /* hint (2 B) + name (nlen + 1) + optional pad to even. 
*/ + u32 rec = 2u + (u32)nlen + 1u; + if (rec & 1u) ++rec; + off += rec; + } + it->hint_total = off - it->hint_base; + off = (u32)ALIGN_UP((u64)off, (u64)PE_IDATA_BLOCK_ALIGN); + + /* Block 5: DLL name strings (NUL-terminated). */ + it->name_base = off; + for (u32 d = 0; d < it->ndlls; ++d) { + Slice nm_s = pool_slice(c->global, it->dlls[d].soname); + const char* nm = nm_s.s; + size_t nlen = nm_s.len; + if (!nm || nlen == 0) + compiler_panic(c, no_loc(), + "link_emit_coff: providing DSO has empty soname"); + it->dlls[d].name_off = off; + off += (u32)nlen + 1u; + } + it->name_total = off - it->name_base; + it->idata_size = off; +} + +/* Append the function-import stubs to the .text bucket. Each stub is + * the format arch descriptor's stub size. Records each stub's bucket- + * local offset on the matching CoffImport so the per-symbol stub vaddr + * can be computed once the .text bucket's RVA is final. */ +static void coff_append_stubs(LinkImage* img, CoffImportTable* it, + CoffSection* text_bucket, u32* text_bucket_cap) { + Heap* heap = img->heap; + Compiler* c = img->c; + const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_COFF); + const ObjCoffArchOps* arch = + fmt && fmt->coff_arch ? fmt->coff_arch(c->target.arch) : NULL; + u32 stub_size; + u32 stub_align; + u64 cur; + if (!arch || arch->stub_size == 0 || !arch->emit_iat_stub) { + compiler_panic(c, no_loc(), + "link_emit_coff: arch has no COFF IAT stub emitter"); + } + stub_size = arch->stub_size; + /* Stubs are pure code; aligning to instruction alignment is enough. + * x64 wants byte-granular, aa64 wants 4 B; align to stub size as a + * convenient upper bound. 
*/ + stub_align = stub_size; + cur = (u64)text_bucket->size; + cur = ALIGN_UP(cur, (u64)stub_align); + it->stub_text_off = (u32)cur; + for (u32 i = 0; i < it->nimports; ++i) { + if (!it->imports[i].is_func) continue; + it->imports[i].stub_off = (u32)cur; + cur += stub_size; + } + it->stub_total = (u32)cur - it->stub_text_off; + if (it->stub_total == 0) return; + /* Grow the .text bucket buffer to hold the new region. */ + u32 need = (u32)cur; + if (need > *text_bucket_cap) { + (void)VEC_GROW(heap, text_bucket->bytes, *text_bucket_cap, need); + } + /* Zero the alignment pad; stub bytes are written later by + * coff_emit_stubs once vaddrs are known. */ + if ((u32)cur > text_bucket->size) { + memset(text_bucket->bytes + text_bucket->size, 0, + (size_t)((u32)cur - text_bucket->size)); + } + text_bucket->size = (u32)cur; +} + +/* Emit each function import's IAT stub into the .text bucket. Must + * run after coff_assign_layout has fixed both .text's RVA and + * .idata's RVA, since the stub bakes in the post-shift IAT slot + * displacement. */ +static void coff_emit_stubs(LinkImage* img, const CoffImportTable* it, + const CoffSection out[COFF_NBUCKETS]) { + Compiler* c = img->c; + const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_COFF); + const ObjCoffArchOps* arch = + fmt && fmt->coff_arch ? 
fmt->coff_arch(c->target.arch) : NULL; + u64 img_base = PE_IMAGE_BASE; + u32 text_rva = out[COFF_BUCKET_TEXT].rva; + u32 idata_rva = out[COFF_BUCKET_IDATA].rva; + if (!arch || !arch->emit_iat_stub) { + compiler_panic(c, no_loc(), + "link_emit_coff: arch has no COFF IAT stub emitter"); + } + for (u32 i = 0; i < it->nimports; ++i) { + u64 stub_va, slot_va; + if (!it->imports[i].is_func) continue; + stub_va = img_base + (u64)text_rva + (u64)it->imports[i].stub_off; + slot_va = img_base + (u64)idata_rva + (u64)it->imports[i].iat_off; + arch->emit_iat_stub(out[COFF_BUCKET_TEXT].bytes + it->imports[i].stub_off, + stub_va, slot_va); + } +} + +/* Emit .idata content into the bucket buffer. Allocates the buffer + * here (size is already known from coff_plan_idata_layout). */ +static void coff_emit_idata(LinkImage* img, const CoffImportTable* it, + CoffSection out[COFF_NBUCKETS], + u32* idata_bucket_cap) { + Heap* heap = img->heap; + Compiler* c = img->c; + CoffSection* idata = &out[COFF_BUCKET_IDATA]; + u32 idata_rva = idata->rva; + u8* buf; + /* Allocate the bucket buffer (idata_size is already block-aligned). */ + buf = (u8*)heap->alloc(heap, it->idata_size, _Alignof(u64)); + if (!buf) compiler_panic(c, no_loc(), "link_emit_coff: oom on .idata buffer"); + memset(buf, 0, it->idata_size); + idata->bytes = buf; + idata->size = it->idata_size; + *idata_bucket_cap = it->idata_size; + + /* Block 1: IMAGE_IMPORT_DESCRIPTOR table. */ + for (u32 d = 0; d < it->ndlls; ++d) { + u8* p = buf + d * (u32)COFF_IMPORT_DESCRIPTOR_SIZE; + u32 ilt_rva = idata_rva + it->dlls[d].ilt_off; + u32 iat_rva = idata_rva + it->dlls[d].iat_off; + u32 name_rva = idata_rva + it->dlls[d].name_off; + wr_u32_le(p + 0, ilt_rva); /* OriginalFirstThunk */ + wr_u32_le(p + 4, 0u); /* TimeDateStamp */ + wr_u32_le(p + 8, 0u); /* ForwarderChain */ + wr_u32_le(p + 12, name_rva); /* Name */ + wr_u32_le(p + 16, iat_rva); /* FirstThunk */ + } + /* Trailing zero descriptor already zero-filled by memset. 
*/ + + /* Blocks 2+3: ILT + IAT. Both initially point at the same hint/name + * record for each import; the OS loader rewrites IAT entries at + * load time. */ + for (u32 i = 0; i < it->nimports; ++i) { + u64 hint_rva = (u64)idata_rva + (u64)it->imports[i].hint_off; + wr_u64_le(buf + it->imports[i].ilt_off, hint_rva); + wr_u64_le(buf + it->imports[i].iat_off, hint_rva); + } + /* Per-DLL ILT/IAT terminators are u64 0, already zero-filled. */ + + /* Block 4: hint/name records. */ + for (u32 i = 0; i < it->nimports; ++i) { + LinkSymbol* s = LinkSyms_at(&img->syms, it->imports[i].sym - 1); + size_t nlen = 0; + const char* nm = coff_import_lookup_name(c, s, &nlen); + u8* p = buf + it->imports[i].hint_off; + wr_u16_le(p, PE_IMPORT_HINT_NONE); + memcpy(p + 2, nm, nlen); + /* NUL terminator + optional pad already zero. */ + } + + /* Block 5: DLL name strings. */ + for (u32 d = 0; d < it->ndlls; ++d) { + Slice nm_s = pool_slice(c->global, it->dlls[d].soname); + const char* nm = nm_s.s; + size_t nlen = nm_s.len; + memcpy(buf + it->dlls[d].name_off, nm, nlen); + /* NUL already zero. */ + } +} + +/* Per-LinkSymId vaddr override table for imports. Indexed by + * LinkSymId-1; 0 means "not an import". Built once after the .idata + * bucket RVA is final. Consumed by coff_apply_all_relocs in lieu of + * the symbol's own vaddr field (which is 0 for imports). 
*/ +typedef struct CoffImportVaddr { + u64* by_sym; /* size = nsyms; 0 entries mean "not imported" */ + u32 nsyms; +} CoffImportVaddr; + +static void coff_import_vaddr_build(LinkImage* img, const CoffImportTable* it, + const CoffSection out[COFF_NBUCKETS], + CoffImportVaddr* iv) { + Heap* heap = img->heap; + u64 img_base = PE_IMAGE_BASE; + u32 text_rva = out[COFF_BUCKET_TEXT].rva; + u32 idata_rva = out[COFF_BUCKET_IDATA].rva; + iv->nsyms = LinkSyms_count(&img->syms); + iv->by_sym = (u64*)heap->alloc(heap, sizeof(u64) * (size_t)(iv->nsyms + 1u), + _Alignof(u64)); + if (!iv->by_sym) + compiler_panic(img->c, no_loc(), + "link_emit_coff: oom on import vaddr table"); + memset(iv->by_sym, 0, sizeof(u64) * (size_t)(iv->nsyms + 1u)); + for (u32 i = 0; i < it->nimports; ++i) { + LinkSymId sid = it->imports[i].sym; + u64 va; + if (it->imports[i].is_func) { + va = img_base + (u64)text_rva + (u64)it->imports[i].stub_off; + } else { + va = img_base + (u64)idata_rva + (u64)it->imports[i].iat_off; + } + iv->by_sym[sid - 1u] = va; + /* Fan out across every shadow LinkSymId with the same name so a + * per-input undef reference resolves to the same import slot. */ + { + LinkSymbol* canonical = LinkSyms_at(&img->syms, sid - 1u); + for (u32 j = 0; j < iv->nsyms; ++j) { + LinkSymbol* s = LinkSyms_at(&img->syms, j); + if (s->name == canonical->name && s->imported) { + iv->by_sym[s->id - 1u] = va; + } + } + } + } +} + +static void coff_import_vaddr_free(LinkImage* img, CoffImportVaddr* iv) { + Heap* heap = img->heap; + if (iv->by_sym) { + heap->free(heap, iv->by_sym, sizeof(u64) * (size_t)(iv->nsyms + 1u)); + } +} + +/* Resolve Compiler.target.arch -> IMAGE_FILE_MACHINE_* via the per-arch + * coff ops table. Panic if the arch has no COFF descriptor or the + * machine value is one cfree doesn't ship (Phase 1 supports AMD64 and + * ARM64 only). 
*/ +static u16 coff_machine_or_panic(Compiler* c) { + const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_COFF); + const ObjCoffArchOps* arch = + fmt && fmt->coff_arch ? fmt->coff_arch(c->target.arch) : NULL; + u16 m; + if (!arch) + compiler_panic(c, no_loc(), "link_emit_coff: no COFF arch descriptor"); + m = arch->machine; + if (m != IMAGE_FILE_MACHINE_AMD64 && m != IMAGE_FILE_MACHINE_ARM64) + compiler_panic(c, no_loc(), "link_emit_coff: unsupported machine 0x%x", + (unsigned)m); + return m; +} + +static int coff_section_name_starts(Compiler* c, const LinkSection* ls, + const char* prefix) { + size_t pn = slice_from_cstr(prefix).len; + Slice s_s = ls->name ? pool_slice(c->global, ls->name) : SLICE_NULL; + const char* s = s_s.s; + size_t n = s_s.len; + return s && n >= pn && memcmp(s, prefix, pn) == 0; +} + +static int coff_section_name_cmp(Compiler* c, const LinkSection* a, + const LinkSection* b) { + Slice as_s = a->name ? pool_slice(c->global, a->name) : SLICE_NULL; + Slice bs_s = b->name ? pool_slice(c->global, b->name) : SLICE_NULL; + const char* as = as_s.s ? as_s.s : ""; + const char* bs = bs_s.s ? bs_s.s : ""; + size_t an = as_s.len, bn = bs_s.len; + size_t n = an < bn ? an : bn; + int cmp = n ? memcmp(as, bs, n) : 0; + if (cmp) return cmp; + if (an < bn) return -1; + if (an > bn) return 1; + if (a->id < b->id) return -1; + if (a->id > b->id) return 1; + return 0; +} + +static void coff_place_section(LinkImage* img, CoffSection out[COFF_NBUCKETS], + CoffSecMap* map, u64 bucket_cur[COFF_NBUCKETS], + u32 bucket_cap[COFF_NBUCKETS], + const LinkSection* ls) { + Heap* heap = img->heap; + CoffBucket b2 = coff_bucket_for(ls); + u32 align = ls->align ? ls->align : 1u; + u64 cur = bucket_cur[b2]; + cur = ALIGN_UP(cur, (u64)align); + map[ls->id - 1].bucket = (u8)b2; + /* Record the bucket-local offset; the absolute RVA / file offset + * are filled in after bucket placement (RVAs need + * SectionAlignment, file offsets need FileAlignment). 
*/ + map[ls->id - 1].new_rva = (u32)cur; + if (b2 != COFF_BUCKET_BSS) { + /* Copy bytes from the source segment buffer into the bucket. */ + if (ls->size) { + u32 need = (u32)(cur + ls->size); + if (need > bucket_cap[b2]) { + (void)VEC_GROW(heap, out[b2].bytes, bucket_cap[b2], need); + } + memset(out[b2].bytes + bucket_cur[b2], 0, (size_t)(cur - bucket_cur[b2])); + if (ls->sem != SSEM_NOBITS) { + const LinkSegment* seg = &img->segments[ls->segment_id - 1]; + const u8* src = img->segment_bytes[seg->id - 1] + + (size_t)(ls->file_offset - seg->file_offset); + memcpy(out[b2].bytes + cur, src, (size_t)ls->size); + } else { + memset(out[b2].bytes + cur, 0, (size_t)ls->size); + } + } + } + cur += ls->size; + bucket_cur[b2] = cur; + out[b2].size = (u32)cur; +} + +static void coff_insert_sorted_section(Compiler* c, const LinkSection** a, + u32* n, const LinkSection* ls) { + u32 i = *n; + while (i > 0 && coff_section_name_cmp(c, ls, a[i - 1u]) < 0) { + a[i] = a[i - 1u]; + --i; + } + a[i] = ls; + *n += 1u; +} + +/* ---- pass 1: bucket input sections, assemble bytes, assign deltas ---- + * CoffSecMap is defined above (alongside CoffTlsLayout) because the + * TLS planning helpers need to consume one. */ + +/* Build the four payload buckets (.text/.rdata/.data/.bss). + * + * `map[secid-1]` is populated for every kept LinkSection with the + * section's new RVA, new file offset, the bucket it landed in, and the + * delta to add to in-section vaddrs. Bucket buffers are + * heap-allocated; the caller frees them after emit. 
*/
static void coff_build_buckets(LinkImage* img, CoffSection out[COFF_NBUCKETS],
                               CoffSecMap* map) {
  /* Static description of every bucket this function names. The
   * Windows loader uses .tls as a *template*: the bytes on disk seed
   * each thread's per-TLS copy at thread creation, and threads write
   * to their copies, not the template. The PE section is still marked
   * writable because that's what mingw and link.exe emit; the loader
   * special-cases it via the TLS directory. */
  const struct {
    u32 bucket;
    const char* name;
    u32 characteristics;
    u8 has_file_bytes;
  } kinds[] = {
      {COFF_BUCKET_TEXT, ".text",
       IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_MEM_READ, 1},
      {COFF_BUCKET_RDATA, ".rdata",
       IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ, 1},
      {COFF_BUCKET_IDATA, ".idata",
       IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ, 1},
      {COFF_BUCKET_DATA, ".data",
       IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ |
           IMAGE_SCN_MEM_WRITE,
       1},
      {COFF_BUCKET_TLS, ".tls",
       IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ |
           IMAGE_SCN_MEM_WRITE,
       1},
      {COFF_BUCKET_BSS, ".bss",
       IMAGE_SCN_CNT_UNINITIALIZED_DATA | IMAGE_SCN_MEM_READ |
           IMAGE_SCN_MEM_WRITE,
       0},
      {COFF_BUCKET_RELOC, ".reloc",
       IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ |
           IMAGE_SCN_MEM_DISCARDABLE,
       1},
  };
  Heap* heap = img->heap;
  Compiler* c = img->c;
  const LinkSection** tls_sorted = NULL;
  const LinkSection** crt_sorted = NULL;
  u32 ntls_sorted = 0;
  u32 ncrt_sorted = 0;
  /* Per-bucket write cursor and buffer allocation size. Buffers start
   * empty and grow lazily as sections are placed. */
  u64 bucket_cur[COFF_NBUCKETS] = {0};
  u32 bucket_cap[COFF_NBUCKETS] = {0};

  for (u32 b = 0; b < COFF_NBUCKETS; ++b) {
    memset(&out[b], 0, sizeof(out[b]));
  }
  for (size_t k = 0; k < sizeof(kinds) / sizeof(kinds[0]); ++k) {
    CoffSection* sec = &out[kinds[k].bucket];
    sec->name = kinds[k].name;
    sec->characteristics = kinds[k].characteristics;
    sec->has_file_bytes = kinds[k].has_file_bytes;
  }

  /* Scratch arrays for the two groups that are placed in name order;
   * each can hold every section in the worst case. */
  if (img->nsections) {
    tls_sorted = (const LinkSection**)heap->alloc(
        heap, sizeof(*tls_sorted) * img->nsections,
        _Alignof(const LinkSection*));
    crt_sorted = (const LinkSection**)heap->alloc(
        heap, sizeof(*crt_sorted) * img->nsections,
        _Alignof(const LinkSection*));
    if (!tls_sorted || !crt_sorted)
      compiler_panic(c, no_loc(), "link_emit_coff: oom sorting sections");
  }

  /* Ordinary sections are placed immediately in input order; TLS
   * sections and `.CRT$` sections are set aside, kept sorted by name,
   * and placed afterwards. */
  for (u32 i = 0; i < img->nsections; ++i) {
    const LinkSection* ls = &img->sections[i];
    if (!(ls->flags & SF_ALLOC)) continue;
    if (ls->flags & SF_TLS) {
      coff_insert_sorted_section(c, tls_sorted, &ntls_sorted, ls);
    } else if (coff_section_name_starts(c, ls, ".CRT$")) {
      coff_insert_sorted_section(c, crt_sorted, &ncrt_sorted, ls);
    } else {
      coff_place_section(img, out, map, bucket_cur, bucket_cap, ls);
    }
  }
  for (u32 i = 0; i < ntls_sorted; ++i) {
    coff_place_section(img, out, map, bucket_cur, bucket_cap, tls_sorted[i]);
  }
  for (u32 i = 0; i < ncrt_sorted; ++i) {
    coff_place_section(img, out, map, bucket_cur, bucket_cap, crt_sorted[i]);
  }

  /* heap->free needs the original allocation size, so park each
   * buffer's capacity in size_raw for now; layout assignment replaces
   * it with the real PE value later. */
  for (u32 b = 0; b < COFF_NBUCKETS; ++b) out[b].size_raw = bucket_cap[b];

  if (tls_sorted)
    heap->free(heap, tls_sorted, sizeof(*tls_sorted) * img->nsections);
  if (crt_sorted)
    heap->free(heap, crt_sorted, sizeof(*crt_sorted) * img->nsections);
}

/* Assign RVAs and file offsets to the buckets that participate in the
 * image. Returns the file offset at which trailing pad-to-EOF should
 * land (== file size).
*/
static u64 coff_assign_layout(CoffSection out[COFF_NBUCKETS],
                              u32 headers_file_size, u32 first_section_rva) {
  u32 next_rva = first_section_rva;
  u64 next_file = ALIGN_UP((u64)headers_file_size, (u64)PE_FILE_ALIGNMENT);

  for (u32 b = 0; b < COFF_NBUCKETS; ++b) {
    CoffSection* sec = &out[b];

    /* Empty buckets are dropped from the image altogether. */
    if (sec->size == 0) {
      sec->in_image = 0;
      sec->rva = 0;
      sec->file_offset = 0;
      sec->size_raw = 0;
      continue;
    }

    sec->in_image = 1;
    sec->rva = (u32)ALIGN_UP((u64)next_rva, (u64)PE_SECTION_ALIGNMENT);
    /* Buckets without file bytes occupy address space only. */
    sec->file_offset = 0;
    sec->size_raw = 0;
    if (sec->has_file_bytes) {
      sec->file_offset = (u32)next_file;
      sec->size_raw = (u32)ALIGN_UP((u64)sec->size, (u64)PE_FILE_ALIGNMENT);
      next_file += sec->size_raw;
    }
    /* The next bucket starts after this one's unpadded size; its own
     * RVA is rounded up when it is placed. */
    next_rva = sec->rva + sec->size;
  }
  return next_file;
}

/* Build the .reloc bytes by grouping absolute relocs by 4-KiB page.
 * The map[] array maps LinkSectionId-1 to the per-section post-PE-relayout
 * RVA, so we can compute each reloc's site_rva = section_rva + (orig
 * write_vaddr - orig section_vaddr).
+ * + * Layout per page: + * u32 page_rva + * u32 size_of_block (8 + n_entries*2, padded to a multiple of 4) + * u16 entries[]: (type << 12) | (offset & 0xfff) + * optional trailing u16 = 0 (IMAGE_REL_BASED_ABSOLUTE) for u32 alignment */ +typedef struct CoffRelocEntry { + u32 site_rva; + u16 type; + u16 pad; +} CoffRelocEntry; + +static int coff_reloc_entry_cmp(const void* a, const void* b) { + const CoffRelocEntry* ea = (const CoffRelocEntry*)a; + const CoffRelocEntry* eb = (const CoffRelocEntry*)b; + if (ea->site_rva < eb->site_rva) return -1; + if (ea->site_rva > eb->site_rva) return 1; + return 0; +} + +static void coff_build_reloc_section(LinkImage* img, + const CoffSection out[COFF_NBUCKETS], + const CoffSecMap* map, CoffSection* reloc, + const CoffRelocEntry* extras, + u32 n_extras) { + Heap* heap = img->heap; + Compiler* c = img->c; + u32 nrel = LinkRelocs_count(&img->relocs); + CoffRelocEntry* entries = NULL; + u32 nentries = 0; + u32 cap = 0; + u32 i; + + if (!img->pie) { + reloc->bytes = NULL; + reloc->size = 0; + return; + } + for (i = 0; i < nrel; ++i) { + const LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); + const LinkSection* ls; + u64 site_old_vaddr; + u32 site_rva; + u16 type; + if (!coff_reloc_needs_base_reloc(r->kind)) continue; + if (r->link_section_id == LINK_SEC_NONE) continue; + ls = &img->sections[r->link_section_id - 1]; + /* r->write_vaddr is in the pre-relayout coordinate system (same as + * ls->vaddr), so the offset into the section is stable. Add the + * containing bucket's final RVA to land at the image RVA. 
*/ + site_old_vaddr = r->write_vaddr; + u8 sb = map[ls->id - 1].bucket; + site_rva = out[sb].rva + map[ls->id - 1].new_rva + + (u32)(site_old_vaddr - ls->vaddr); + if (r->kind == R_ABS64) { + type = (u16)IMAGE_REL_BASED_DIR64; + } else { + type = (u16)IMAGE_REL_BASED_HIGHLOW; + } + if (nentries == cap) { + (void)VEC_GROW(heap, entries, cap, nentries + 1u); + } + entries[nentries].site_rva = site_rva; + entries[nentries].type = type; + entries[nentries].pad = 0; + ++nentries; + } + /* Append caller-supplied extras (TLS directory absolute-VA fields, + * etc.). These are already site-RVAs in the final image. */ + for (i = 0; i < n_extras; ++i) { + if (nentries == cap) { + (void)VEC_GROW(heap, entries, cap, nentries + 1u); + } + entries[nentries] = extras[i]; + ++nentries; + } + if (nentries == 0) { + reloc->bytes = NULL; + reloc->size = 0; + if (entries) heap->free(heap, entries, cap * sizeof(*entries)); + (void)c; + return; + } + /* Sort entries by RVA so we can group runs sharing a 4-KiB page. */ + qsort(entries, nentries, sizeof(*entries), coff_reloc_entry_cmp); + + /* Two-pass: first compute the total size (so we can allocate the + * blob exactly), then emit. */ + u32 blob_size = 0; + u32 run_start = 0; + while (run_start < nentries) { + u32 page = entries[run_start].site_rva & ~0xfffu; + u32 run_end = run_start; + while (run_end < nentries && + (entries[run_end].site_rva & ~0xfffu) == page) { + ++run_end; + } + u32 n = run_end - run_start; + u32 block = COFF_BASE_RELOCATION_SIZE + n * 2u; + block = (u32)ALIGN_UP((u64)block, 4ull); + blob_size += block; + run_start = run_end; + } + reloc->bytes = (u8*)heap->alloc(heap, blob_size, 4); + if (!reloc->bytes && blob_size) + compiler_panic(c, no_loc(), "link_emit_coff: oom on .reloc blob"); + memset(reloc->bytes, 0, blob_size); + reloc->size = blob_size; + /* Stash allocation size for free path. 
*/ + reloc->size_raw = blob_size; + + u32 cursor = 0; + run_start = 0; + while (run_start < nentries) { + u32 page = entries[run_start].site_rva & ~0xfffu; + u32 run_end = run_start; + while (run_end < nentries && + (entries[run_end].site_rva & ~0xfffu) == page) { + ++run_end; + } + u32 n = run_end - run_start; + u32 raw_size = COFF_BASE_RELOCATION_SIZE + n * 2u; + u32 block = (u32)ALIGN_UP((u64)raw_size, 4ull); + u8* p = reloc->bytes + cursor; + wr_u32_le(p, page); + wr_u32_le(p + 4, block); + u32 k; + for (k = 0; k < n; ++k) { + u16 entry = (u16)(((u16)entries[run_start + k].type << 12) | + (entries[run_start + k].site_rva & 0xfffu)); + wr_u16_le(p + 8 + k * 2u, entry); + } + /* Optional trailing pad: a single IMAGE_REL_BASED_ABSOLUTE (0). */ + if (block > raw_size) { + wr_u16_le(p + 8 + n * 2u, 0); + } + cursor += block; + run_start = run_end; + } + heap->free(heap, entries, cap * sizeof(*entries)); +} + +/* Patch each LinkRelocApply against the PE-relayout coordinates and + * apply. `bucket_bytes[bucket]` gives the writable buffer for that + * bucket; the per-section delta in map[] turns the old in-section + * offsets into bucket-local offsets. + * + * Imported targets (LinkSymbol.imported == 1) have no vaddr of their + * own — instead the .idata pass populated `iv->by_sym[id-1]` with the + * function stub's vaddr (for callable imports) or the IAT slot's + * vaddr (for data imports). This is the spot where that table is + * consulted in lieu of the symbol's own zero vaddr. 
*/ +static void coff_apply_all_relocs(LinkImage* img, + const CoffSection out[COFF_NBUCKETS], + const CoffSecMap* map, + const CoffImportVaddr* iv) { + Compiler* c = img->c; + u32 i; + u64 img_base = PE_IMAGE_BASE; + u32 nrel = LinkRelocs_count(&img->relocs); + for (i = 0; i < nrel; ++i) { + LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); + const LinkSymbol* tgt = LinkSyms_at(&img->syms, r->target - 1); + const LinkSection* sec; + const LinkSection* tgt_sec; + u64 S, P; + u8* P_bytes; + u8 bucket; + u32 site_off_in_sec; + u32 site_bucket_off; + if (r->link_section_id == LINK_SEC_NONE) continue; + sec = &img->sections[r->link_section_id - 1]; + bucket = map[sec->id - 1].bucket; + if (!out[bucket].has_file_bytes || !out[bucket].bytes) { + /* Shouldn't happen — .bss has no relocations. */ + continue; + } + site_off_in_sec = (u32)(r->write_vaddr - sec->vaddr); + site_bucket_off = map[sec->id - 1].new_rva + site_off_in_sec; + P_bytes = out[bucket].bytes + site_bucket_off; + /* P = ImageBase + bucket_rva + map[].new_rva + site_off_in_sec + * — i.e. the final runtime address of the patch site. */ + P = img_base + (u64)out[bucket].rva + (u64)map[sec->id - 1].new_rva + + site_off_in_sec; + + /* Resolve S: target symbol's new image-relative address. Look up + * the LinkSection that contains the symbol's original vaddr, then + * apply that section's delta. */ + if (tgt->imported) { + /* IAT-routed: stub vaddr (functions) / slot vaddr (data). 
*/ + if (!iv || iv->by_sym[r->target - 1u] == 0) + compiler_panic(c, no_loc(), + "link_emit_coff: imported target lacks IAT slot"); + S = iv->by_sym[r->target - 1u]; + } else if (tgt->kind == SK_ABS) { + S = tgt->vaddr; + } else if (tgt->defined) { + tgt_sec = coff_symbol_section(img, tgt); + if (!tgt_sec) { + compiler_panic(c, no_loc(), + "link_emit_coff: symbol vaddr 0x%llx has no " + "containing section", + (unsigned long long)tgt->vaddr); + } + u8 tb = map[tgt_sec->id - 1].bucket; + u64 sym_off = tgt->vaddr - tgt_sec->vaddr; + S = img_base + (u64)out[tb].rva + (u64)map[tgt_sec->id - 1].new_rva + + sym_off; + } else { + /* Undef and not imported — shouldn't survive resolve_undefs. */ + compiler_panic(c, no_loc(), + "link_emit_coff: unresolved non-imported symbol"); + } + /* COFF-only section-relative kinds: the SECREL value is the + * symbol's offset from the start of its containing output section + * (PE bucket), and SECTION is the 1-based PE section index. + * link_reloc_apply only sees S and P, so we patch these inline + * before delegating common kinds. */ + if (r->kind == R_COFF_SECREL || r->kind == R_COFF_SECTION || + r->kind == R_COFF_AARCH64_SECREL_LOW12A || + r->kind == R_COFF_AARCH64_SECREL_HIGH12A) { + if (!tgt->defined || tgt->kind == SK_ABS) { + compiler_panic(c, no_loc(), + "link_emit_coff: COFF SECREL/SECTION requires a " + "defined section-bound target symbol"); + } + u8 tb = map[tgt_sec->id - 1].bucket; + u64 sym_off_in_bucket = + (u64)map[tgt_sec->id - 1].new_rva + (tgt->vaddr - tgt_sec->vaddr); + if (r->kind == R_COFF_SECREL) { + u64 v = sym_off_in_bucket + (u64)r->addend; + wr_u32_le(P_bytes, (u32)(v & 0xffffffffu)); + } else if (r->kind == R_COFF_SECTION) { + /* PE section indices are 1-based; buckets are 0-based, so add 1. */ + wr_u16_le(P_bytes, (u16)((tb + 1u) & 0xffffu)); + } else { + /* AArch64 SECREL_{LOW,HIGH}12A: patch the imm12 field of an + * existing ADD-imm12 instruction. 
LOW12A = bits [11:0] of the + * SECREL; HIGH12A = bits [23:12]. The instruction's sh bit was + * already set by the codegen (0 for LOW, 1 for HIGH). */ + u64 v = sym_off_in_bucket + (u64)r->addend; + u32 imm12 = (r->kind == R_COFF_AARCH64_SECREL_HIGH12A) + ? (u32)((v >> 12) & 0xfffu) + : (u32)(v & 0xfffu); + u32 instr = rd_u32_le(P_bytes); + instr = (instr & ~(0xfffu << 10)) | (imm12 << 10); + wr_u32_le(P_bytes, instr); + } + continue; + } + link_reloc_apply(c, r->kind, P_bytes, S, r->addend, P); + } +} + +/* ---- header marshalling ---- + * + * Each helper streams its on-disk shape to the writer field-by-field; + * we avoid sizeof(struct) on the packed PE wire types since they carry + * implicit-padding hazards on hosts that disagree with #pragma pack(1) + * defaults. */ + +static void coff_write_dos_stub(Writer* w) { + u8 buf[PE_DOS_HDR_SIZE]; + memset(buf, 0, sizeof(buf)); + /* e_magic ("MZ") + e_lfanew (offset of PE signature). All other + * legacy fields zero. */ + buf[0] = (u8)(IMAGE_DOS_SIGNATURE & 0xffu); + buf[1] = (u8)((IMAGE_DOS_SIGNATURE >> 8) & 0xffu); + wr_u32_le(buf + 0x3c, PE_DOS_E_LFANEW); + cfree_writer_write(w, buf, sizeof(buf)); +} + +static void coff_write_file_header(Writer* w, u16 machine, u16 nsec, + u16 characteristics) { + coff_wr_u16(w, machine); + coff_wr_u16(w, nsec); + coff_wr_u32(w, 0u); /* TimeDateStamp */ + coff_wr_u32(w, 0u); /* PointerToSymbolTable */ + coff_wr_u32(w, 0u); /* NumberOfSymbols */ + coff_wr_u16(w, (u16)PE_OPT_HDR_SIZE); /* SizeOfOptionalHeader */ + coff_wr_u16(w, characteristics); +} + +/* Per-section meta used by both the data-directory fill and the + * IMAGE_SECTION_HEADER emit. Compactly captures everything the writer + * needs to know about the four-or-five output sections. 
*/ +typedef struct CoffOutHdr { + const char* name; + u32 vsize; + u32 rva; + u32 size_raw; + u32 file_offset; + u32 characteristics; +} CoffOutHdr; + +static void coff_write_optional_header(Writer* w, u32 entry_rva, + const CoffSection out[COFF_NBUCKETS], + u32 headers_size_padded, u32 image_size, + int pie, u16 subsystem, + const CoffImportTable* it, + const CoffTlsLayout* tls) { + /* Standard fields. */ + coff_wr_u16(w, IMAGE_NT_OPTIONAL_HDR64_MAGIC); + coff_wr_u8(w, PE_LINKER_MAJOR); + coff_wr_u8(w, PE_LINKER_MINOR); + /* SizeOfCode / SizeOfInitializedData / SizeOfUninitializedData. */ + u32 size_code = + out[COFF_BUCKET_TEXT].in_image ? out[COFF_BUCKET_TEXT].size_raw : 0; + u32 size_init = + (out[COFF_BUCKET_RDATA].in_image ? out[COFF_BUCKET_RDATA].size_raw : 0) + + (out[COFF_BUCKET_DATA].in_image ? out[COFF_BUCKET_DATA].size_raw : 0); + u32 size_uninit = + out[COFF_BUCKET_BSS].in_image ? out[COFF_BUCKET_BSS].size : 0; + coff_wr_u32(w, size_code); + coff_wr_u32(w, size_init); + coff_wr_u32(w, size_uninit); + coff_wr_u32(w, entry_rva); + coff_wr_u32(w, + out[COFF_BUCKET_TEXT].in_image ? out[COFF_BUCKET_TEXT].rva : 0); + /* Windows-specific fields. */ + coff_wr_u64(w, PE_IMAGE_BASE); + coff_wr_u32(w, PE_SECTION_ALIGNMENT); + coff_wr_u32(w, PE_FILE_ALIGNMENT); + coff_wr_u16(w, PE_OS_MAJOR); + coff_wr_u16(w, PE_OS_MINOR); + coff_wr_u16(w, 0u); /* MajorImageVersion */ + coff_wr_u16(w, 0u); /* MinorImageVersion */ + coff_wr_u16(w, PE_SUBSYS_MAJOR); + coff_wr_u16(w, PE_SUBSYS_MINOR); + coff_wr_u32(w, 0u); /* Win32VersionValue */ + coff_wr_u32(w, image_size); + coff_wr_u32(w, headers_size_padded); + coff_wr_u32(w, 0u); /* CheckSum */ + coff_wr_u16(w, subsystem ? 
subsystem : IMAGE_SUBSYSTEM_WINDOWS_CUI); + coff_wr_u16(w, PE_DLL_CHARS); + coff_wr_u64(w, PE_STACK_RESERVE); + coff_wr_u64(w, PE_STACK_COMMIT); + coff_wr_u64(w, PE_HEAP_RESERVE); + coff_wr_u64(w, PE_HEAP_COMMIT); + coff_wr_u32(w, 0u); /* LoaderFlags */ + coff_wr_u32(w, (u32)PE_NUM_DATA_DIRS); + /* DataDirectory[16]. Populated entries: + * [1] IMPORT — descriptor table RVA + total descriptor bytes + * [5] BASERELOC — when PIE and .reloc is in the image + * [12] IAT — first IAT block RVA + sum of per-DLL IAT sizes + * Everything else stays zero. */ + u32 i; + int has_idata = it && it->nimports > 0 && out[COFF_BUCKET_IDATA].in_image; + for (i = 0; i < PE_NUM_DATA_DIRS; ++i) { + if (i == IMAGE_DIRECTORY_ENTRY_IMPORT && has_idata) { + coff_wr_u32(w, out[COFF_BUCKET_IDATA].rva + it->desc_off); + coff_wr_u32(w, it->desc_size); + } else if (i == IMAGE_DIRECTORY_ENTRY_IAT && has_idata) { + coff_wr_u32(w, out[COFF_BUCKET_IDATA].rva + it->iat_base); + coff_wr_u32(w, it->iat_total); + } else if (i == IMAGE_DIRECTORY_ENTRY_BASERELOC && pie && + out[COFF_BUCKET_RELOC].in_image) { + coff_wr_u32(w, out[COFF_BUCKET_RELOC].rva); + coff_wr_u32(w, out[COFF_BUCKET_RELOC].size); + } else if (i == IMAGE_DIRECTORY_ENTRY_TLS && tls && tls->present) { + coff_wr_u32(w, out[COFF_BUCKET_RDATA].rva + tls->dir_rdata_off); + coff_wr_u32(w, COFF_TLS_DIRECTORY64_SIZE); + } else { + coff_wr_u32(w, 0u); + coff_wr_u32(w, 0u); + } + } +} + +static void coff_write_section_header(Writer* w, const char* name, u32 vsize, + u32 rva, u32 size_raw, u32 file_offset, + u32 characteristics) { + u8 nm[8] = {0, 0, 0, 0, 0, 0, 0, 0}; + size_t n = slice_from_cstr(name).len; + if (n > 8) n = 8; + memcpy(nm, name, n); + cfree_writer_write(w, nm, 8); + coff_wr_u32(w, vsize); + coff_wr_u32(w, rva); + coff_wr_u32(w, size_raw); + coff_wr_u32(w, file_offset); + coff_wr_u32(w, 0u); /* PointerToRelocations */ + coff_wr_u32(w, 0u); /* PointerToLinenumbers */ + coff_wr_u16(w, 0u); /* NumberOfRelocations */ + coff_wr_u16(w, 
0u); /* NumberOfLinenumbers */ + coff_wr_u32(w, characteristics); +} + +/* ---- main entry ---- */ + +void link_emit_coff(LinkImage* img, Writer* w) { + Heap* heap = img->heap; + Compiler* c = img->c; + u16 machine = coff_machine_or_panic(c); + if (img->entry_sym == LINK_SYM_NONE) + compiler_panic(c, no_loc(), "link_emit_coff: no resolved entry symbol"); + + /* ---- pass 1: build buckets + per-section delta map ---- */ + CoffSection out[COFF_NBUCKETS]; + CoffSecMap* map = (CoffSecMap*)heap->alloc( + heap, sizeof(CoffSecMap) * (img->nsections + 1u), _Alignof(CoffSecMap)); + if (!map && img->nsections) + compiler_panic(c, no_loc(), "link_emit_coff: oom on section map"); + memset(map, 0, sizeof(CoffSecMap) * (img->nsections + 1u)); + + /* coff_build_buckets stashes per-bucket allocation caps in size_raw; + * we read them back into a local before size_raw is recomputed by + * coff_assign_layout so the cleanup path can free with the right + * size. */ + coff_build_buckets(img, out, map); + /* coff_build_buckets stashes per-bucket allocation caps in size_raw + * (the only bucket field we own for the duration of layout); read + * them out before coff_assign_layout overwrites the field. .reloc + * and .idata aren't touched by coff_build_buckets — their caps are + * filled in below once coff_build_reloc_section / coff_emit_idata + * run. */ + u32 bucket_caps[COFF_NBUCKETS]; + u32 b; + for (b = 0; b < COFF_NBUCKETS; ++b) bucket_caps[b] = out[b].size_raw; + + /* ---- pass 1b: collect imports and reserve .idata + .text stubs ---- + * + * Builds the per-DLL / per-import layout and appends one IAT-routing + * stub per imported function to the .text bucket. The .idata bucket + * size is set here (so it counts in nsec); the stub vaddrs and + * IAT-slot vaddrs are finalised after coff_assign_layout. 
*/ + CoffImportTable imports; + int have_imports = coff_collect_imports(img, &imports); + if (have_imports) { + coff_plan_idata_layout(img, &imports); + coff_append_stubs(img, &imports, &out[COFF_BUCKET_TEXT], + &bucket_caps[COFF_BUCKET_TEXT]); + /* Reserve the .idata bucket size so coff_assign_layout / nsec + * accounting sees it. Actual bytes are written by coff_emit_idata + * once the bucket RVA is known. */ + out[COFF_BUCKET_IDATA].size = imports.idata_size; + } + + /* ---- pass 1c: plan the TLS directory record ---- + * + * If any SF_TLS sections survived, reserve 40 bytes at the tail of + * .rdata for the IMAGE_TLS_DIRECTORY64. Bytes are zeroed now and + * filled in by coff_emit_tls_dir once the bucket RVAs are final. */ + CoffTlsLayout tls; + coff_plan_tls_layout(img, out, &bucket_caps[COFF_BUCKET_RDATA], &tls); + + /* ---- pass 2: decide whether .reloc will be in the image ---- + * + * The headers' file size (and therefore every section's file + * offset) depends on the section-table entry count, so we need to + * commit to "is .reloc emitted?" before laying out file offsets. + * .reloc lights up iff PIE and at least one absolute reloc points + * into a kept section, OR a TLS directory is emitted (its four u64 + * VA fields all need base-relocs). 
*/ + int emit_reloc = 0; + if (img->pie) { + u32 i; + u32 nrel = LinkRelocs_count(&img->relocs); + for (i = 0; i < nrel; ++i) { + const LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); + if (!coff_reloc_needs_base_reloc(r->kind)) continue; + if (r->link_section_id == LINK_SEC_NONE) continue; + emit_reloc = 1; + break; + } + if (!emit_reloc && tls.present) emit_reloc = 1; + } + + u32 nsec = 0; + for (b = 0; b < COFF_NBUCKETS; ++b) { + if (b == COFF_BUCKET_RELOC) { + if (emit_reloc) ++nsec; /* tentative; size set below */ + continue; + } + if (out[b].size) ++nsec; + } + u32 headers_size_unpadded = PE_DOS_HDR_SIZE + PE_SIG_SIZE + PE_FILE_HDR_SIZE + + PE_OPT_HDR_SIZE + nsec * PE_SECTION_HDR_SIZE; + u32 headers_size_padded = + (u32)ALIGN_UP((u64)headers_size_unpadded, (u64)PE_FILE_ALIGNMENT); + + /* First layout pass: fixes RVAs / file offsets for buckets that + * already have a finalised size (.text, .rdata, .idata, .data, .bss). + * .reloc's RVA is provisional — it depends on .reloc's own size, + * which is still 0 at this point. */ + (void)coff_assign_layout(out, headers_size_padded, PE_FIRST_SECTION_RVA); + + /* ---- pass 2b: emit .idata bytes + per-arch IAT stubs ---- + * + * The .idata bucket's RVA is final after the first assign_layout; + * stubs need it (the indirect-jump displacement targets an IAT slot) + * and .idata's own descriptor / ILT / IAT records all carry RVAs. + * coff_import_vaddr_build builds the per-LinkSymId override table + * that apply_all_relocs consults in place of the (zero) symbol + * vaddr for imported targets. */ + CoffImportVaddr import_vaddr; + memset(&import_vaddr, 0, sizeof(import_vaddr)); + if (have_imports) { + coff_emit_idata(img, &imports, out, &bucket_caps[COFF_BUCKET_IDATA]); + coff_emit_stubs(img, &imports, out); + coff_import_vaddr_build(img, &imports, out, &import_vaddr); + } + + /* Write the TLS directory bytes now that bucket RVAs are final. 
*/ + coff_emit_tls_dir(img, out, map, &tls); + + /* ---- pass 3: build .reloc using the now-final bucket RVAs ---- + * + * coff_build_reloc_section reads out[bucket].rva indirectly via + * map[].new_rva + (write_vaddr - sec->vaddr) → site offset within + * the bucket; the absolute site_rva is bucket.rva + that offset. + * Patch site RVAs are page-quantised in the emitted blob, so this + * is the spot where the bucket RVAs need to be already final. + * + * TLS directory's four absolute-VA fields ride into the entries via + * the `extras` array — they aren't ordinary symbol relocations, so + * they don't show up in img->relocs. */ + if (emit_reloc) { + CoffRelocEntry tls_extras[4]; + u32 n_tls_extras = 0; + if (tls.present) { + u32 dir_rva = out[COFF_BUCKET_RDATA].rva + tls.dir_rdata_off; + static const u32 field_offs[4] = { + COFF_TLSDIR_OFF_START_ADDR, + COFF_TLSDIR_OFF_END_ADDR, + COFF_TLSDIR_OFF_INDEX_ADDR, + COFF_TLSDIR_OFF_CALLBACKS, + }; + u32 k; + for (k = 0; k < 4; ++k) { + if (field_offs[k] == COFF_TLSDIR_OFF_CALLBACKS && !tls.callbacks_sym) + continue; + tls_extras[n_tls_extras].site_rva = dir_rva + field_offs[k]; + tls_extras[n_tls_extras].type = (u16)IMAGE_REL_BASED_DIR64; + tls_extras[n_tls_extras].pad = 0; + ++n_tls_extras; + } + } + coff_build_reloc_section(img, out, map, &out[COFF_BUCKET_RELOC], tls_extras, + n_tls_extras); + bucket_caps[COFF_BUCKET_RELOC] = out[COFF_BUCKET_RELOC].size_raw; + /* size_raw was stashed by build; assign_layout below recomputes it + * as the FileAlignment-padded length. */ + (void)coff_assign_layout(out, headers_size_padded, PE_FIRST_SECTION_RVA); + } + + /* `_tls_used` is the public mingw/PE name for the TLS directory + * record. Keep it in lockstep with the optional-header TLS data + * directory, rather than leaving references bound to mingw's tlssup.o + * placeholder record. 
*/ + coff_define_tls_used(img, out, &tls); + + /* ---- pass 4: resolve entry symbol's PE RVA ---- + * + * Done before apply so the optional-header field has its final + * value. */ + const LinkSymbol* entry_sym = LinkSyms_at(&img->syms, img->entry_sym - 1); + if (!entry_sym->defined || entry_sym->kind == SK_ABS) + compiler_panic(c, no_loc(), + "link_emit_coff: entry symbol is not a defined " + "image-relative function"); + const LinkSection* entry_sec = coff_section_at(img, entry_sym->vaddr); + if (!entry_sec) + compiler_panic(c, no_loc(), + "link_emit_coff: entry symbol has no containing " + "section"); + u8 entry_bucket = map[entry_sec->id - 1].bucket; + u32 entry_rva = out[entry_bucket].rva + map[entry_sec->id - 1].new_rva + + (u32)(entry_sym->vaddr - entry_sec->vaddr); + + /* ---- pass 5: apply all relocations into bucket bytes ---- */ + coff_apply_all_relocs(img, out, map, have_imports ? &import_vaddr : NULL); + + /* ---- pass 6: compute SizeOfImage (in-memory size) ---- */ + u32 image_size = 0; + for (b = 0; b < COFF_NBUCKETS; ++b) { + if (!out[b].in_image) continue; + u32 end = out[b].rva + out[b].size; + if (end > image_size) image_size = end; + } + image_size = (u32)ALIGN_UP((u64)image_size, (u64)PE_SECTION_ALIGNMENT); + + /* ---- pass 7: write everything ---- */ + u16 file_chars = IMAGE_FILE_EXECUTABLE_IMAGE | IMAGE_FILE_LARGE_ADDRESS_AWARE; + if (!img->pie || !out[COFF_BUCKET_RELOC].in_image) { + file_chars |= IMAGE_FILE_RELOCS_STRIPPED; + } + + coff_write_dos_stub(w); + /* PE signature. */ + coff_wr_u32(w, IMAGE_NT_SIGNATURE); + coff_write_file_header(w, machine, (u16)nsec, file_chars); + u16 subsystem = img->linker ? img->linker->pe_subsystem : 0; + coff_write_optional_header(w, entry_rva, out, headers_size_padded, image_size, + img->pie, subsystem, + have_imports ? &imports : NULL, &tls); + + /* Section table. 
*/ + for (b = 0; b < COFF_NBUCKETS; ++b) { + if (!out[b].in_image) continue; + coff_write_section_header(w, out[b].name, out[b].size, out[b].rva, + out[b].size_raw, out[b].file_offset, + out[b].characteristics); + } + + /* Pad to first section's file offset. */ + u64 cur = (u64)headers_size_unpadded; + u64 first_file_off = headers_size_padded; + if (cur < first_file_off) { + coff_write_zeroes(w, first_file_off - cur); + cur = first_file_off; + } + + /* Section bodies. */ + for (b = 0; b < COFF_NBUCKETS; ++b) { + if (!out[b].in_image) continue; + if (!out[b].has_file_bytes) continue; + if (cur < out[b].file_offset) { + coff_write_zeroes(w, out[b].file_offset - cur); + cur = out[b].file_offset; + } + cfree_writer_write(w, out[b].bytes, out[b].size); + cur += out[b].size; + if (out[b].size_raw > out[b].size) { + coff_write_zeroes(w, out[b].size_raw - out[b].size); + cur += out[b].size_raw - out[b].size; + } + } + + /* ---- cleanup ---- */ + for (b = 0; b < COFF_NBUCKETS; ++b) { + if (out[b].bytes) heap->free(heap, out[b].bytes, bucket_caps[b]); + } + heap->free(heap, map, sizeof(CoffSecMap) * (img->nsections + 1u)); + if (have_imports) { + coff_import_vaddr_free(img, &import_vaddr); + coff_imports_free(img, &imports); + } +} diff --git a/src/obj/coff/read.c b/src/obj/coff/read.c @@ -0,0 +1,739 @@ +/* PE/COFF .obj (IMAGE_FILE_HEADER + sections) reader. Parses a 64-bit + * little-endian relocatable object back into a fresh ObjBuilder. Peer + * of read_elf / read_macho; the post-finalize ObjBuilder shape is the + * canonical superset doc/DESIGN.md §5.5 promises: read_coff of an + * emit_coff output produces an ObjBuilder shape-equivalent to the + * writer's input, modulo synthesized SECTION symbols and the COMDAT + * section-definition aux records. + * + * Scope: IMAGE_FILE_MACHINE_AMD64 and IMAGE_FILE_MACHINE_ARM64. PE + * executables (with a non-zero SizeOfOptionalHeader) are rejected — a + * future read_coff_pe would handle those. 
Microsoft "short import" + * records (Sig1=0, Sig2=0xFFFF) found inside .lib archive members are + * detected at entry and dispatched to read_coff_short_import, which + * synthesizes a DSO-shaped ObjBuilder annotated with the providing + * DLL name via obj_set_coff_import_dll. */ + +#include <string.h> + +#include "core/arena.h" +#include "core/heap.h" +#include "core/pool.h" +#include "core/slice.h" +#include "obj/coff/coff.h" +#include "obj/format.h" + +static SrcLoc no_loc(void) { + SrcLoc l = {0, 0, 0}; + return l; +} + +/* ---- section-header scratch ---- */ + +typedef struct CSecRec { + char raw_name[8]; + u32 virtual_size; + u32 size_of_raw_data; + u32 pointer_to_raw_data; + u32 pointer_to_relocations; + u16 number_of_relocations; + u32 characteristics; + ObjSecId obj_sec; /* OBJ_SEC_NONE if skipped */ +} CSecRec; + +static void parse_shdr(const u8* p, CSecRec* out) { + memcpy(out->raw_name, p, 8); + out->virtual_size = coff_rd_u32(p + 8); + out->size_of_raw_data = coff_rd_u32(p + 16); + out->pointer_to_raw_data = coff_rd_u32(p + 20); + out->pointer_to_relocations = coff_rd_u32(p + 24); + out->number_of_relocations = coff_rd_u16(p + 32); + out->characteristics = coff_rd_u32(p + 36); + out->obj_sec = OBJ_SEC_NONE; +} + +/* ---- string-table lookup (4-byte size prefix, NUL-terminated entries) ---- */ + +static const char* strtab_lookup(const u8* tab, u32 tab_size, u32 off, + u32* len_out) { + if (off >= tab_size) { + *len_out = 0; + return ""; + } + const char* s = (const char*)(tab + off); + u32 max = tab_size - off; + u32 n = 0; + while (n < max && s[n] != '\0') ++n; + *len_out = n; + return s; +} + +/* Resolve a section/symbol short-or-long name into (ptr, len). COFF + * section names use the "/<decimal>" convention for >8-byte names; COFF + * symbol names use the (Zeroes==0, Offset) form instead. This helper + * handles the section form (8 raw bytes; leading '/' triggers strtab + * lookup). 
*/ +static void resolve_section_name(const char raw[8], const u8* strtab, + u32 strtab_size, const char** name_out, + u32* len_out) { + if (raw[0] == '/') { + /* Parse decimal offset. Up to 7 ASCII digits. */ + u32 off = 0; + for (u32 i = 1; i < 8 && raw[i] >= '0' && raw[i] <= '9'; ++i) { + off = off * 10u + (u32)(raw[i] - '0'); + } + *name_out = strtab_lookup(strtab, strtab_size, off, len_out); + return; + } + /* Inline: up to 8 bytes, NUL-padded (not necessarily NUL-terminated). */ + u32 n = 0; + while (n < 8 && raw[n] != '\0') ++n; + *name_out = raw; + *len_out = n; +} + +/* ---- characteristics -> SecKind / SecFlag / SecSem ---- */ + +static u16 coff_sec_kind(const char* name, u32 nlen, u32 ch) { + if (ch & IMAGE_SCN_CNT_UNINITIALIZED_DATA) return SEC_BSS; + if (ch & IMAGE_SCN_CNT_CODE) return SEC_TEXT; + if (ch & IMAGE_SCN_MEM_EXECUTE) return SEC_TEXT; + if (nlen >= 7 && memcmp(name, ".debug_", 7) == 0) return SEC_DEBUG; + /* The MS toolchain spells DWARF section names with a leading ".debug$" + * (CodeView) — keep ELF-style ".debug_" detection but also treat the + * MS form as debug. */ + if (nlen >= 7 && memcmp(name, ".debug$", 7) == 0) return SEC_DEBUG; + if (ch & IMAGE_SCN_CNT_INITIALIZED_DATA) { + if (ch & IMAGE_SCN_MEM_WRITE) return SEC_DATA; + return SEC_RODATA; + } + return SEC_OTHER; +} + +static u16 coff_sec_flags(const char* name, u32 nlen, u32 ch) { + u16 f = 0; + if (ch & IMAGE_SCN_MEM_READ) f |= SF_ALLOC; + if (ch & IMAGE_SCN_MEM_EXECUTE) f |= SF_EXEC; + if (ch & IMAGE_SCN_MEM_WRITE) f |= SF_WRITE; + if (ch & IMAGE_SCN_LNK_COMDAT) f |= SF_GROUP; + /* TLS sections in PE are spelled ".tls$<suffix>" (e.g. ".tls$", ".tls$ZZZ"). + * There is no characteristics bit for TLS — detection is name-based. */ + if (nlen >= 5 && memcmp(name, ".tls$", 5) == 0) f |= SF_TLS; + if (nlen == 4 && memcmp(name, ".tls", 4) == 0) f |= SF_TLS; + return f; +} + +/* Bits 20..23 of Characteristics encode alignment as (log2(align)+1). 
+ * 0 means "default"; we collapse to align=1 for round-trip purposes. */ +static u32 coff_sec_align(u32 ch) { + u32 n = (ch & IMAGE_SCN_ALIGN_MASK) >> 20; + if (n == 0) return 1; + return 1u << (n - 1u); +} + +/* ---- symbol-name resolution ---- */ + +static void resolve_sym_name(const u8* rec, const u8* strtab, u32 strtab_size, + const char** name_out, u32* len_out) { + /* ShortName: 8 bytes. If first 4 bytes are zero, second 4 bytes is + * the strtab offset (LongName form). */ + u32 z = coff_rd_u32(rec + 0); + if (z == 0) { + u32 off = coff_rd_u32(rec + 4); + *name_out = strtab_lookup(strtab, strtab_size, off, len_out); + return; + } + u32 n = 0; + while (n < 8 && rec[n] != '\0') ++n; + *name_out = (const char*)rec; + *len_out = n; +} + +/* ---- short-import record handler ---- + * Microsoft "short import" format: a 20-byte ImportObjectHeader + * followed by SizeOfData bytes containing two NUL-terminated strings — + * the imported symbol name then the DLL name. These live as members + * of .lib archives (mingw's libkernel32.dll.a etc.) and stand in for + * a full long-form COFF import object. + * + * cfree-side model: synthesize a DSO-shaped ObjBuilder with the + * imported symbol defined at section_id = OBJ_SEC_NONE (the same + * shape read_coff_dso / read_elf_dso produce for an exported name), + * and stash the providing DLL name on the builder via + * obj_set_coff_import_dll so the archive-ingestion layer can route + * the resulting LinkInput as a DSO with this name as the soname. + * + * We also synthesize the `__imp_<name>` alias mingw codegen uses to + * spell explicit IAT-slot access; both names ultimately resolve to + * the same DLL export at link time. */ +static ObjBuilder* read_coff_short_import(Compiler* c, const char* name, + const u8* data, size_t len) { + if (len < COFF_IMPORT_OBJECT_HEADER_SIZE) + compiler_panic(c, no_loc(), + "read_coff: short-import record shorter than header"); + + /* Sig1 / Sig2 already checked by the caller. 
*/ + /* data + 4: Version (2 bytes, ignored). */ + u16 machine = coff_rd_u16(data + 6); + /* data + 8: TimeDateStamp (4 bytes, ignored). */ + u32 size_of_data = coff_rd_u32(data + 12); + u16 ordinal_or_hint = coff_rd_u16(data + 16); + u16 type_flags = coff_rd_u16(data + 18); + + if ((u64)COFF_IMPORT_OBJECT_HEADER_SIZE + (u64)size_of_data > (u64)len) + compiler_panic(c, no_loc(), + "read_coff: short-import SizeOfData=%u extends past input " + "(len=%zu)", + size_of_data, len); + + if (machine != IMAGE_FILE_MACHINE_AMD64 && + machine != IMAGE_FILE_MACHINE_ARM64) + compiler_panic(c, no_loc(), + "read_coff: short-import unsupported machine %#x", + (u32)machine); + + /* Decode TypeFlags bitfield (Type:2, NameType:3, Reserved:11). */ + u32 import_type = (u32)(type_flags & 0x3u); + u32 name_type = (u32)((type_flags >> 2) & 0x7u); + + /* Ordinal-only imports (NameType=IMPORT_OBJECT_ORDINAL) are not yet + * implemented in cfree. None of the mingw / llvm-mingw system import + * archives use this shape — every libfoo.a member in the supported + * sysroots imports by name — so refusing here is a clean diagnostic, + * not an internal panic. When a real consumer surfaces, the work is + * to thread the ordinal through link_resolve and into the PE import + * directory hint/name tables. */ + if (name_type == IMPORT_OBJECT_ORDINAL) + compiler_panic( + c, no_loc(), + "read_coff: short-import by ordinal not implemented " + "(archive member \"%.*s\", ordinal %u). cfree links " + "imports by name only; rebuild the consumer to import " + "by name, or omit this archive from the link.", + SLICE_ARG(name ? slice_from_cstr(name) : SLICE_LIT("<unnamed>")), + (unsigned)ordinal_or_hint); + + /* Symbol name: NUL-terminated starting at data + 20. 
*/ + const u8* body = data + COFF_IMPORT_OBJECT_HEADER_SIZE; + u32 sym_name_max = size_of_data; + u32 sym_name_len = 0; + while (sym_name_len < sym_name_max && body[sym_name_len] != '\0') + ++sym_name_len; + if (sym_name_len == sym_name_max) + compiler_panic(c, no_loc(), + "read_coff: short-import symbol name not NUL-terminated"); + + /* DLL name: NUL-terminated starting after the symbol name's NUL. */ + u32 dll_name_off = sym_name_len + 1u; + if (dll_name_off >= size_of_data) + compiler_panic(c, no_loc(), "read_coff: short-import missing DLL name"); + const u8* dll_p = body + dll_name_off; + u32 dll_name_max = size_of_data - dll_name_off; + u32 dll_name_len = 0; + while (dll_name_len < dll_name_max && dll_p[dll_name_len] != '\0') + ++dll_name_len; + if (dll_name_len == dll_name_max) + compiler_panic(c, no_loc(), + "read_coff: short-import DLL name not NUL-terminated"); + + ObjBuilder* ob = obj_new(c); + if (!ob) compiler_panic(c, no_loc(), "read_coff: obj_new failed"); + + /* Pick SymKind by import type: CODE -> function, DATA/CONST -> object. + * Both are defined at section_id=OBJ_SEC_NONE, value=0, size=0 — the + * shape read_coff_dso would produce for a DLL export. */ + SymKind k = (import_type == IMPORT_OBJECT_CODE) ? SK_FUNC : SK_OBJ; + + Sym sn = pool_intern_slice( + c->global, (Slice){.s = (const char*)body, .len = sym_name_len}); + ObjSymId id = + obj_symbol_ex(ob, sn, SB_GLOBAL, SV_DEFAULT, k, OBJ_SEC_NONE, 0, 0, 0); + obj_sym_mark_referenced(ob, id); + + /* `__imp_<name>` alias for codegen that refers to the IAT slot + * directly (mingw convention). Even code imports use an object-like + * `__imp_` symbol because references to it want the IAT data slot, not + * the callable import stub. 
*/ + static const char kImpPrefix[] = "__imp_"; + u32 imp_len = (u32)(sizeof kImpPrefix - 1u) + sym_name_len; + char* imp_buf = arena_array(c->scratch, char, imp_len); + memcpy(imp_buf, kImpPrefix, sizeof kImpPrefix - 1u); + memcpy(imp_buf + (sizeof kImpPrefix - 1u), body, sym_name_len); + Sym imp_sn = + pool_intern_slice(c->global, (Slice){.s = imp_buf, .len = imp_len}); + ObjSymId imp_id = obj_symbol_ex(ob, imp_sn, SB_GLOBAL, SV_DEFAULT, SK_OBJ, + OBJ_SEC_NONE, 0, 0, 0); + obj_sym_mark_referenced(ob, imp_id); + + /* Stash the DLL name so the archive-ingestion layer (Phase 4.3) can + * route this builder as a DSO with the DLL as soname. */ + Sym dll_sn = pool_intern_slice( + c->global, (Slice){.s = (const char*)dll_p, .len = dll_name_len}); + obj_set_coff_import_dll(ob, dll_sn); + + (void)name_type; + obj_finalize(ob); + return ob; +} + +ObjBuilder* read_coff(Compiler* c, const char* name, const u8* data, + size_t len) { + (void)name; + + /* ---- Step 0: header validation ---- */ + if (len < COFF_FILE_HEADER_SIZE) + compiler_panic(c, no_loc(), "read_coff: input shorter than COFF header"); + + /* Microsoft short-import record? (Sig1=0, Sig2=0xFFFF.) These live + * as members of .lib archives and stand in for a long-form import + * object. Detect at entry; the rest of read_coff assumes the + * input is a real IMAGE_FILE_HEADER. */ + if (len >= 4 && coff_rd_u16(data + 0) == IMPORT_OBJECT_HDR_SIG1 && + coff_rd_u16(data + 2) == IMPORT_OBJECT_HDR_SIG2) { + return read_coff_short_import(c, name, data, len); + } + + u16 machine = coff_rd_u16(data + 0); + u16 nsections = coff_rd_u16(data + 2); + /* data + 4: TimeDateStamp (4 bytes, ignored). */ + u32 ptr_to_symtab = coff_rd_u32(data + 8); + u32 nsymbols = coff_rd_u32(data + 12); + u16 size_opt_hdr = coff_rd_u16(data + 16); + /* data + 18: Characteristics (2 bytes, currently ignored). 
*/ + + if (size_opt_hdr != 0) + compiler_panic(c, no_loc(), + "read_coff: input has optional header (size=%u); " + "use read_coff_pe for executables", + (u32)size_opt_hdr); + + if (machine != IMAGE_FILE_MACHINE_AMD64 && + machine != IMAGE_FILE_MACHINE_ARM64 && + machine != IMAGE_FILE_MACHINE_ARM64EC) + compiler_panic(c, no_loc(), "read_coff: unsupported machine %#x", + (u32)machine); + + const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_COFF); + const ObjCoffArchOps* coff = + fmt && fmt->coff_machine ? fmt->coff_machine(machine) : NULL; + if (!coff || !coff->reloc_from) + compiler_panic(c, no_loc(), "read_coff: no arch impl for machine %#x", + (u32)machine); + u32 (*reloc_from)(u32) = coff->reloc_from; + + if ((u64)COFF_FILE_HEADER_SIZE + + (u64)nsections * (u64)COFF_SECTION_HEADER_SIZE > + (u64)len) + compiler_panic(c, no_loc(), "read_coff: section header table out of range"); + + /* ---- Step 1: bootstrap, locate strtab ---- */ + /* Strtab is at PointerToSymbolTable + NumberOfSymbols * 18. When the + * file has no symbol table (ptr=0, n=0) we treat strtab as empty. */ + const u8* strtab = NULL; + u32 strtab_size = 0; + if (ptr_to_symtab && nsymbols) { + u64 symtab_end = (u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYMBOL_SIZE; + if (symtab_end + COFF_STRTAB_SIZE_FIELD_BYTES > (u64)len) + compiler_panic(c, no_loc(), + "read_coff: symbol table / strtab header out of range"); + u32 declared = coff_rd_u32(data + symtab_end); + /* The size field is inclusive of the 4-byte prefix; treat <4 as + * "empty" (some tools write 0). 
*/ + if (declared < COFF_STRTAB_SIZE_FIELD_BYTES) declared = 0; + if (declared) { + if (symtab_end + (u64)declared > (u64)len) + compiler_panic(c, no_loc(), "read_coff: strtab body out of range"); + strtab = data + symtab_end; + strtab_size = declared; + } else { + strtab = data + symtab_end; + strtab_size = COFF_STRTAB_SIZE_FIELD_BYTES; + } + } + + ObjBuilder* ob = obj_new(c); + if (!ob) compiler_panic(c, no_loc(), "read_coff: obj_new failed"); + + /* ---- Step 2: ingest sections ---- */ + CSecRec* secs = arena_array(c->scratch, CSecRec, nsections ? nsections : 1); + const u8* shdr_base = data + COFF_FILE_HEADER_SIZE; + for (u32 i = 0; i < nsections; ++i) { + CSecRec* s = &secs[i]; + parse_shdr(shdr_base + (u64)i * COFF_SECTION_HEADER_SIZE, s); + + const char* nm; + u32 nlen; + resolve_section_name(s->raw_name, strtab, strtab_size, &nm, &nlen); + Sym sn = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}); + + u16 kind = coff_sec_kind(nm, nlen, s->characteristics); + u16 flags = coff_sec_flags(nm, nlen, s->characteristics); + u32 align = coff_sec_align(s->characteristics); + + int is_bss = (s->characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA) != 0; + u16 sem = is_bss ? SSEM_NOBITS : SSEM_PROGBITS; + + ObjSecId id = obj_section_ex(ob, sn, (SecKind)kind, (SecSem)sem, flags, + align, 0u, 0u, 0u); + if (id == OBJ_SEC_NONE) + compiler_panic(c, no_loc(), + "read_coff: obj_section_ex failed for section %u", i); + s->obj_sec = id; + + /* Preserve raw Characteristics so emit_coff can write back any bits + * the canonical SecFlag/SecSem mapping doesn't model (LNK_INFO, + * LNK_REMOVE, MEM_DISCARDABLE, MEM_SHARED, GPREL, alignment nibble). */ + obj_section_set_ext(ob, id, OBJ_EXT_COFF, s->characteristics, 0); + + if (is_bss) { + u32 bss_size = s->virtual_size ? 
s->virtual_size : s->size_of_raw_data; + obj_reserve_bss(ob, id, bss_size, align); + } else if (s->size_of_raw_data) { + u64 end = (u64)s->pointer_to_raw_data + (u64)s->size_of_raw_data; + if (end > (u64)len) + compiler_panic(c, no_loc(), "read_coff: section %u bytes out of range", + i); + u8* dst = obj_reserve(ob, id, s->size_of_raw_data); + memcpy(dst, data + s->pointer_to_raw_data, s->size_of_raw_data); + } + } + + /* ---- Step 3: ingest symbols (with aux-record awareness) ---- + * sym_to_obj is indexed by RAW symbol-table index (including aux + * slots), so reloc.SymbolTableIndex resolves directly without + * adjusting for skipped aux records. Aux slots map to OBJ_SYM_NONE. */ + ObjSymId* sym_to_obj = + arena_zarray(c->scratch, ObjSymId, nsymbols ? nsymbols : 1); + + /* Track section-symbol primary symtab index per section, stored as + * (raw_index + 1) so 0 can mean "not seen yet" without colliding + * with the (legitimate) first symbol-table slot — emit_coff always + * lays the first section's section-symbol at index 0. */ + u32* sec_sym_primary = arena_zarray(c->scratch, u32, nsections + 1u); + + const u8* sym_base = data + ptr_to_symtab; + if (nsymbols) { + if ((u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYMBOL_SIZE > (u64)len) + compiler_panic(c, no_loc(), "read_coff: symbol table body out of range"); + } + + for (u32 i = 0; i < nsymbols;) { + const u8* p = sym_base + (u64)i * COFF_SYMBOL_SIZE; + const char* nm; + u32 nlen; + resolve_sym_name(p, strtab, strtab_size, &nm, &nlen); + + u32 value = coff_rd_u32(p + 8); + i16 sec_num = (i16)coff_rd_u16(p + 12); + u16 type = coff_rd_u16(p + 14); + u8 sclass = p[16]; + u8 naux = p[17]; + + /* FILE storage class: concatenate aux records' raw bytes (each + * 18 bytes, NUL-padded) for the source-file name. */ + if (sclass == IMAGE_SYM_CLASS_FILE) { + /* Build name from aux records (up to naux*18 bytes); fall back + * to the primary record's name if naux==0. 
*/ + const char* fnm = nm; + u32 fnlen = nlen; + if (naux) { + /* Each aux record's 18 bytes are interpreted as raw file-name + * bytes; concatenate then trim trailing NULs. */ + u32 total = (u32)naux * COFF_SYMBOL_SIZE; + if ((u64)i + 1u + (u64)naux > (u64)nsymbols) + compiler_panic(c, no_loc(), + "read_coff: FILE aux records extend past symbol " + "table"); + const u8* aux = p + COFF_SYMBOL_SIZE; + u32 n = 0; + while (n < total && aux[n] != '\0') ++n; + fnm = (const char*)aux; + fnlen = n; + } + Sym fsn = + fnlen ? pool_intern_slice(c->global, (Slice){.s = fnm, .len = fnlen}) + : 0; + ObjSymId id = obj_symbol_ex(ob, fsn, SB_LOCAL, SV_DEFAULT, SK_FILE, + OBJ_SEC_NONE, 0, 0, 0); + obj_sym_mark_referenced(ob, id); + sym_to_obj[i] = id; + i += 1u + naux; + continue; + } + + /* Skip .bf/.ef debug pair primaries (FUNCTION storage class) and + * the END_OF_FUNCTION marker: they carry no symbol cfree models. */ + if (sclass == IMAGE_SYM_CLASS_FUNCTION || + sclass == IMAGE_SYM_CLASS_END_OF_FUNCTION) { + sym_to_obj[i] = OBJ_SYM_NONE; + i += 1u + naux; + continue; + } + + /* Resolve (bind, vis, kind, section_id, value, size, cmnalign). */ + SymBind bind = SB_LOCAL; + SymVis vis = SV_DEFAULT; + SymKind kind = SK_NOTYPE; + ObjSecId target_sec = OBJ_SEC_NONE; + u64 sym_value = 0; + u64 sym_size = 0; + u64 cmnalign = 0; + + if (sec_num == IMAGE_SYM_UNDEFINED) { + /* Undef or common. EXTERNAL with Value > 0 is a common. */ + if (sclass == IMAGE_SYM_CLASS_EXTERNAL && value > 0) { + bind = SB_GLOBAL; + kind = SK_COMMON; + sym_size = value; + cmnalign = 1; /* COFF doesn't carry per-common alignment */ + } else { + bind = (sclass == IMAGE_SYM_CLASS_WEAK_EXTERNAL) ? SB_WEAK + : (sclass == IMAGE_SYM_CLASS_EXTERNAL) ? SB_GLOBAL + : SB_LOCAL; + kind = SK_UNDEF; + } + } else if (sec_num == IMAGE_SYM_ABSOLUTE) { + kind = SK_ABS; + sym_value = value; + bind = (sclass == IMAGE_SYM_CLASS_EXTERNAL) ? 
SB_GLOBAL : SB_LOCAL; + } else if (sec_num == IMAGE_SYM_DEBUG) { + /* Defined-in-debug — cfree has no model for it. Skip with an + * OBJ_SYM_NONE entry; relocations against this slot will resolve + * to OBJ_SYM_NONE, which obj_reloc_ex tolerates. */ + sym_to_obj[i] = OBJ_SYM_NONE; + i += 1u + naux; + continue; + } else if (sec_num >= 1 && (u32)sec_num <= nsections) { + target_sec = secs[sec_num - 1].obj_sec; + sym_value = value; + switch (sclass) { + case IMAGE_SYM_CLASS_EXTERNAL: + bind = SB_GLOBAL; + break; + case IMAGE_SYM_CLASS_WEAK_EXTERNAL: + bind = SB_WEAK; + break; + case IMAGE_SYM_CLASS_STATIC: + case IMAGE_SYM_CLASS_LABEL: + default: + bind = SB_LOCAL; + break; + } + + /* Detect SECTION symbols: STATIC, Value==0, name matches the + * section's own name, and the section has at least one aux + * record (the section-definition aux). Mark as SK_SECTION so + * emit_coff regenerates the synthetic entry. */ + int is_section_sym = 0; + if (sclass == IMAGE_SYM_CLASS_STATIC && value == 0 && naux >= 1) { + const CSecRec* cs = &secs[sec_num - 1]; + u32 raw_nlen = 0; + while (raw_nlen < 8 && cs->raw_name[raw_nlen] != '\0') ++raw_nlen; + if (raw_nlen == nlen && memcmp(cs->raw_name, nm, nlen) == 0) { + is_section_sym = 1; + } else if (cs->raw_name[0] == '/') { + /* Long-named section: compare the resolved name. */ + const char* rn; + u32 rnlen; + resolve_section_name(cs->raw_name, strtab, strtab_size, &rn, &rnlen); + if (rnlen == nlen && memcmp(rn, nm, nlen) == 0) is_section_sym = 1; + } + } + + if (is_section_sym) { + kind = SK_SECTION; + sec_sym_primary[sec_num] = i + 1u; + } else if (sclass == IMAGE_SYM_CLASS_SECTION) { + kind = SK_SECTION; + } else if (sclass == IMAGE_SYM_CLASS_LABEL) { + kind = SK_NOTYPE; + } else if ((type >> 8) == IMAGE_SYM_DTYPE_FUNCTION) { + kind = SK_FUNC; + } else if (type == IMAGE_SYM_TYPE_NULL) { + kind = (bind == SB_LOCAL) ? 
SK_NOTYPE : SK_OBJ; + } else { + kind = SK_OBJ; + } + } else { + compiler_panic(c, no_loc(), + "read_coff: symbol section number %d out of range", + (int)sec_num); + } + + /* WEAK_EXTERNAL primary: aux record carries TagIndex + Characteristics. + * cfree's model has SB_WEAK; the fallback symbol is link-time + * resolution by name and we drop the explicit index. */ + if (sclass == IMAGE_SYM_CLASS_WEAK_EXTERNAL) bind = SB_WEAK; + + Sym sn = + nlen ? pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}) : 0; + ObjSymId id = obj_symbol_ex(ob, sn, bind, vis, kind, target_sec, sym_value, + sym_size, cmnalign); + obj_sym_mark_referenced(ob, id); + sym_to_obj[i] = id; + i += 1u + naux; + } + + /* ---- Step 4: stitch COMDAT groups from section-definition aux ---- + * Each COMDAT section has a STATIC primary symbol (the section + * symbol) followed by one section-definition aux record. Selection + * != 0 marks the section as a COMDAT member; the signature symbol + * is the section symbol itself (Number field's selection variant + * controls dedup policy at link time). */ + for (u32 s = 1; s <= nsections; ++s) { + u32 prim_plus1 = sec_sym_primary[s]; + if (!prim_plus1) continue; + u32 prim = prim_plus1 - 1u; + const CSecRec* cs = &secs[s - 1]; + if (!(cs->characteristics & IMAGE_SCN_LNK_COMDAT)) continue; + const u8* p = sym_base + (u64)prim * COFF_SYMBOL_SIZE; + u8 naux = p[17]; + if (!naux) continue; + const u8* aux = p + COFF_SYMBOL_SIZE; + /* Aux layout: Length(4), NumberOfRelocations(2), NumberOfLinenumbers(2), + * CheckSum(4), Number(2), Selection(1), Unused(3). */ + u16 assoc_number = coff_rd_u16(aux + 12); + u8 selection = aux[14]; + if (selection == 0) continue; + + ObjSymId sig = sym_to_obj[prim]; + const ObjSym* sigsym = obj_symbol_get(ob, sig); + Sym gname = sigsym ? 
sigsym->name : 0; + ObjGroupId gid = obj_group(ob, gname, sig, (u32)selection); + obj_group_add_section(ob, gid, cs->obj_sec); + obj_section_set_group(ob, cs->obj_sec, gid); + + /* ASSOCIATIVE: the COMDAT member is associated with another + * section's group. Add this section to that group's list too so + * dead-strip keeps them paired. */ + if (selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE && assoc_number >= 1 && + (u32)assoc_number <= nsections) { + u32 other_prim_plus1 = sec_sym_primary[assoc_number]; + if (other_prim_plus1) { + u32 other_prim = other_prim_plus1 - 1u; + const u8* op = sym_base + (u64)other_prim * COFF_SYMBOL_SIZE; + if (op[17]) { + const u8* oaux = op + COFF_SYMBOL_SIZE; + u8 osel = oaux[14]; + if (osel != 0) { + ObjSymId osig = sym_to_obj[other_prim]; + const ObjSym* osigsym = obj_symbol_get(ob, osig); + Sym ogname = osigsym ? osigsym->name : 0; + ObjGroupId ogid = obj_group(ob, ogname, osig, (u32)osel); + obj_group_add_section(ob, ogid, cs->obj_sec); + } + } + } + } + } + + /* ---- Step 5: per-section relocations ---- */ + for (u32 i = 0; i < nsections; ++i) { + const CSecRec* s = &secs[i]; + if (!s->number_of_relocations) continue; + u64 reloc_end = (u64)s->pointer_to_relocations + + (u64)s->number_of_relocations * (u64)COFF_RELOC_SIZE; + if (reloc_end > (u64)len) + compiler_panic(c, no_loc(), + "read_coff: relocation table for section %u out of range", + i); + const u8* rbase = data + s->pointer_to_relocations; + for (u32 j = 0; j < s->number_of_relocations; ++j) { + const u8* rp = rbase + (u64)j * COFF_RELOC_SIZE; + u32 r_va = coff_rd_u32(rp + 0); + u32 r_sym = coff_rd_u32(rp + 4); + u16 r_type = coff_rd_u16(rp + 8); + + u32 kind = reloc_from(r_type); + if (kind == (u32)-1) + compiler_panic(c, no_loc(), + "read_coff: unsupported reloc type %u for machine %#x", + (u32)r_type, (u32)machine); + + ObjSymId target = OBJ_SYM_NONE; + if (r_sym < nsymbols) target = sym_to_obj[r_sym]; + + /* AMD64 REL32 encodings are relative to a PC after the 
relocated + * field, while cfree's R_PC32-style apply formula subtracts the + * relocation field address P. Plain REL32 is relative to P+4; + * REL32_N is relative to P+N. Record that convention as an + * implicit negative addend so link_reloc_apply can stay format + * neutral. */ + /* ARM64 PAGEOFFSET_12L is one wire code for LDST{8,16,32,64,128}. + * The per-arch translator returns R_AARCH64_LDST64_ABS_LO12_NC by + * default; recover the actual access width from the patched LDR/ + * STR instruction's size field at bits [31:30] (and a SIMD/FP + * extension via bit 26 + opc[23]) so the linker applies the right + * scale. Mismatch panics at apply-time with "misaligned + * address" otherwise — see link_reloc.c. */ + if ((machine == IMAGE_FILE_MACHINE_ARM64 || + machine == IMAGE_FILE_MACHINE_ARM64EC) && + r_type == IMAGE_REL_ARM64_PAGEOFFSET_12L && s->size_of_raw_data && + (u64)r_va + 4u <= (u64)s->size_of_raw_data) { + const u8* ibytes = data + s->pointer_to_raw_data + r_va; + u32 instr = (u32)ibytes[0] | ((u32)ibytes[1] << 8) | + ((u32)ibytes[2] << 16) | ((u32)ibytes[3] << 24); + u32 sz = (instr >> 30) & 0x3u; + int is_simd = (instr >> 26) & 0x1u; + if (is_simd && ((instr >> 23) & 0x1u)) { + kind = R_AARCH64_LDST128_ABS_LO12_NC; + } else { + switch (sz) { + case 0: + kind = R_AARCH64_LDST8_ABS_LO12_NC; + break; + case 1: + kind = R_AARCH64_LDST16_ABS_LO12_NC; + break; + case 2: + kind = R_AARCH64_LDST32_ABS_LO12_NC; + break; + default: + kind = R_AARCH64_LDST64_ABS_LO12_NC; + break; + } + } + } + + i64 addend = 0; + int has_explicit = 0; + if (machine == IMAGE_FILE_MACHINE_AMD64) { + switch (r_type) { + case IMAGE_REL_AMD64_REL32: + addend = -4; + has_explicit = 1; + break; + case IMAGE_REL_AMD64_REL32_1: + addend = -1; + has_explicit = 1; + break; + case IMAGE_REL_AMD64_REL32_2: + addend = -2; + has_explicit = 1; + break; + case IMAGE_REL_AMD64_REL32_3: + addend = -3; + has_explicit = 1; + break; + case IMAGE_REL_AMD64_REL32_4: + addend = -4; + has_explicit = 1; + 
break; + case IMAGE_REL_AMD64_REL32_5: + addend = -5; + has_explicit = 1; + break; + default: + break; + } + } + + obj_reloc_ex(ob, s->obj_sec, r_va, (RelocKind)kind, target, addend, + has_explicit, 0); + } + } + + /* ---- Step 6: finalize and return ---- */ + obj_finalize(ob); + return ob; +} diff --git a/src/obj/coff/read_dso.c b/src/obj/coff/read_dso.c @@ -0,0 +1,237 @@ +/* PE32+ DLL reader. Peer of read_elf_dso / read_macho_dso: walks the + * IMAGE_DIRECTORY_ENTRY_EXPORT data directory of a Windows .dll and + * produces an ObjBuilder of defined OBJ_SEC_NONE symbols — one per + * name in the Export Name Table. The DLL's own Name string (the + * analogue of DT_SONAME / LC_ID_DYLIB) is returned via *soname_out. + * + * The produced ObjBuilder carries no sections, relocations, or groups + * — DSO inputs contribute no bytes to the link. The consumer's + * resolve_undefs pass sees the exports as defined globals and marks + * matching consumer-side undefs as `imported`; the import-table + * emitter (Phase 3 / 4.4) later groups them by providing DLL. + * + * Scope: PE32+ only (IMAGE_NT_OPTIONAL_HDR64_MAGIC), AMD64 or ARM64, + * with IMAGE_FILE_DLL set. Ordinal-only exports (entries present in + * the EAT but absent from the ENT) are not synthesized in v1 — almost + * all real-world imports are by name. Forwarder entries (EAT RVA + * falls within the export directory's own range) are still emitted as + * symbols so the linker can satisfy imports against them; the OS + * loader follows the forwarder chain at runtime. This contract is + * pinned by test/coff/pe-dso-forwarder.c. */ + +#include <string.h> + +#include "core/arena.h" +#include "core/heap.h" +#include "core/pool.h" +#include "core/slice.h" +#include "obj/coff/coff.h" + +static SrcLoc no_loc(void) { + SrcLoc l = {0, 0, 0}; + return l; +} + +/* ---- RVA -> file offset ---- + * Walks the section table once per call. 
Returns 1 on success and + * fills *off_out; returns 0 if the RVA falls outside every section's + * [VirtualAddress, VirtualAddress + max(VirtualSize, SizeOfRawData)) + * range or the resulting file offset would exceed `len`. */ +static int rva_to_offset(const u8* shdrs, u16 nsec, u32 rva, size_t len, + u64* off_out) { + for (u16 i = 0; i < nsec; ++i) { + const u8* sh = shdrs + (u64)i * COFF_SECTION_HEADER_SIZE; + u32 vsize = coff_rd_u32(sh + 8); + u32 vaddr = coff_rd_u32(sh + 12); + u32 raw_size = coff_rd_u32(sh + 16); + u32 raw_ptr = coff_rd_u32(sh + 20); + /* Some linkers leave VirtualSize == 0 in objects; use raw_size as + * a fallback so we still resolve RVAs in well-formed images. */ + u32 span = vsize ? vsize : raw_size; + if (rva >= vaddr && rva < vaddr + span) { + u64 delta = (u64)(rva - vaddr); + if (delta >= raw_size) return 0; /* RVA past on-disk data */ + u64 off = (u64)raw_ptr + delta; + if (off >= len) return 0; + *off_out = off; + return 1; + } + } + return 0; +} + +/* Read a NUL-terminated string starting at `off`, bounded by `len`. + * Returns the string length (excluding NUL); writes the pointer to + * *out. Returns 0 if off is out of range or the string is not + * terminated within the file. 
*/ +static u32 read_cstr(const u8* data, size_t len, u64 off, const char** out) { + if (off >= len) { + *out = ""; + return 0; + } + const char* s = (const char*)(data + off); + u64 max = (u64)len - off; + u64 n = 0; + while (n < max && s[n] != '\0') ++n; + if (n == max) { + *out = ""; + return 0; + } /* unterminated */ + *out = s; + return (u32)n; +} + +ObjBuilder* read_coff_dso(Compiler* c, const char* name, const u8* data, + size_t len, Sym* soname_out) { + (void)name; + if (soname_out) *soname_out = 0; + + /* ---- DOS header + PE signature ---- */ + if (len < COFF_DOS_HEADER_SIZE) + compiler_panic(c, no_loc(), "read_coff_dso: input shorter than DOS header"); + u16 e_magic = coff_rd_u16(data + 0); + if (e_magic != IMAGE_DOS_SIGNATURE) + compiler_panic(c, no_loc(), "read_coff_dso: bad DOS magic 0x%x", e_magic); + u32 e_lfanew = coff_rd_u32(data + 60); + + u64 nt_end = (u64)e_lfanew + 4u + COFF_FILE_HEADER_SIZE + COFF_OPT_HDR64_SIZE; + if (nt_end > len) + compiler_panic(c, no_loc(), + "read_coff_dso: PE headers extend past end of file"); + + u32 pe_sig = coff_rd_u32(data + e_lfanew); + if (pe_sig != IMAGE_NT_SIGNATURE) + compiler_panic(c, no_loc(), "read_coff_dso: bad PE signature 0x%x", pe_sig); + + /* ---- IMAGE_FILE_HEADER ---- */ + const u8* fh = data + e_lfanew + 4u; + u16 machine = coff_rd_u16(fh + 0); + u16 nsec = coff_rd_u16(fh + 2); + u16 size_of_opt = coff_rd_u16(fh + 16); + u16 chars = coff_rd_u16(fh + 18); + + if (machine != IMAGE_FILE_MACHINE_AMD64 && + machine != IMAGE_FILE_MACHINE_ARM64) + compiler_panic(c, no_loc(), "read_coff_dso: unsupported machine 0x%x", + machine); + if (!(chars & IMAGE_FILE_DLL)) + compiler_panic(c, no_loc(), + "read_coff_dso: not a DLL (Characteristics=0x%x)", chars); + if (size_of_opt < COFF_OPT_HDR64_SIZE) + compiler_panic(c, no_loc(), + "read_coff_dso: SizeOfOptionalHeader %u too small for PE32+", + size_of_opt); + + /* ---- IMAGE_OPTIONAL_HEADER64 ---- */ + const u8* oh = fh + COFF_FILE_HEADER_SIZE; + u16 opt_magic = 
coff_rd_u16(oh + 0); + if (opt_magic != IMAGE_NT_OPTIONAL_HDR64_MAGIC) + compiler_panic(c, no_loc(), + "read_coff_dso: not PE32+ (optional header Magic=0x%x)", + opt_magic); + + /* DataDirectory begins at offset 112 inside the PE32+ optional header + * (28 standard + 84 windows-specific + NumberOfRvaAndSizes = 112). */ + const u8* data_dir = oh + COFF_OPT_HDR64_SIZE - + COFF_NUM_DATA_DIRECTORIES * COFF_DATA_DIRECTORY_SIZE; + u32 export_rva = coff_rd_u32(data_dir + IMAGE_DIRECTORY_ENTRY_EXPORT * + COFF_DATA_DIRECTORY_SIZE); + u32 export_size = coff_rd_u32( + data_dir + IMAGE_DIRECTORY_ENTRY_EXPORT * COFF_DATA_DIRECTORY_SIZE + 4u); + + /* ---- section table ---- */ + u64 shdrs_off = (u64)e_lfanew + 4u + COFF_FILE_HEADER_SIZE + size_of_opt; + u64 shdrs_end = shdrs_off + (u64)nsec * COFF_SECTION_HEADER_SIZE; + if (shdrs_end > len) + compiler_panic(c, no_loc(), + "read_coff_dso: section table extends past end of file"); + const u8* shdrs = data + shdrs_off; + + ObjBuilder* ob = obj_new(c); + if (!ob) compiler_panic(c, no_loc(), "read_coff_dso: obj_new failed"); + + /* No export directory => empty DSO (legal for stub DLLs). */ + if (export_size == 0 || export_rva == 0) { + obj_finalize(ob); + return ob; + } + + u64 exp_off; + if (!rva_to_offset(shdrs, nsec, export_rva, len, &exp_off)) + compiler_panic(c, no_loc(), + "read_coff_dso: export directory RVA 0x%x out of range", + export_rva); + if (exp_off + COFF_EXPORT_DIR_SIZE > len) + compiler_panic(c, no_loc(), "read_coff_dso: export directory truncated"); + + const u8* ed = data + exp_off; + u32 name_rva = coff_rd_u32(ed + 12); + u32 num_funcs = coff_rd_u32(ed + 20); + u32 num_names = coff_rd_u32(ed + 24); + u32 eat_rva = coff_rd_u32(ed + 28); + u32 ent_rva = coff_rd_u32(ed + 32); + u32 ord_rva = coff_rd_u32(ed + 36); + /* Base (ed + 16) is the user-visible ordinal offset; the cfree linker + * matches imports by name, so we don't propagate it. 
*/ + + /* ---- DLL name (soname) ---- */ + if (name_rva) { + u64 name_off; + if (!rva_to_offset(shdrs, nsec, name_rva, len, &name_off)) + compiler_panic(c, no_loc(), + "read_coff_dso: DLL name RVA 0x%x out of range", name_rva); + const char* dll_name; + u32 nlen = read_cstr(data, len, name_off, &dll_name); + if (nlen && soname_out) + *soname_out = + pool_intern_slice(c->global, (Slice){.s = dll_name, .len = nlen}); + } + + /* ---- resolve EAT / ENT / ordinal table once ---- */ + u64 eat_off = 0, ent_off = 0, ord_off = 0; + if (num_names) { + if (!rva_to_offset(shdrs, nsec, eat_rva, len, &eat_off)) + compiler_panic(c, no_loc(), "read_coff_dso: EAT RVA 0x%x out of range", + eat_rva); + if (!rva_to_offset(shdrs, nsec, ent_rva, len, &ent_off)) + compiler_panic(c, no_loc(), "read_coff_dso: ENT RVA 0x%x out of range", + ent_rva); + if (!rva_to_offset(shdrs, nsec, ord_rva, len, &ord_off)) + compiler_panic(c, no_loc(), + "read_coff_dso: ordinal table RVA 0x%x out of range", + ord_rva); + if (ent_off + (u64)num_names * 4u > len || + ord_off + (u64)num_names * 2u > len) + compiler_panic(c, no_loc(), + "read_coff_dso: ENT/ordinal table extends past file"); + if (eat_off + (u64)num_funcs * 4u > len) + compiler_panic(c, no_loc(), "read_coff_dso: EAT extends past file"); + } + + /* ---- walk the ENT ---- + * Forwarders (EAT RVA inside [export_rva, export_rva + export_size)) + * still produce a symbol: cfree's linker doesn't follow the chain, + * but the import needs to be satisfiable so the OS loader can. */ + for (u32 i = 0; i < num_names; ++i) { + u32 nrva = coff_rd_u32(data + ent_off + (u64)i * 4u); + u16 ord = coff_rd_u16(data + ord_off + (u64)i * 2u); + if (ord >= num_funcs) continue; /* malformed; skip rather than panic */ + /* func_rva is fetched for forwarder classification only; cfree does + * not consume the address itself (DSO symbols are OBJ_SEC_NONE). 
*/ + u32 func_rva = coff_rd_u32(data + eat_off + (u64)ord * 4u); + (void)func_rva; /* see comment above re: forwarders */ + + u64 name_off; + if (!rva_to_offset(shdrs, nsec, nrva, len, &name_off)) continue; + const char* nm; + u32 nlen = read_cstr(data, len, name_off, &nm); + if (!nlen) continue; + + Sym sn = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}); + ObjSymId id = obj_symbol(ob, sn, SB_GLOBAL, SK_FUNC, OBJ_SEC_NONE, 0, 0); + obj_sym_mark_referenced(ob, id); + } + + obj_finalize(ob); + return ob; +} diff --git a/src/obj/coff/reloc_aarch64.c b/src/obj/coff/reloc_aarch64.c @@ -0,0 +1,96 @@ +/* RelocKind <-> AArch64 PE/COFF reloc-type mapping. Mirror of + * elf_reloc_aarch64.c for PE/COFF. + * + * The ARM64 PE/COFF reloc set covers the common AArch64 patch sites: + * ADRP page-base / page-offset pairs, BRANCH26/19/14, ADDR32/64, plus + * the section-relative SECREL family which cfree does not model in v1. + * PAGEOFFSET_12L collapses all LDST*_ABS_LO12_NC widths into one wire + * code; the width is recoverable from the patched LDR/STR instruction + * encoding, so the reader picks the LDST64 form and the consumer can + * disambiguate later if it cares. ADDR32NB is image-relative; v1 + * collapses it to R_ABS32 and lets layout subtract the image base. 
*/ + +#include "obj/coff/coff.h" + +u32 coff_aarch64_reloc_to(u32 kind /* RelocKind */) { + switch (kind) { + case R_NONE: + return IMAGE_REL_ARM64_ABSOLUTE; + case R_ABS64: + return IMAGE_REL_ARM64_ADDR64; + case R_ABS32: + return IMAGE_REL_ARM64_ADDR32; + case R_AARCH64_CALL26: + case R_AARCH64_JUMP26: + return IMAGE_REL_ARM64_BRANCH26; + case R_AARCH64_CONDBR19: + return IMAGE_REL_ARM64_BRANCH19; + case R_AARCH64_TSTBR14: + return IMAGE_REL_ARM64_BRANCH14; + case R_AARCH64_ADR_PREL_PG_HI21: + case R_AARCH64_ADR_PREL_PG_HI21_NC: + return IMAGE_REL_ARM64_PAGEBASE_REL21; + case R_AARCH64_ADR_PREL_LO21: + return IMAGE_REL_ARM64_REL21; + case R_AARCH64_ADD_ABS_LO12_NC: + return IMAGE_REL_ARM64_PAGEOFFSET_12A; + case R_AARCH64_LDST8_ABS_LO12_NC: + case R_AARCH64_LDST16_ABS_LO12_NC: + case R_AARCH64_LDST32_ABS_LO12_NC: + case R_AARCH64_LDST64_ABS_LO12_NC: + case R_AARCH64_LDST128_ABS_LO12_NC: + return IMAGE_REL_ARM64_PAGEOFFSET_12L; + case R_PC32: + case R_REL32: + return IMAGE_REL_ARM64_REL32; + case R_COFF_SECREL: + return IMAGE_REL_ARM64_SECREL; + case R_COFF_SECTION: + return IMAGE_REL_ARM64_SECTION; + case R_COFF_AARCH64_SECREL_LOW12A: + return IMAGE_REL_ARM64_SECREL_LOW12A; + case R_COFF_AARCH64_SECREL_HIGH12A: + return IMAGE_REL_ARM64_SECREL_HIGH12A; + default: + return IMAGE_REL_ARM64_ABSOLUTE; + } +} + +u32 coff_aarch64_reloc_from(u32 wire_type) { + switch (wire_type) { + case IMAGE_REL_ARM64_ABSOLUTE: + return R_NONE; + case IMAGE_REL_ARM64_ADDR64: + return R_ABS64; + case IMAGE_REL_ARM64_ADDR32: + return R_ABS32; + case IMAGE_REL_ARM64_ADDR32NB: + return R_ABS32; + case IMAGE_REL_ARM64_BRANCH26: + return R_AARCH64_CALL26; + case IMAGE_REL_ARM64_BRANCH19: + return R_AARCH64_CONDBR19; + case IMAGE_REL_ARM64_BRANCH14: + return R_AARCH64_TSTBR14; + case IMAGE_REL_ARM64_PAGEBASE_REL21: + return R_AARCH64_ADR_PREL_PG_HI21; + case IMAGE_REL_ARM64_REL21: + return R_AARCH64_ADR_PREL_LO21; + case IMAGE_REL_ARM64_PAGEOFFSET_12A: + return R_AARCH64_ADD_ABS_LO12_NC; + 
case IMAGE_REL_ARM64_PAGEOFFSET_12L: + return R_AARCH64_LDST64_ABS_LO12_NC; + case IMAGE_REL_ARM64_REL32: + return R_PC32; + case IMAGE_REL_ARM64_SECREL: + return R_COFF_SECREL; + case IMAGE_REL_ARM64_SECTION: + return R_COFF_SECTION; + case IMAGE_REL_ARM64_SECREL_LOW12A: + return R_COFF_AARCH64_SECREL_LOW12A; + case IMAGE_REL_ARM64_SECREL_HIGH12A: + return R_COFF_AARCH64_SECREL_HIGH12A; + default: + return (u32)-1; /* sentinel */ + } +} diff --git a/src/obj/coff/reloc_x86_64.c b/src/obj/coff/reloc_x86_64.c @@ -0,0 +1,76 @@ +/* RelocKind <-> x86_64 PE/COFF reloc-type mapping. Mirror of + * elf_reloc_x86_64.c for PE/COFF. + * + * PE/COFF's AMD64 reloc set is much narrower than ELF's: only ABSOLUTE, + * ADDR64, ADDR32, ADDR32NB, REL32 (with REL32_1..5 implicit-addend + * variants), plus a few section-relative forms cfree does not model in + * v1. We emit plain REL32 (4) for every PC-relative kind and let the + * explicit Reloc.addend ride in the patched bytes; on the read side + * REL32_1..5 collapse to R_PC32 (the reader applies the implicit + * addend itself). IMAGE_REL_AMD64_ABSOLUTE (== 0) doubles as the + * "unsupported" sentinel on the _to side, matching the ELF contract. 
*/ + +#include "obj/coff/coff.h" + +u32 coff_x86_64_reloc_to(u32 kind /* RelocKind */) { + switch (kind) { + case R_NONE: + return IMAGE_REL_AMD64_ABSOLUTE; + case R_ABS64: + return IMAGE_REL_AMD64_ADDR64; + case R_ABS32: + return IMAGE_REL_AMD64_ADDR32; + case R_X64_32S: + return IMAGE_REL_AMD64_ADDR32NB; + case R_PC32: + case R_REL32: + case R_PLT32: + case R_X64_PLT32: + case R_X64_GOTPCREL: + case R_X64_GOTPCRELX: + case R_X64_REX_GOTPCRELX: + return IMAGE_REL_AMD64_REL32; + case R_COFF_SECREL: + return IMAGE_REL_AMD64_SECREL; + case R_COFF_SECTION: + return IMAGE_REL_AMD64_SECTION; + default: + return IMAGE_REL_AMD64_ABSOLUTE; + } +} + +u32 coff_x86_64_reloc_from(u32 wire_type) { + switch (wire_type) { + case IMAGE_REL_AMD64_ABSOLUTE: + return R_NONE; + case IMAGE_REL_AMD64_ADDR64: + return R_ABS64; + case IMAGE_REL_AMD64_ADDR32: + return R_ABS32; + case IMAGE_REL_AMD64_ADDR32NB: + return R_X64_32S; + case IMAGE_REL_AMD64_REL32: + case IMAGE_REL_AMD64_REL32_1: + case IMAGE_REL_AMD64_REL32_2: + case IMAGE_REL_AMD64_REL32_3: + case IMAGE_REL_AMD64_REL32_4: + case IMAGE_REL_AMD64_REL32_5: + return R_PC32; + case IMAGE_REL_AMD64_SECREL: + return R_COFF_SECREL; + case IMAGE_REL_AMD64_SECTION: + return R_COFF_SECTION; + /* SECREL7 (7-bit section-relative) appears in mingw-emitted archive + * members (intrinsic helpers, exception tables, DWARF). cfree + * doesn't currently apply or emit these, but panicking at read + * time would block ingesting any mingw archive whose non-import + * members carry .debug_info / .pdata. Map to R_NONE so the + * relocation slot is preserved structurally but treated as a + * no-op by the relocator; the member can still be dead-stripped + * when nothing references it. */ + case IMAGE_REL_AMD64_SECREL7: + return R_NONE; + default: + return (u32)-1; /* sentinel */ + } +} diff --git a/src/obj/coff_emit.c b/src/obj/coff_emit.c @@ -1,732 +0,0 @@ -/* PE/COFF relocatable .obj writer. 
Walks a finalized ObjBuilder and - * emits a 64-bit little-endian relocatable object via the supplied - * Writer. Counterpart to emit_elf / emit_macho. - * - * Layout strategy: - * 1. plan COFF sections (one per kept obj section), assigning - * Characteristics, alignment, raw size, and per-section reloc - * counts; - * 2. build the symbol table (synthesized per-section static symbols - * with section-definition aux records, plus file symbols and - * every ObjSym kept after sweep); - * 3. build per-section relocation records via the per-arch - * translator (arch_for_compiler(c)->coff->reloc_to); - * 4. assign file offsets: - * file header | section headers | (bytes + relocs)* | symtab | strtab - * 5. write the file in that order. - * - * 64-bit little-endian only — IMAGE_FILE_MACHINE_AMD64 (x86_64) and - * IMAGE_FILE_MACHINE_ARM64 (aarch64). Big-endian / ptr_size != 8 panic - * at entry. - * - * Section name mapping policy: we pass the cfree Section.name through - * verbatim to the COFF Name field. Callers / readers are expected to - * have stored COFF-shaped names (".text", ".rdata", ".tls$", etc.) at - * the obj layer; emit_coff does not rewrite ELF-style spellings like - * ".rodata" -> ".rdata". Names longer than 8 bytes spill into the - * string table with the "/<decimal-offset>" encoding. - * - * Addend handling: COFF stores the addend inline in the patched bytes - * (there is no addend field in IMAGE_RELOCATION). The ObjBuilder - * caller is responsible for having written the addend into the section - * bytes already — matching how MSVC / mingw emit. A nonzero - * Reloc::addend with has_explicit_addend set is rejected here as a - * known v1 limitation. 
*/ - -#include <string.h> - -#include "arch/arch.h" -#include "core/arena.h" -#include "core/buf.h" -#include "core/heap.h" -#include "core/pool.h" -#include "core/slice.h" -#include "core/util.h" -#include "obj/coff.h" - -static SrcLoc no_loc(void) { - SrcLoc l = {0, 0, 0}; - return l; -} - -static int coff_rel32_absorbs_minus4(CfreeArchKind arch, RelocKind kind, - i64 addend) { - if (arch != CFREE_ARCH_X86_64 || addend != -4) return 0; - switch (kind) { - case R_PC32: - case R_REL32: - case R_PLT32: - case R_X64_PLT32: - case R_X64_GOTPCREL: - case R_X64_GOTPCRELX: - case R_X64_REX_GOTPCRELX: - return 1; - default: - return 0; - } -} - -/* ---- per-COFF-section plan record ---- */ - -typedef struct CSec { - /* IMAGE_SECTION_HEADER fields (little-endian-encoded at write time). */ - char name8[8]; /* Name field bytes; "/N" form if long name */ - u32 virtual_size; /* nonzero for NOBITS (bss size) */ - u32 size_of_raw_data; /* zero for NOBITS */ - u32 pointer_to_raw_data; - u32 pointer_to_relocations; - u16 number_of_relocations; - u32 characteristics; /* IMAGE_SCN_* | ALIGN nibble */ - - /* Planning state. */ - u32 align; /* in bytes, power of two */ - u32 obj_sec; /* originating ObjSecId */ - int is_nobits; - const Buf* obj_bytes; /* NULL when nobits */ - u8* reloc_bytes; /* arena-allocated, nreloc * 10 bytes */ - ObjGroupId group_id; /* OBJ_GROUP_NONE if not in a group */ -} CSec; - -/* ---- emit ---- */ - -static u32 log2_align(u32 a) { - u32 r = 0; - while ((1u << r) < a) ++r; - return r; -} - -/* Map cfree section flags/sem to IMAGE_SCN_* Characteristics, leaving - * the alignment nibble for the caller to OR in. 
*/ -static u32 sec_characteristics(const Section* s, int in_group) { - u32 r = 0; - int is_bss = (s->kind == SEC_BSS) || (s->sem == SSEM_NOBITS); - if (s->flags & SF_EXEC) { - r |= IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_EXECUTE; - } else if (is_bss) { - r |= IMAGE_SCN_CNT_UNINITIALIZED_DATA; - } else if (s->flags & SF_WRITE) { - r |= IMAGE_SCN_CNT_INITIALIZED_DATA; - } else if (s->flags & SF_ALLOC) { - /* Read-only allocated data (.rdata). */ - r |= IMAGE_SCN_CNT_INITIALIZED_DATA; - } - if (s->flags & SF_ALLOC) r |= IMAGE_SCN_MEM_READ; - if (s->flags & SF_WRITE) r |= IMAGE_SCN_MEM_WRITE; - if (in_group) r |= IMAGE_SCN_LNK_COMDAT; - /* When a reader stashed format-specific flag bits on a COFF-origin - * section, OR them back in here. ext_type carries the raw - * Characteristics value (or zero if no override); ext_flags is a - * sibling bag for any bits the canonical mapping above would lose. */ - if (s->ext_kind == OBJ_EXT_COFF) { - if (s->ext_type) { - /* Preserve the raw characteristics verbatim — overrides the - * canonical mapping. Keeps round-trip byte-stable for sections - * carrying CNT_INFO / LNK_REMOVE / MEM_DISCARDABLE / etc. */ - r = s->ext_type & ~IMAGE_SCN_ALIGN_MASK; - } - r |= s->ext_flags; - } - return r; -} - -/* Append `len` bytes of `s` followed by a single NUL to `b`, returning - * the offset at which `s` was placed. Dedupe linearly — strtabs are - * small enough that this is fine without a hash table, and the - * dedupe matches what binutils / llvm-objcopy emit. Mirror of the - * helper in elf_emit. */ -static u32 strtab_add(Buf* b, const char* s, u32 len) { - if (len == 0) return 0; - u32 total = buf_pos(b); - if (total > len) { - u8 stack[256]; - u8* tmp = - total <= sizeof stack ? stack : (u8*)b->heap->alloc(b->heap, total, 1); - if (tmp) { - buf_flatten(b, tmp); - /* Skip the first 4 bytes (the size-prefix placeholder) when - * searching for matches. 
*/ - u32 start = COFF_STRTAB_SIZE_FIELD_BYTES; - if (total > start + len) { - for (u32 i = start; i + len < total; ++i) { - if (tmp[i + len] == 0 && memcmp(tmp + i, s, len) == 0) { - if (tmp != stack) b->heap->free(b->heap, tmp, total); - return i; - } - } - } - if (tmp != stack) b->heap->free(b->heap, tmp, total); - } - } - u32 off = total; - buf_write(b, s, len); - { - u8 z = 0; - buf_write(b, &z, 1); - } - return off; -} - -/* Encode an 8-byte Name field. If the name fits in 8 bytes, copy - * verbatim and zero-pad. Otherwise allocate the name in `strtab` and - * write "/<decimal-offset>" (NUL-padded to 8 bytes). */ -static void encode_name8(char out[8], const char* name, u32 nlen, Buf* strtab) { - memset(out, 0, 8); - if (nlen <= 8) { - if (nlen) memcpy(out, name, nlen); - return; - } - u32 off = strtab_add(strtab, name, nlen); - /* "/<decimal-offset>" — up to 7 decimal digits leaves room for the - * leading slash within 8 bytes. COFF .obj strtabs are < 1 MiB in - * practice, so 7 digits is plenty. */ - char tmp[16]; - int n = 0; - tmp[n++] = '/'; - /* Decimal-format off into tmp+1. */ - char dig[12]; - int d = 0; - u32 v = off; - if (v == 0) { - dig[d++] = '0'; - } else { - while (v) { - dig[d++] = (char)('0' + (v % 10u)); - v /= 10u; - } - } - while (d > 0 && n < (int)sizeof tmp) tmp[n++] = dig[--d]; - if (n > 8) n = 8; - memcpy(out, tmp, (size_t)n); -} - -/* Write one 18-byte IMAGE_SYMBOL record into `dst`. */ -static void wr_sym(u8* dst, const char ShortName[8], u32 Zeroes, u32 Offset, - u32 Value, i16 SectionNumber, u16 Type, u8 StorageClass, - u8 NumberOfAuxSymbols) { - if (Zeroes == 0 && Offset != 0) { - /* LongName form: 4 zero bytes then 4-byte LE strtab offset. 
*/ - memset(dst, 0, 4); - wr_u32_le(dst + 4, Offset); - } else { - memcpy(dst, ShortName, 8); - } - wr_u32_le(dst + 8, Value); - wr_u16_le(dst + 12, (u16)SectionNumber); - wr_u16_le(dst + 14, Type); - dst[16] = StorageClass; - dst[17] = NumberOfAuxSymbols; -} - -/* Write a section-definition aux record (18 bytes). */ -static void wr_aux_secdef(u8* dst, u32 Length, u16 NumberOfRelocations, - u16 NumberOfLinenumbers, u32 CheckSum, u16 Number, - u8 Selection) { - wr_u32_le(dst + 0, Length); - wr_u16_le(dst + 4, NumberOfRelocations); - wr_u16_le(dst + 6, NumberOfLinenumbers); - wr_u32_le(dst + 8, CheckSum); - wr_u16_le(dst + 12, Number); - dst[14] = Selection; - dst[15] = 0; - dst[16] = 0; - dst[17] = 0; -} - -/* Write a weak-externals aux record (18 bytes). */ -static void wr_aux_weak(u8* dst, u32 TagIndex, u32 Characteristics) { - wr_u32_le(dst + 0, TagIndex); - wr_u32_le(dst + 4, Characteristics); - memset(dst + 8, 0, 10); -} - -/* Look up the pool-interned string for a Sym. */ -static const char* sym_to_str(Compiler* c, Sym n, u32* len_out) { - Slice sl = pool_slice(c->global, n); - const char* s = sl.s; - if (!s) { - *len_out = 0; - return ""; - } - *len_out = (u32)sl.len; - return s; -} - -void emit_coff(Compiler* c, ObjBuilder* ob, Writer* w) { - Heap* h = (Heap*)c->ctx->heap; - - /* Tombstone sweep — see obj_sweep_dead. */ - obj_sweep_dead(ob); - - /* ---- target validation ----------------------------------------- */ - const ArchImpl* arch = arch_for_compiler(c); - const ArchCoffOps* coff = arch ? 
arch->coff : NULL; - if (!coff || !coff->reloc_to) { - compiler_panic(c, no_loc(), "emit_coff: unsupported target arch %u", - (u32)c->target.arch); - } - u16 machine = coff->machine; - u32 (*reloc_to)(u32) = coff->reloc_to; - if (c->target.big_endian) { - compiler_panic(c, no_loc(), "emit_coff: big-endian COFF not supported"); - } - if (c->target.ptr_size != 8) { - compiler_panic(c, no_loc(), "emit_coff: ptr_size %u (expected 8)", - (u32)c->target.ptr_size); - } - - /* ---- pass 1: plan sections ------------------------------------- */ - u32 nobjsec = obj_section_count(ob); - CSec* secs = arena_zarray(c->scratch, CSec, nobjsec ? nobjsec : 1); - u32* obj_to_coff = arena_zarray(c->scratch, u32, nobjsec ? nobjsec : 1); - u32 nsecs = 0; - - /* String table — leading 4-byte size placeholder. Real strings start - * at offset 4. */ - Buf strtab; - buf_init(&strtab, h); - { - u8 zero4[COFF_STRTAB_SIZE_FIELD_BYTES] = {0, 0, 0, 0}; - buf_write(&strtab, zero4, COFF_STRTAB_SIZE_FIELD_BYTES); - } - - for (u32 i = 1; i < nobjsec; ++i) { - const Section* s = obj_section_get(ob, i); - if (s->removed) continue; - /* Skip ELF-style synthetic sections (a reader from another format - * may have surfaced them) — COFF stores symtab/strtab/relocs - * out-of-band, not as named sections. */ - if (s->sem == SSEM_SYMTAB || s->sem == SSEM_STRTAB || - s->sem == SSEM_RELA || s->sem == SSEM_REL || s->sem == SSEM_GROUP) { - continue; - } - - CSec* cs = &secs[nsecs]; - u32 nlen; - const char* nm = sym_to_str(c, s->name, &nlen); - encode_name8(cs->name8, nm, nlen, &strtab); - - cs->obj_sec = i; - cs->group_id = s->group_id; - cs->align = s->align ? s->align : 1; - - int in_group = (s->group_id != OBJ_GROUP_NONE); - u32 ch = sec_characteristics(s, in_group); - /* Alignment lives in bits 20..23. Cap at log2(8192)=13 -> nibble - * value 14 (IMAGE_SCN_ALIGN_8192BYTES). 
*/ - u32 lg = log2_align(cs->align); - if (lg > 13) lg = 13; - ch &= ~IMAGE_SCN_ALIGN_MASK; - ch |= IMAGE_SCN_ALIGN_FROM_LOG2(lg); - cs->characteristics = ch; - - if (s->sem == SSEM_NOBITS || s->kind == SEC_BSS) { - cs->is_nobits = 1; - cs->virtual_size = s->bss_size; - cs->size_of_raw_data = 0; - cs->obj_bytes = NULL; - } else { - cs->is_nobits = 0; - cs->virtual_size = 0; - cs->size_of_raw_data = s->bytes.total; - cs->obj_bytes = &s->bytes; - } - - obj_to_coff[i] = nsecs + 1; /* 1-based; matches SectionNumber. */ - nsecs++; - } - - /* ---- pass 2: count and assign per-section reloc counts --------- */ - /* COFF stores NumberOfRelocations as u16; sections with > 65535 - * relocs use the IMAGE_SCN_LNK_NRELOC_OVFL extension which we don't - * implement in v1. Panic if any single section exceeds the limit. */ - u32 total_relocs = obj_reloc_total(ob); - for (u32 ci = 0; ci < nsecs; ++ci) { - CSec* cs = &secs[ci]; - u32 nr = obj_reloc_count(ob, cs->obj_sec); - if (nr > 0xFFFFu) { - compiler_panic(c, no_loc(), - "emit_coff: section %u has %u relocs (max 65535)", - (u32)cs->obj_sec, nr); - } - cs->number_of_relocations = (u16)nr; - } - - /* ---- pass 3: build the symbol table ---------------------------- */ - /* Count ObjSyms (incl. tombstoned — we'll skip those when emitting). */ - u32 nobjsym = 0; - { - ObjSymIter* it = obj_symiter_new(ob); - ObjSymEntry e; - while (obj_symiter_next(it, &e)) ++nobjsym; - obj_symiter_free(it); - } - - /* Upper bound on symbol-table records (including aux slots): - * - 2 records per section symbol (primary + 1 aux secdef) - * - 2 records per ObjSym (primary + up to 1 weak aux) - * - +2 spare for safety - * Worst case is generous; we trim by tracking nrecords as we emit. */ - u32 max_records = 2u * nsecs + 2u * nobjsym + 4u; - u8* symtab = (u8*)arena_zarray(c->scratch, u8, - (size_t)COFF_SYMBOL_SIZE * max_records); - u32 nrecords = 0; - - /* obj_id -> COFF symbol index (including aux slots). 
Index 0 is - * reserved as "none" in our internal map (a real COFF symbol may - * legitimately live at index 0, but no ObjSym ever maps there since - * we never put OBJ_SYM_NONE through). */ - u32* sym_to_coff = arena_zarray(c->scratch, u32, nobjsym + 2); - - /* Section symbols first — one STATIC per kept obj section, each - * followed by a SECTION DEFINITION aux. Reloc-against-section in - * other tools' output uses these; emitting them unconditionally - * matches what clang / mingw emit and gives readers a stable target. */ - u32* secsym_index = arena_zarray(c->scratch, u32, nsecs + 1); - for (u32 ci = 0; ci < nsecs; ++ci) { - CSec* cs = &secs[ci]; - char short_name[8]; - /* The section symbol's name is the section's own name (truncated - * to 8 bytes — section symbols never use the strtab spill form in - * MSVC/clang output). */ - memcpy(short_name, cs->name8, 8); - - u8* slot = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE; - wr_sym(slot, short_name, /*Zeroes*/ 1, /*Offset*/ 0, - /*Value*/ 0, - /*SectionNumber*/ (i16)(ci + 1), - /*Type*/ IMAGE_SYM_TYPE_NULL, - /*StorageClass*/ IMAGE_SYM_CLASS_STATIC, - /*NumberOfAuxSymbols*/ 1); - secsym_index[ci] = nrecords; - nrecords++; - - /* Section-definition aux. For COMDAT members we encode the - * Selection from the group; default to SELECT_ANY which is what - * gcc/clang emit unless the user requests a specific selection - * mode. The associated-section Number is left at 0 (cfree does - * not produce associative-COMDAT chains today). */ - u8 selection = 0; - if (cs->group_id != OBJ_GROUP_NONE) { - const ObjGroup* g = obj_group_get(ob, cs->group_id); - if (g && !g->removed) { - selection = g->flags ? 
(u8)IMAGE_COMDAT_SELECT_ANY - : (u8)IMAGE_COMDAT_SELECT_ANY; - } - } - u8* aux = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE; - wr_aux_secdef(aux, /*Length*/ cs->size_of_raw_data, - /*NumberOfRelocations*/ cs->number_of_relocations, - /*NumberOfLinenumbers*/ 0, - /*CheckSum*/ 0, - /*Number*/ 0, - /*Selection*/ selection); - nrecords++; - } - - /* File / regular symbols. */ - { - ObjSymIter* it = obj_symiter_new(ob); - ObjSymEntry e; - while (obj_symiter_next(it, &e)) { - const ObjSym* s = e.sym; - if (s->removed) continue; - if (s->kind == SK_IFUNC) { - compiler_panic(c, no_loc(), - "emit_coff: SK_IFUNC has no PE/COFF representation"); - } - /* Don't re-emit SK_SECTION symbols — section symbols are - * synthesized above. Map any input-side SK_SECTION onto the - * already-emitted one. */ - if (s->kind == SK_SECTION) { - if (s->section_id && s->section_id < nobjsec) { - u32 ci = obj_to_coff[s->section_id]; - if (ci) sym_to_coff[e.id] = secsym_index[ci - 1]; - } - continue; - } - - u32 nlen; - const char* nm = sym_to_str(c, s->name, &nlen); - - if (s->kind == SK_FILE) { - /* File symbol: name ".file" (short), section IMAGE_SYM_DEBUG, - * storage class FILE, followed by aux records carrying the - * NUL-padded file path (18 bytes per aux). */ - u32 file_len = nlen; - u32 naux = file_len ? (file_len + COFF_AUX_FILE_SIZE - 1u) / - COFF_AUX_FILE_SIZE - : 1u; - char short_name[8] = {'.', 'f', 'i', 'l', 'e', 0, 0, 0}; - u8* slot = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE; - wr_sym(slot, short_name, 1, 0, /*Value*/ 0, - /*SectionNumber*/ (i16)IMAGE_SYM_DEBUG, - /*Type*/ IMAGE_SYM_TYPE_NULL, - /*StorageClass*/ IMAGE_SYM_CLASS_FILE, - /*NumberOfAuxSymbols*/ (u8)naux); - sym_to_coff[e.id] = nrecords; - nrecords++; - for (u32 a = 0; a < naux; ++a) { - u8* aux = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE; - memset(aux, 0, COFF_AUX_FILE_SIZE); - u32 off = a * COFF_AUX_FILE_SIZE; - u32 copy = file_len > off ? 
file_len - off : 0; - if (copy > COFF_AUX_FILE_SIZE) copy = COFF_AUX_FILE_SIZE; - if (copy) memcpy(aux, nm + off, copy); - nrecords++; - } - continue; - } - - /* Regular symbol. */ - char short_name[8]; - u32 zeroes = 1, offset = 0; - memset(short_name, 0, 8); - if (nlen <= 8) { - if (nlen) memcpy(short_name, nm, nlen); - } else { - zeroes = 0; - offset = strtab_add(&strtab, nm, nlen); - } - - i16 section_number = 0; - u32 value = 0; - u8 storage_class = IMAGE_SYM_CLASS_NULL; - u16 type = IMAGE_SYM_TYPE_NULL; - u8 naux = 0; - int emit_weak_aux = 0; - - switch (s->kind) { - case SK_ABS: - section_number = (i16)IMAGE_SYM_ABSOLUTE; - value = (u32)s->value; - break; - case SK_COMMON: - /* COFF lacks a per-common alignment field; encode size in - * Value with SectionNumber=UNDEFINED and rely on the linker - * to pick a default alignment. (cfree's frontend uses - * COMMON only via __attribute__((common)) which is rare on - * PE/COFF targets.) */ - section_number = (i16)IMAGE_SYM_UNDEFINED; - value = (u32)s->size; - break; - default: - if (s->section_id == OBJ_SEC_NONE) { - section_number = (i16)IMAGE_SYM_UNDEFINED; - value = 0; - } else if (s->section_id < nobjsec && obj_to_coff[s->section_id]) { - section_number = (i16)obj_to_coff[s->section_id]; - value = (u32)s->value; - } else { - section_number = (i16)IMAGE_SYM_UNDEFINED; - value = 0; - } - break; - } - - if (s->kind == SK_FUNC) type = (u16)COFF_SYM_TYPE_FUNCTION; - - switch (s->bind) { - case SB_LOCAL: - storage_class = IMAGE_SYM_CLASS_STATIC; - break; - case SB_GLOBAL: - storage_class = IMAGE_SYM_CLASS_EXTERNAL; - break; - case SB_WEAK: - /* mingw / clang spell weak as EXTERNAL with a WeakExternal - * aux that points at the fallback symbol. cfree's obj layer - * doesn't carry a separate fallback symbol today, so we emit - * a self-referential weak aux (TagIndex=0) which the linker - * treats as "weak, no fallback" — equivalent to ELF STB_WEAK. 
*/ - storage_class = IMAGE_SYM_CLASS_WEAK_EXTERNAL; - emit_weak_aux = 1; - naux = 1; - break; - default: - storage_class = IMAGE_SYM_CLASS_STATIC; - break; - } - - u8* slot = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE; - wr_sym(slot, short_name, zeroes, offset, value, section_number, type, - storage_class, naux); - sym_to_coff[e.id] = nrecords; - nrecords++; - if (emit_weak_aux) { - u8* aux = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE; - wr_aux_weak(aux, /*TagIndex*/ 0, - /*Characteristics*/ IMAGE_WEAK_EXTERN_SEARCH_LIBRARY); - nrecords++; - } - } - obj_symiter_free(it); - } - - /* ---- pass 4: build per-section relocation tables --------------- */ - for (u32 ci = 0; ci < nsecs; ++ci) { - CSec* cs = &secs[ci]; - u32 nr = cs->number_of_relocations; - if (!nr) continue; - u8* buf = - (u8*)arena_alloc(c->scratch, (size_t)COFF_RELOC_SIZE * nr, _Alignof(u32)); - u32 j = 0; - for (u32 ri = 0; ri < total_relocs; ++ri) { - const Reloc* r = obj_reloc_at(ob, ri); - if (r->removed) continue; - if (r->section_id != cs->obj_sec) continue; - if (r->sym == OBJ_SYM_NONE) { - compiler_panic(c, no_loc(), - "emit_coff: reloc without symbol not supported " - "(sec=%u offset=%u kind=%u)", - (u32)r->section_id, (u32)r->offset, (u32)r->kind); - } - if (r->has_explicit_addend && r->addend != 0 && - !coff_rel32_absorbs_minus4(c->target.arch, (RelocKind)r->kind, - r->addend)) { - /* v1 limitation: COFF carries the addend in the patched bytes, - * and we don't currently mutate the obj's section bytes to - * encode a separate explicit addend. cfree's MCEmitter writes - * the addend inline for COFF targets, so this branch only - * fires for inputs synthesized by external tools. 
*/ - compiler_panic(c, no_loc(), - "emit_coff: explicit nonzero addend not supported " - "(sec=%u offset=%u kind=%u addend=%lld)", - (u32)r->section_id, (u32)r->offset, (u32)r->kind, - (long long)r->addend); - } - u32 wire = reloc_to(r->kind); - /* Both arch translators use 0 (IMAGE_REL_*_ABSOLUTE) as the - * unsupported-input sentinel; treat that as a panic unless the - * input really is R_NONE. */ - if (wire == 0 && r->kind != R_NONE) { - compiler_panic( - c, no_loc(), - "emit_coff: unsupported relocation kind %u for arch %u", - (u32)r->kind, (u32)c->target.arch); - } - u32 sym_idx = sym_to_coff[r->sym]; - u8* slot = buf + (size_t)j * COFF_RELOC_SIZE; - wr_u32_le(slot + 0, r->offset); - wr_u32_le(slot + 4, sym_idx); - wr_u16_le(slot + 8, (u16)wire); - ++j; - } - cs->reloc_bytes = buf; - /* If a tombstoned reloc was skipped between count and emit, j may - * be less than nr; trust the latter count for the wire field. */ - if (j != nr) cs->number_of_relocations = (u16)j; - } - - /* ---- pass 5: assign file offsets ------------------------------- */ - /* Layout: - * [file header] [section headers] [per-section: bytes, relocs]* - * [symbol table] [string table] */ - u64 cur = (u64)COFF_FILE_HEADER_SIZE + - (u64)COFF_SECTION_HEADER_SIZE * (u64)nsecs; - - for (u32 ci = 0; ci < nsecs; ++ci) { - CSec* cs = &secs[ci]; - /* Raw data offset. NOBITS contributes nothing on disk. */ - if (cs->is_nobits || cs->size_of_raw_data == 0) { - cs->pointer_to_raw_data = 0; - } else { - cur = ALIGN_UP(cur, (u64)cs->align); - cs->pointer_to_raw_data = (u32)cur; - cur += cs->size_of_raw_data; - } - /* Reloc table. COFF doesn't mandate alignment for the reloc array, - * but llvm and binutils emit them naturally byte-packed; we 4-align - * for tidiness. 
*/ - if (cs->number_of_relocations) { - cur = ALIGN_UP(cur, (u64)4); - cs->pointer_to_relocations = (u32)cur; - cur += (u64)cs->number_of_relocations * COFF_RELOC_SIZE; - } else { - cs->pointer_to_relocations = 0; - } - } - - cur = ALIGN_UP(cur, (u64)4); - u64 symtab_off = cur; - cur += (u64)nrecords * COFF_SYMBOL_SIZE; - - /* String table starts immediately after the symtab. Patch the 4-byte - * size prefix (inclusive). */ - u32 strtab_size = buf_pos(&strtab); - /* The size field is part of the on-disk strtab and is the total - * inclusive byte count. Patch it now. */ - { - u8 sz_le[4]; - wr_u32_le(sz_le, strtab_size); - /* Buf doesn't expose in-place patch; flatten, patch, re-emit when - * we write. Just remember the value. */ - (void)sz_le; - } - u64 strtab_off = cur; - cur += strtab_size; - - /* ---- pass 6: write the file ------------------------------------ */ - cfree_writer_seek(w, 0); - - /* IMAGE_FILE_HEADER */ - coff_wr_u16(w, machine); - coff_wr_u16(w, (u16)nsecs); - coff_wr_u32(w, 0); /* TimeDateStamp: reproducible */ - coff_wr_u32(w, (u32)symtab_off); - coff_wr_u32(w, nrecords); - coff_wr_u16(w, 0); /* SizeOfOptionalHeader: 0 for .obj */ - coff_wr_u16(w, IMAGE_FILE_LARGE_ADDRESS_AWARE); - - /* Section headers — one 40-byte block immediately after the file - * header. */ - for (u32 ci = 0; ci < nsecs; ++ci) { - const CSec* cs = &secs[ci]; - cfree_writer_write(w, cs->name8, 8); - coff_wr_u32(w, cs->virtual_size); - coff_wr_u32(w, 0); /* VirtualAddress: 0 for .obj */ - coff_wr_u32(w, cs->size_of_raw_data); - coff_wr_u32(w, cs->pointer_to_raw_data); - coff_wr_u32(w, cs->pointer_to_relocations); - coff_wr_u32(w, 0); /* PointerToLinenumbers: 0 */ - coff_wr_u16(w, cs->number_of_relocations); - coff_wr_u16(w, 0); /* NumberOfLinenumbers: 0 */ - coff_wr_u32(w, cs->characteristics); - } - - /* Section bytes + relocs (interleaved). 
*/ - for (u32 ci = 0; ci < nsecs; ++ci) { - const CSec* cs = &secs[ci]; - if (!cs->is_nobits && cs->size_of_raw_data && cs->obj_bytes) { - cfree_writer_seek(w, cs->pointer_to_raw_data); - u32 sz = cs->obj_bytes->total; - u8* tmp = (u8*)h->alloc(h, sz ? sz : 1, 1); - if (sz) buf_flatten(cs->obj_bytes, tmp); - cfree_writer_write(w, tmp, sz); - h->free(h, tmp, sz ? sz : 1); - } - if (cs->number_of_relocations && cs->reloc_bytes) { - cfree_writer_seek(w, cs->pointer_to_relocations); - cfree_writer_write(w, cs->reloc_bytes, - (size_t)cs->number_of_relocations * COFF_RELOC_SIZE); - } - } - - /* Symbol table. */ - cfree_writer_seek(w, symtab_off); - cfree_writer_write(w, symtab, (size_t)nrecords * COFF_SYMBOL_SIZE); - - /* String table: 4-byte total size (inclusive) followed by the body. - * `strtab` was initialized with 4 placeholder zero bytes; rewrite - * them with the real size before flushing. */ - { - u8* flat = (u8*)arena_alloc(c->scratch, strtab_size ? strtab_size : 1, 1); - if (strtab_size) buf_flatten(&strtab, flat); - /* Patch the 4-byte size prefix in place. */ - if (strtab_size >= COFF_STRTAB_SIZE_FIELD_BYTES) { - wr_u32_le(flat, strtab_size); - } - cfree_writer_seek(w, strtab_off); - cfree_writer_write(w, flat, strtab_size); - } - buf_fini(&strtab); -} diff --git a/src/obj/coff_read.c b/src/obj/coff_read.c @@ -1,722 +0,0 @@ -/* PE/COFF .obj (IMAGE_FILE_HEADER + sections) reader. Parses a 64-bit - * little-endian relocatable object back into a fresh ObjBuilder. Peer - * of read_elf / read_macho; the post-finalize ObjBuilder shape is the - * canonical superset doc/DESIGN.md §5.5 promises: read_coff of an - * emit_coff output produces an ObjBuilder shape-equivalent to the - * writer's input, modulo synthesized SECTION symbols and the COMDAT - * section-definition aux records. - * - * Scope: IMAGE_FILE_MACHINE_AMD64 and IMAGE_FILE_MACHINE_ARM64. PE - * executables (with a non-zero SizeOfOptionalHeader) are rejected — a - * future read_coff_pe would handle those. 
Microsoft "short import" - * records (Sig1=0, Sig2=0xFFFF) found inside .lib archive members are - * detected at entry and dispatched to read_coff_short_import, which - * synthesizes a DSO-shaped ObjBuilder annotated with the providing - * DLL name via obj_set_coff_import_dll. */ - -#include <string.h> - -#include "arch/arch.h" -#include "core/arena.h" -#include "core/heap.h" -#include "core/pool.h" -#include "core/slice.h" -#include "obj/coff.h" - -static SrcLoc no_loc(void) { - SrcLoc l = {0, 0, 0}; - return l; -} - -/* ---- section-header scratch ---- */ - -typedef struct CSecRec { - char raw_name[8]; - u32 virtual_size; - u32 size_of_raw_data; - u32 pointer_to_raw_data; - u32 pointer_to_relocations; - u16 number_of_relocations; - u32 characteristics; - ObjSecId obj_sec; /* OBJ_SEC_NONE if skipped */ -} CSecRec; - -static void parse_shdr(const u8* p, CSecRec* out) { - memcpy(out->raw_name, p, 8); - out->virtual_size = coff_rd_u32(p + 8); - out->size_of_raw_data = coff_rd_u32(p + 16); - out->pointer_to_raw_data = coff_rd_u32(p + 20); - out->pointer_to_relocations = coff_rd_u32(p + 24); - out->number_of_relocations = coff_rd_u16(p + 32); - out->characteristics = coff_rd_u32(p + 36); - out->obj_sec = OBJ_SEC_NONE; -} - -/* ---- string-table lookup (4-byte size prefix, NUL-terminated entries) ---- */ - -static const char* strtab_lookup(const u8* tab, u32 tab_size, u32 off, - u32* len_out) { - if (off >= tab_size) { - *len_out = 0; - return ""; - } - const char* s = (const char*)(tab + off); - u32 max = tab_size - off; - u32 n = 0; - while (n < max && s[n] != '\0') ++n; - *len_out = n; - return s; -} - -/* Resolve a section/symbol short-or-long name into (ptr, len). COFF - * section names use the "/<decimal>" convention for >8-byte names; COFF - * symbol names use the (Zeroes==0, Offset) form instead. This helper - * handles the section form (8 raw bytes; leading '/' triggers strtab - * lookup). 
*/ -static void resolve_section_name(const char raw[8], const u8* strtab, - u32 strtab_size, const char** name_out, - u32* len_out) { - if (raw[0] == '/') { - /* Parse decimal offset. Up to 7 ASCII digits. */ - u32 off = 0; - for (u32 i = 1; i < 8 && raw[i] >= '0' && raw[i] <= '9'; ++i) { - off = off * 10u + (u32)(raw[i] - '0'); - } - *name_out = strtab_lookup(strtab, strtab_size, off, len_out); - return; - } - /* Inline: up to 8 bytes, NUL-padded (not necessarily NUL-terminated). */ - u32 n = 0; - while (n < 8 && raw[n] != '\0') ++n; - *name_out = raw; - *len_out = n; -} - -/* ---- characteristics -> SecKind / SecFlag / SecSem ---- */ - -static u16 coff_sec_kind(const char* name, u32 nlen, u32 ch) { - if (ch & IMAGE_SCN_CNT_UNINITIALIZED_DATA) return SEC_BSS; - if (ch & IMAGE_SCN_CNT_CODE) return SEC_TEXT; - if (ch & IMAGE_SCN_MEM_EXECUTE) return SEC_TEXT; - if (nlen >= 7 && memcmp(name, ".debug_", 7) == 0) return SEC_DEBUG; - /* The MS toolchain spells DWARF section names with a leading ".debug$" - * (CodeView) — keep ELF-style ".debug_" detection but also treat the - * MS form as debug. */ - if (nlen >= 7 && memcmp(name, ".debug$", 7) == 0) return SEC_DEBUG; - if (ch & IMAGE_SCN_CNT_INITIALIZED_DATA) { - if (ch & IMAGE_SCN_MEM_WRITE) return SEC_DATA; - return SEC_RODATA; - } - return SEC_OTHER; -} - -static u16 coff_sec_flags(const char* name, u32 nlen, u32 ch) { - u16 f = 0; - if (ch & IMAGE_SCN_MEM_READ) f |= SF_ALLOC; - if (ch & IMAGE_SCN_MEM_EXECUTE) f |= SF_EXEC; - if (ch & IMAGE_SCN_MEM_WRITE) f |= SF_WRITE; - if (ch & IMAGE_SCN_LNK_COMDAT) f |= SF_GROUP; - /* TLS sections in PE are spelled ".tls$<suffix>" (e.g. ".tls$", ".tls$ZZZ"). - * There is no characteristics bit for TLS — detection is name-based. */ - if (nlen >= 5 && memcmp(name, ".tls$", 5) == 0) f |= SF_TLS; - if (nlen == 4 && memcmp(name, ".tls", 4) == 0) f |= SF_TLS; - return f; -} - -/* Bits 20..23 of Characteristics encode alignment as (log2(align)+1). 
- * 0 means "default"; we collapse to align=1 for round-trip purposes. */ -static u32 coff_sec_align(u32 ch) { - u32 n = (ch & IMAGE_SCN_ALIGN_MASK) >> 20; - if (n == 0) return 1; - return 1u << (n - 1u); -} - -/* ---- symbol-name resolution ---- */ - -static void resolve_sym_name(const u8* rec, const u8* strtab, u32 strtab_size, - const char** name_out, u32* len_out) { - /* ShortName: 8 bytes. If first 4 bytes are zero, second 4 bytes is - * the strtab offset (LongName form). */ - u32 z = coff_rd_u32(rec + 0); - if (z == 0) { - u32 off = coff_rd_u32(rec + 4); - *name_out = strtab_lookup(strtab, strtab_size, off, len_out); - return; - } - u32 n = 0; - while (n < 8 && rec[n] != '\0') ++n; - *name_out = (const char*)rec; - *len_out = n; -} - -/* ---- short-import record handler ---- - * Microsoft "short import" format: a 20-byte ImportObjectHeader - * followed by SizeOfData bytes containing two NUL-terminated strings — - * the imported symbol name then the DLL name. These live as members - * of .lib archives (mingw's libkernel32.dll.a etc.) and stand in for - * a full long-form COFF import object. - * - * cfree-side model: synthesize a DSO-shaped ObjBuilder with the - * imported symbol defined at section_id = OBJ_SEC_NONE (the same - * shape read_coff_dso / read_elf_dso produce for an exported name), - * and stash the providing DLL name on the builder via - * obj_set_coff_import_dll so the archive-ingestion layer can route - * the resulting LinkInput as a DSO with this name as the soname. - * - * We also synthesize the `__imp_<name>` alias mingw codegen uses to - * spell explicit IAT-slot access; both names ultimately resolve to - * the same DLL export at link time. */ -static ObjBuilder* read_coff_short_import(Compiler* c, const char* name, - const u8* data, size_t len) { - if (len < COFF_IMPORT_OBJECT_HEADER_SIZE) - compiler_panic(c, no_loc(), - "read_coff: short-import record shorter than header"); - - /* Sig1 / Sig2 already checked by the caller. 
*/ - /* data + 4: Version (2 bytes, ignored). */ - u16 machine = coff_rd_u16(data + 6); - /* data + 8: TimeDateStamp (4 bytes, ignored). */ - u32 size_of_data = coff_rd_u32(data + 12); - u16 ordinal_or_hint = coff_rd_u16(data + 16); - u16 type_flags = coff_rd_u16(data + 18); - - if ((u64)COFF_IMPORT_OBJECT_HEADER_SIZE + (u64)size_of_data > (u64)len) - compiler_panic(c, no_loc(), - "read_coff: short-import SizeOfData=%u extends past input " - "(len=%zu)", - size_of_data, len); - - if (machine != IMAGE_FILE_MACHINE_AMD64 && - machine != IMAGE_FILE_MACHINE_ARM64) - compiler_panic(c, no_loc(), - "read_coff: short-import unsupported machine %#x", - (u32)machine); - - /* Decode TypeFlags bitfield (Type:2, NameType:3, Reserved:11). */ - u32 import_type = (u32)(type_flags & 0x3u); - u32 name_type = (u32)((type_flags >> 2) & 0x7u); - - /* Ordinal-only imports (NameType=IMPORT_OBJECT_ORDINAL) are not yet - * implemented in cfree. None of the mingw / llvm-mingw system import - * archives use this shape — every libfoo.a member in the supported - * sysroots imports by name — so refusing here is a clean diagnostic, - * not an internal panic. When a real consumer surfaces, the work is - * to thread the ordinal through link_resolve and into the PE import - * directory hint/name tables. */ - if (name_type == IMPORT_OBJECT_ORDINAL) - compiler_panic(c, no_loc(), - "read_coff: short-import by ordinal not implemented " - "(archive member \"%.*s\", ordinal %u). cfree links " - "imports by name only; rebuild the consumer to import " - "by name, or omit this archive from the link.", - SLICE_ARG(name ? slice_from_cstr(name) - : SLICE_LIT("<unnamed>")), - (unsigned)ordinal_or_hint); - - /* Symbol name: NUL-terminated starting at data + 20. 
*/ - const u8* body = data + COFF_IMPORT_OBJECT_HEADER_SIZE; - u32 sym_name_max = size_of_data; - u32 sym_name_len = 0; - while (sym_name_len < sym_name_max && body[sym_name_len] != '\0') - ++sym_name_len; - if (sym_name_len == sym_name_max) - compiler_panic(c, no_loc(), - "read_coff: short-import symbol name not NUL-terminated"); - - /* DLL name: NUL-terminated starting after the symbol name's NUL. */ - u32 dll_name_off = sym_name_len + 1u; - if (dll_name_off >= size_of_data) - compiler_panic(c, no_loc(), - "read_coff: short-import missing DLL name"); - const u8* dll_p = body + dll_name_off; - u32 dll_name_max = size_of_data - dll_name_off; - u32 dll_name_len = 0; - while (dll_name_len < dll_name_max && dll_p[dll_name_len] != '\0') - ++dll_name_len; - if (dll_name_len == dll_name_max) - compiler_panic(c, no_loc(), - "read_coff: short-import DLL name not NUL-terminated"); - - ObjBuilder* ob = obj_new(c); - if (!ob) compiler_panic(c, no_loc(), "read_coff: obj_new failed"); - - /* Pick SymKind by import type: CODE -> function, DATA/CONST -> object. - * Both are defined at section_id=OBJ_SEC_NONE, value=0, size=0 — the - * shape read_coff_dso would produce for a DLL export. */ - SymKind k = (import_type == IMPORT_OBJECT_CODE) ? SK_FUNC : SK_OBJ; - - Sym sn = pool_intern_slice( - c->global, (Slice){ .s = (const char*)body, .len = sym_name_len }); - ObjSymId id = obj_symbol_ex(ob, sn, SB_GLOBAL, SV_DEFAULT, k, OBJ_SEC_NONE, - 0, 0, 0); - obj_sym_mark_referenced(ob, id); - - /* `__imp_<name>` alias for codegen that refers to the IAT slot - * directly (mingw convention). Even code imports use an object-like - * `__imp_` symbol because references to it want the IAT data slot, not - * the callable import stub. 
*/ - static const char kImpPrefix[] = "__imp_"; - u32 imp_len = (u32)(sizeof kImpPrefix - 1u) + sym_name_len; - char* imp_buf = arena_array(c->scratch, char, imp_len); - memcpy(imp_buf, kImpPrefix, sizeof kImpPrefix - 1u); - memcpy(imp_buf + (sizeof kImpPrefix - 1u), body, sym_name_len); - Sym imp_sn = - pool_intern_slice(c->global, (Slice){ .s = imp_buf, .len = imp_len }); - ObjSymId imp_id = obj_symbol_ex(ob, imp_sn, SB_GLOBAL, SV_DEFAULT, SK_OBJ, - OBJ_SEC_NONE, 0, 0, 0); - obj_sym_mark_referenced(ob, imp_id); - - /* Stash the DLL name so the archive-ingestion layer (Phase 4.3) can - * route this builder as a DSO with the DLL as soname. */ - Sym dll_sn = pool_intern_slice( - c->global, (Slice){ .s = (const char*)dll_p, .len = dll_name_len }); - obj_set_coff_import_dll(ob, dll_sn); - - (void)name_type; - obj_finalize(ob); - return ob; -} - -ObjBuilder* read_coff(Compiler* c, const char* name, const u8* data, - size_t len) { - (void)name; - - /* ---- Step 0: header validation ---- */ - if (len < COFF_FILE_HEADER_SIZE) - compiler_panic(c, no_loc(), "read_coff: input shorter than COFF header"); - - /* Microsoft short-import record? (Sig1=0, Sig2=0xFFFF.) These live - * as members of .lib archives and stand in for a long-form import - * object. Detect at entry; the rest of read_coff assumes the - * input is a real IMAGE_FILE_HEADER. */ - if (len >= 4 && - coff_rd_u16(data + 0) == IMPORT_OBJECT_HDR_SIG1 && - coff_rd_u16(data + 2) == IMPORT_OBJECT_HDR_SIG2) { - return read_coff_short_import(c, name, data, len); - } - - u16 machine = coff_rd_u16(data + 0); - u16 nsections = coff_rd_u16(data + 2); - /* data + 4: TimeDateStamp (4 bytes, ignored). */ - u32 ptr_to_symtab = coff_rd_u32(data + 8); - u32 nsymbols = coff_rd_u32(data + 12); - u16 size_opt_hdr = coff_rd_u16(data + 16); - /* data + 18: Characteristics (2 bytes, currently ignored). 
*/ - - if (size_opt_hdr != 0) - compiler_panic(c, no_loc(), - "read_coff: input has optional header (size=%u); " - "use read_coff_pe for executables", - (u32)size_opt_hdr); - - if (machine != IMAGE_FILE_MACHINE_AMD64 && - machine != IMAGE_FILE_MACHINE_ARM64 && - machine != IMAGE_FILE_MACHINE_ARM64EC) - compiler_panic(c, no_loc(), "read_coff: unsupported machine %#x", - (u32)machine); - - const ArchImpl* arch = arch_lookup_coff_machine(machine); - if (!arch || !arch->coff || !arch->coff->reloc_from) - compiler_panic(c, no_loc(), "read_coff: no arch impl for machine %#x", - (u32)machine); - u32 (*reloc_from)(u32) = arch->coff->reloc_from; - - if ((u64)COFF_FILE_HEADER_SIZE + - (u64)nsections * (u64)COFF_SECTION_HEADER_SIZE > - (u64)len) - compiler_panic(c, no_loc(), - "read_coff: section header table out of range"); - - /* ---- Step 1: bootstrap, locate strtab ---- */ - /* Strtab is at PointerToSymbolTable + NumberOfSymbols * 18. When the - * file has no symbol table (ptr=0, n=0) we treat strtab as empty. */ - const u8* strtab = NULL; - u32 strtab_size = 0; - if (ptr_to_symtab && nsymbols) { - u64 symtab_end = - (u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYMBOL_SIZE; - if (symtab_end + COFF_STRTAB_SIZE_FIELD_BYTES > (u64)len) - compiler_panic(c, no_loc(), - "read_coff: symbol table / strtab header out of range"); - u32 declared = coff_rd_u32(data + symtab_end); - /* The size field is inclusive of the 4-byte prefix; treat <4 as - * "empty" (some tools write 0). 
*/ - if (declared < COFF_STRTAB_SIZE_FIELD_BYTES) declared = 0; - if (declared) { - if (symtab_end + (u64)declared > (u64)len) - compiler_panic(c, no_loc(), "read_coff: strtab body out of range"); - strtab = data + symtab_end; - strtab_size = declared; - } else { - strtab = data + symtab_end; - strtab_size = COFF_STRTAB_SIZE_FIELD_BYTES; - } - } - - ObjBuilder* ob = obj_new(c); - if (!ob) compiler_panic(c, no_loc(), "read_coff: obj_new failed"); - - /* ---- Step 2: ingest sections ---- */ - CSecRec* secs = arena_array(c->scratch, CSecRec, nsections ? nsections : 1); - const u8* shdr_base = data + COFF_FILE_HEADER_SIZE; - for (u32 i = 0; i < nsections; ++i) { - CSecRec* s = &secs[i]; - parse_shdr(shdr_base + (u64)i * COFF_SECTION_HEADER_SIZE, s); - - const char* nm; - u32 nlen; - resolve_section_name(s->raw_name, strtab, strtab_size, &nm, &nlen); - Sym sn = pool_intern_slice(c->global, (Slice){ .s = nm, .len = nlen }); - - u16 kind = coff_sec_kind(nm, nlen, s->characteristics); - u16 flags = coff_sec_flags(nm, nlen, s->characteristics); - u32 align = coff_sec_align(s->characteristics); - - int is_bss = (s->characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA) != 0; - u16 sem = is_bss ? SSEM_NOBITS : SSEM_PROGBITS; - - ObjSecId id = obj_section_ex(ob, sn, (SecKind)kind, (SecSem)sem, flags, - align, 0u, 0u, 0u); - if (id == OBJ_SEC_NONE) - compiler_panic(c, no_loc(), - "read_coff: obj_section_ex failed for section %u", i); - s->obj_sec = id; - - /* Preserve raw Characteristics so emit_coff can write back any bits - * the canonical SecFlag/SecSem mapping doesn't model (LNK_INFO, - * LNK_REMOVE, MEM_DISCARDABLE, MEM_SHARED, GPREL, alignment nibble). */ - obj_section_set_ext(ob, id, OBJ_EXT_COFF, s->characteristics, 0); - - if (is_bss) { - u32 bss_size = s->virtual_size ? 
s->virtual_size : s->size_of_raw_data; - obj_reserve_bss(ob, id, bss_size, align); - } else if (s->size_of_raw_data) { - u64 end = (u64)s->pointer_to_raw_data + (u64)s->size_of_raw_data; - if (end > (u64)len) - compiler_panic(c, no_loc(), - "read_coff: section %u bytes out of range", i); - u8* dst = obj_reserve(ob, id, s->size_of_raw_data); - memcpy(dst, data + s->pointer_to_raw_data, s->size_of_raw_data); - } - } - - /* ---- Step 3: ingest symbols (with aux-record awareness) ---- - * sym_to_obj is indexed by RAW symbol-table index (including aux - * slots), so reloc.SymbolTableIndex resolves directly without - * adjusting for skipped aux records. Aux slots map to OBJ_SYM_NONE. */ - ObjSymId* sym_to_obj = - arena_zarray(c->scratch, ObjSymId, nsymbols ? nsymbols : 1); - - /* Track section-symbol primary symtab index per section, stored as - * (raw_index + 1) so 0 can mean "not seen yet" without colliding - * with the (legitimate) first symbol-table slot — emit_coff always - * lays the first section's section-symbol at index 0. */ - u32* sec_sym_primary = arena_zarray(c->scratch, u32, nsections + 1u); - - const u8* sym_base = data + ptr_to_symtab; - if (nsymbols) { - if ((u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYMBOL_SIZE > (u64)len) - compiler_panic(c, no_loc(), - "read_coff: symbol table body out of range"); - } - - for (u32 i = 0; i < nsymbols; ) { - const u8* p = sym_base + (u64)i * COFF_SYMBOL_SIZE; - const char* nm; - u32 nlen; - resolve_sym_name(p, strtab, strtab_size, &nm, &nlen); - - u32 value = coff_rd_u32(p + 8); - i16 sec_num = (i16)coff_rd_u16(p + 12); - u16 type = coff_rd_u16(p + 14); - u8 sclass = p[16]; - u8 naux = p[17]; - - /* FILE storage class: concatenate aux records' raw bytes (each - * 18 bytes, NUL-padded) for the source-file name. */ - if (sclass == IMAGE_SYM_CLASS_FILE) { - /* Build name from aux records (up to naux*18 bytes); fall back - * to the primary record's name if naux==0. 
*/ - const char* fnm = nm; - u32 fnlen = nlen; - if (naux) { - /* Each aux record's 18 bytes are interpreted as raw file-name - * bytes; concatenate then trim trailing NULs. */ - u32 total = (u32)naux * COFF_SYMBOL_SIZE; - if ((u64)i + 1u + (u64)naux > (u64)nsymbols) - compiler_panic(c, no_loc(), - "read_coff: FILE aux records extend past symbol " - "table"); - const u8* aux = p + COFF_SYMBOL_SIZE; - u32 n = 0; - while (n < total && aux[n] != '\0') ++n; - fnm = (const char*)aux; - fnlen = n; - } - Sym fsn = fnlen ? pool_intern_slice( - c->global, (Slice){ .s = fnm, .len = fnlen }) - : 0; - ObjSymId id = obj_symbol_ex(ob, fsn, SB_LOCAL, SV_DEFAULT, SK_FILE, - OBJ_SEC_NONE, 0, 0, 0); - obj_sym_mark_referenced(ob, id); - sym_to_obj[i] = id; - i += 1u + naux; - continue; - } - - /* Skip .bf/.ef debug pair primaries (FUNCTION storage class) and - * the END_OF_FUNCTION marker: they carry no symbol cfree models. */ - if (sclass == IMAGE_SYM_CLASS_FUNCTION || - sclass == IMAGE_SYM_CLASS_END_OF_FUNCTION) { - sym_to_obj[i] = OBJ_SYM_NONE; - i += 1u + naux; - continue; - } - - /* Resolve (bind, vis, kind, section_id, value, size, cmnalign). */ - SymBind bind = SB_LOCAL; - SymVis vis = SV_DEFAULT; - SymKind kind = SK_NOTYPE; - ObjSecId target_sec = OBJ_SEC_NONE; - u64 sym_value = 0; - u64 sym_size = 0; - u64 cmnalign = 0; - - if (sec_num == IMAGE_SYM_UNDEFINED) { - /* Undef or common. EXTERNAL with Value > 0 is a common. */ - if (sclass == IMAGE_SYM_CLASS_EXTERNAL && value > 0) { - bind = SB_GLOBAL; - kind = SK_COMMON; - sym_size = value; - cmnalign = 1; /* COFF doesn't carry per-common alignment */ - } else { - bind = (sclass == IMAGE_SYM_CLASS_WEAK_EXTERNAL) ? SB_WEAK - : (sclass == IMAGE_SYM_CLASS_EXTERNAL) ? SB_GLOBAL - : SB_LOCAL; - kind = SK_UNDEF; - } - } else if (sec_num == IMAGE_SYM_ABSOLUTE) { - kind = SK_ABS; - sym_value = value; - bind = (sclass == IMAGE_SYM_CLASS_EXTERNAL) ? 
SB_GLOBAL : SB_LOCAL; - } else if (sec_num == IMAGE_SYM_DEBUG) { - /* Defined-in-debug — cfree has no model for it. Skip with an - * OBJ_SYM_NONE entry; relocations against this slot will resolve - * to OBJ_SYM_NONE, which obj_reloc_ex tolerates. */ - sym_to_obj[i] = OBJ_SYM_NONE; - i += 1u + naux; - continue; - } else if (sec_num >= 1 && (u32)sec_num <= nsections) { - target_sec = secs[sec_num - 1].obj_sec; - sym_value = value; - switch (sclass) { - case IMAGE_SYM_CLASS_EXTERNAL: - bind = SB_GLOBAL; - break; - case IMAGE_SYM_CLASS_WEAK_EXTERNAL: - bind = SB_WEAK; - break; - case IMAGE_SYM_CLASS_STATIC: - case IMAGE_SYM_CLASS_LABEL: - default: - bind = SB_LOCAL; - break; - } - - /* Detect SECTION symbols: STATIC, Value==0, name matches the - * section's own name, and the section has at least one aux - * record (the section-definition aux). Mark as SK_SECTION so - * emit_coff regenerates the synthetic entry. */ - int is_section_sym = 0; - if (sclass == IMAGE_SYM_CLASS_STATIC && value == 0 && naux >= 1) { - const CSecRec* cs = &secs[sec_num - 1]; - u32 raw_nlen = 0; - while (raw_nlen < 8 && cs->raw_name[raw_nlen] != '\0') ++raw_nlen; - if (raw_nlen == nlen && memcmp(cs->raw_name, nm, nlen) == 0) { - is_section_sym = 1; - } else if (cs->raw_name[0] == '/') { - /* Long-named section: compare the resolved name. */ - const char* rn; - u32 rnlen; - resolve_section_name(cs->raw_name, strtab, strtab_size, &rn, &rnlen); - if (rnlen == nlen && memcmp(rn, nm, nlen) == 0) is_section_sym = 1; - } - } - - if (is_section_sym) { - kind = SK_SECTION; - sec_sym_primary[sec_num] = i + 1u; - } else if (sclass == IMAGE_SYM_CLASS_SECTION) { - kind = SK_SECTION; - } else if (sclass == IMAGE_SYM_CLASS_LABEL) { - kind = SK_NOTYPE; - } else if ((type >> 8) == IMAGE_SYM_DTYPE_FUNCTION) { - kind = SK_FUNC; - } else if (type == IMAGE_SYM_TYPE_NULL) { - kind = (bind == SB_LOCAL) ? 
SK_NOTYPE : SK_OBJ; - } else { - kind = SK_OBJ; - } - } else { - compiler_panic(c, no_loc(), - "read_coff: symbol section number %d out of range", - (int)sec_num); - } - - /* WEAK_EXTERNAL primary: aux record carries TagIndex + Characteristics. - * cfree's model has SB_WEAK; the fallback symbol is link-time - * resolution by name and we drop the explicit index. */ - if (sclass == IMAGE_SYM_CLASS_WEAK_EXTERNAL) bind = SB_WEAK; - - Sym sn = nlen ? pool_intern_slice(c->global, (Slice){ .s = nm, .len = nlen }) - : 0; - ObjSymId id = obj_symbol_ex(ob, sn, bind, vis, kind, target_sec, sym_value, - sym_size, cmnalign); - obj_sym_mark_referenced(ob, id); - sym_to_obj[i] = id; - i += 1u + naux; - } - - /* ---- Step 4: stitch COMDAT groups from section-definition aux ---- - * Each COMDAT section has a STATIC primary symbol (the section - * symbol) followed by one section-definition aux record. Selection - * != 0 marks the section as a COMDAT member; the signature symbol - * is the section symbol itself (Number field's selection variant - * controls dedup policy at link time). */ - for (u32 s = 1; s <= nsections; ++s) { - u32 prim_plus1 = sec_sym_primary[s]; - if (!prim_plus1) continue; - u32 prim = prim_plus1 - 1u; - const CSecRec* cs = &secs[s - 1]; - if (!(cs->characteristics & IMAGE_SCN_LNK_COMDAT)) continue; - const u8* p = sym_base + (u64)prim * COFF_SYMBOL_SIZE; - u8 naux = p[17]; - if (!naux) continue; - const u8* aux = p + COFF_SYMBOL_SIZE; - /* Aux layout: Length(4), NumberOfRelocations(2), NumberOfLinenumbers(2), - * CheckSum(4), Number(2), Selection(1), Unused(3). */ - u16 assoc_number = coff_rd_u16(aux + 12); - u8 selection = aux[14]; - if (selection == 0) continue; - - ObjSymId sig = sym_to_obj[prim]; - const ObjSym* sigsym = obj_symbol_get(ob, sig); - Sym gname = sigsym ? 
sigsym->name : 0; - ObjGroupId gid = obj_group(ob, gname, sig, (u32)selection); - obj_group_add_section(ob, gid, cs->obj_sec); - obj_section_set_group(ob, cs->obj_sec, gid); - - /* ASSOCIATIVE: the COMDAT member is associated with another - * section's group. Add this section to that group's list too so - * dead-strip keeps them paired. */ - if (selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE && assoc_number >= 1 && - (u32)assoc_number <= nsections) { - u32 other_prim_plus1 = sec_sym_primary[assoc_number]; - if (other_prim_plus1) { - u32 other_prim = other_prim_plus1 - 1u; - const u8* op = sym_base + (u64)other_prim * COFF_SYMBOL_SIZE; - if (op[17]) { - const u8* oaux = op + COFF_SYMBOL_SIZE; - u8 osel = oaux[14]; - if (osel != 0) { - ObjSymId osig = sym_to_obj[other_prim]; - const ObjSym* osigsym = obj_symbol_get(ob, osig); - Sym ogname = osigsym ? osigsym->name : 0; - ObjGroupId ogid = obj_group(ob, ogname, osig, (u32)osel); - obj_group_add_section(ob, ogid, cs->obj_sec); - } - } - } - } - } - - /* ---- Step 5: per-section relocations ---- */ - for (u32 i = 0; i < nsections; ++i) { - const CSecRec* s = &secs[i]; - if (!s->number_of_relocations) continue; - u64 reloc_end = (u64)s->pointer_to_relocations + - (u64)s->number_of_relocations * (u64)COFF_RELOC_SIZE; - if (reloc_end > (u64)len) - compiler_panic(c, no_loc(), - "read_coff: relocation table for section %u out of range", - i); - const u8* rbase = data + s->pointer_to_relocations; - for (u32 j = 0; j < s->number_of_relocations; ++j) { - const u8* rp = rbase + (u64)j * COFF_RELOC_SIZE; - u32 r_va = coff_rd_u32(rp + 0); - u32 r_sym = coff_rd_u32(rp + 4); - u16 r_type = coff_rd_u16(rp + 8); - - u32 kind = reloc_from(r_type); - if (kind == (u32)-1) - compiler_panic(c, no_loc(), - "read_coff: unsupported reloc type %u for machine %#x", - (u32)r_type, (u32)machine); - - ObjSymId target = OBJ_SYM_NONE; - if (r_sym < nsymbols) target = sym_to_obj[r_sym]; - - /* AMD64 REL32 encodings are relative to a PC after the 
relocated - * field, while cfree's R_PC32-style apply formula subtracts the - * relocation field address P. Plain REL32 is relative to P+4; - * REL32_N is relative to P+N. Record that convention as an - * implicit negative addend so link_reloc_apply can stay format - * neutral. */ - /* ARM64 PAGEOFFSET_12L is one wire code for LDST{8,16,32,64,128}. - * The per-arch translator returns R_AARCH64_LDST64_ABS_LO12_NC by - * default; recover the actual access width from the patched LDR/ - * STR instruction's size field at bits [31:30] (and a SIMD/FP - * extension via bit 26 + opc[23]) so the linker applies the right - * scale. Mismatch panics at apply-time with "misaligned - * address" otherwise — see link_reloc.c. */ - if ((machine == IMAGE_FILE_MACHINE_ARM64 || - machine == IMAGE_FILE_MACHINE_ARM64EC) && - r_type == IMAGE_REL_ARM64_PAGEOFFSET_12L && - s->size_of_raw_data && (u64)r_va + 4u <= (u64)s->size_of_raw_data) { - const u8* ibytes = data + s->pointer_to_raw_data + r_va; - u32 instr = (u32)ibytes[0] | ((u32)ibytes[1] << 8) | - ((u32)ibytes[2] << 16) | ((u32)ibytes[3] << 24); - u32 sz = (instr >> 30) & 0x3u; - int is_simd = (instr >> 26) & 0x1u; - if (is_simd && ((instr >> 23) & 0x1u)) { - kind = R_AARCH64_LDST128_ABS_LO12_NC; - } else { - switch (sz) { - case 0: kind = R_AARCH64_LDST8_ABS_LO12_NC; break; - case 1: kind = R_AARCH64_LDST16_ABS_LO12_NC; break; - case 2: kind = R_AARCH64_LDST32_ABS_LO12_NC; break; - default: kind = R_AARCH64_LDST64_ABS_LO12_NC; break; - } - } - } - - i64 addend = 0; - int has_explicit = 0; - if (machine == IMAGE_FILE_MACHINE_AMD64) { - switch (r_type) { - case IMAGE_REL_AMD64_REL32: - addend = -4; has_explicit = 1; break; - case IMAGE_REL_AMD64_REL32_1: - addend = -1; has_explicit = 1; break; - case IMAGE_REL_AMD64_REL32_2: - addend = -2; has_explicit = 1; break; - case IMAGE_REL_AMD64_REL32_3: - addend = -3; has_explicit = 1; break; - case IMAGE_REL_AMD64_REL32_4: - addend = -4; has_explicit = 1; break; - case 
IMAGE_REL_AMD64_REL32_5: - addend = -5; has_explicit = 1; break; - default: - break; - } - } - - obj_reloc_ex(ob, s->obj_sec, r_va, (RelocKind)kind, target, addend, - has_explicit, 0); - } - } - - /* ---- Step 6: finalize and return ---- */ - obj_finalize(ob); - return ob; -} diff --git a/src/obj/coff_read_dso.c b/src/obj/coff_read_dso.c @@ -1,236 +0,0 @@ -/* PE32+ DLL reader. Peer of read_elf_dso / read_macho_dso: walks the - * IMAGE_DIRECTORY_ENTRY_EXPORT data directory of a Windows .dll and - * produces an ObjBuilder of defined OBJ_SEC_NONE symbols — one per - * name in the Export Name Table. The DLL's own Name string (the - * analogue of DT_SONAME / LC_ID_DYLIB) is returned via *soname_out. - * - * The produced ObjBuilder carries no sections, relocations, or groups - * — DSO inputs contribute no bytes to the link. The consumer's - * resolve_undefs pass sees the exports as defined globals and marks - * matching consumer-side undefs as `imported`; the import-table - * emitter (Phase 3 / 4.4) later groups them by providing DLL. - * - * Scope: PE32+ only (IMAGE_NT_OPTIONAL_HDR64_MAGIC), AMD64 or ARM64, - * with IMAGE_FILE_DLL set. Ordinal-only exports (entries present in - * the EAT but absent from the ENT) are not synthesized in v1 — almost - * all real-world imports are by name. Forwarder entries (EAT RVA - * falls within the export directory's own range) are still emitted as - * symbols so the linker can satisfy imports against them; the OS - * loader follows the forwarder chain at runtime. This contract is - * pinned by test/coff/pe-dso-forwarder.c. */ - -#include <string.h> - -#include "core/arena.h" -#include "core/heap.h" -#include "core/pool.h" -#include "core/slice.h" -#include "obj/coff.h" - -static SrcLoc no_loc(void) { - SrcLoc l = {0, 0, 0}; - return l; -} - -/* ---- RVA -> file offset ---- - * Walks the section table once per call. 
Returns 1 on success and - * fills *off_out; returns 0 if the RVA falls outside every section's - * [VirtualAddress, VirtualAddress + VirtualSize) range (SizeOfRawData - * stands in when VirtualSize is 0), lies past the section's on-disk - * data, or maps to a file offset at or beyond `len`. */ -static int rva_to_offset(const u8* shdrs, u16 nsec, u32 rva, size_t len, - u64* off_out) { - for (u16 i = 0; i < nsec; ++i) { - const u8* sh = shdrs + (u64)i * COFF_SECTION_HEADER_SIZE; - u32 vsize = coff_rd_u32(sh + 8); - u32 vaddr = coff_rd_u32(sh + 12); - u32 raw_size = coff_rd_u32(sh + 16); - u32 raw_ptr = coff_rd_u32(sh + 20); - /* Some linkers leave VirtualSize == 0 in objects; use raw_size as - * a fallback so we still resolve RVAs in well-formed images. */ - u32 span = vsize ? vsize : raw_size; - if (rva >= vaddr && rva < vaddr + span) { - u64 delta = (u64)(rva - vaddr); - if (delta >= raw_size) return 0; /* RVA past on-disk data */ - u64 off = (u64)raw_ptr + delta; - if (off >= len) return 0; - *off_out = off; - return 1; - } - } - return 0; -} - -/* Read a NUL-terminated string starting at `off`, bounded by `len`. - * Returns the string length (excluding NUL); writes the pointer to - * *out. Returns 0 if off is out of range or the string is not - * terminated within the file. 
*/ -static u32 read_cstr(const u8* data, size_t len, u64 off, const char** out) { - if (off >= len) { *out = ""; return 0; } - const char* s = (const char*)(data + off); - u64 max = (u64)len - off; - u64 n = 0; - while (n < max && s[n] != '\0') ++n; - if (n == max) { *out = ""; return 0; } /* unterminated */ - *out = s; - return (u32)n; -} - -ObjBuilder* read_coff_dso(Compiler* c, const char* name, const u8* data, - size_t len, Sym* soname_out) { - (void)name; - if (soname_out) *soname_out = 0; - - /* ---- DOS header + PE signature ---- */ - if (len < COFF_DOS_HEADER_SIZE) - compiler_panic(c, no_loc(), "read_coff_dso: input shorter than DOS header"); - u16 e_magic = coff_rd_u16(data + 0); - if (e_magic != IMAGE_DOS_SIGNATURE) - compiler_panic(c, no_loc(), "read_coff_dso: bad DOS magic 0x%x", e_magic); - u32 e_lfanew = coff_rd_u32(data + 60); - - u64 nt_end = (u64)e_lfanew + 4u + COFF_FILE_HEADER_SIZE + COFF_OPT_HDR64_SIZE; - if (nt_end > len) - compiler_panic(c, no_loc(), - "read_coff_dso: PE headers extend past end of file"); - - u32 pe_sig = coff_rd_u32(data + e_lfanew); - if (pe_sig != IMAGE_NT_SIGNATURE) - compiler_panic(c, no_loc(), "read_coff_dso: bad PE signature 0x%x", pe_sig); - - /* ---- IMAGE_FILE_HEADER ---- */ - const u8* fh = data + e_lfanew + 4u; - u16 machine = coff_rd_u16(fh + 0); - u16 nsec = coff_rd_u16(fh + 2); - u16 size_of_opt = coff_rd_u16(fh + 16); - u16 chars = coff_rd_u16(fh + 18); - - if (machine != IMAGE_FILE_MACHINE_AMD64 && machine != IMAGE_FILE_MACHINE_ARM64) - compiler_panic(c, no_loc(), - "read_coff_dso: unsupported machine 0x%x", machine); - if (!(chars & IMAGE_FILE_DLL)) - compiler_panic(c, no_loc(), - "read_coff_dso: not a DLL (Characteristics=0x%x)", chars); - if (size_of_opt < COFF_OPT_HDR64_SIZE) - compiler_panic(c, no_loc(), - "read_coff_dso: SizeOfOptionalHeader %u too small for PE32+", - size_of_opt); - - /* ---- IMAGE_OPTIONAL_HEADER64 ---- */ - const u8* oh = fh + COFF_FILE_HEADER_SIZE; - u16 opt_magic = coff_rd_u16(oh + 
0); - if (opt_magic != IMAGE_NT_OPTIONAL_HDR64_MAGIC) - compiler_panic(c, no_loc(), - "read_coff_dso: not PE32+ (optional header Magic=0x%x)", - opt_magic); - - /* DataDirectory begins at offset 112 inside the PE32+ optional header - * (24 bytes of standard fields + 88 bytes of Windows-specific fields, - * the last 4 of which are NumberOfRvaAndSizes, = 112). */ - const u8* data_dir = oh + COFF_OPT_HDR64_SIZE - - COFF_NUM_DATA_DIRECTORIES * COFF_DATA_DIRECTORY_SIZE; - u32 export_rva = coff_rd_u32(data_dir - + IMAGE_DIRECTORY_ENTRY_EXPORT - * COFF_DATA_DIRECTORY_SIZE); - u32 export_size = coff_rd_u32(data_dir - + IMAGE_DIRECTORY_ENTRY_EXPORT - * COFF_DATA_DIRECTORY_SIZE - + 4u); - - /* ---- section table ---- */ - u64 shdrs_off = (u64)e_lfanew + 4u + COFF_FILE_HEADER_SIZE + size_of_opt; - u64 shdrs_end = shdrs_off + (u64)nsec * COFF_SECTION_HEADER_SIZE; - if (shdrs_end > len) - compiler_panic(c, no_loc(), - "read_coff_dso: section table extends past end of file"); - const u8* shdrs = data + shdrs_off; - - ObjBuilder* ob = obj_new(c); - if (!ob) compiler_panic(c, no_loc(), "read_coff_dso: obj_new failed"); - - /* No export directory => empty DSO (legal for stub DLLs). */ - if (export_size == 0 || export_rva == 0) { - obj_finalize(ob); - return ob; - } - - u64 exp_off; - if (!rva_to_offset(shdrs, nsec, export_rva, len, &exp_off)) - compiler_panic(c, no_loc(), - "read_coff_dso: export directory RVA 0x%x out of range", - export_rva); - if (exp_off + COFF_EXPORT_DIR_SIZE > len) - compiler_panic(c, no_loc(), - "read_coff_dso: export directory truncated"); - - const u8* ed = data + exp_off; - u32 name_rva = coff_rd_u32(ed + 12); - u32 num_funcs = coff_rd_u32(ed + 20); - u32 num_names = coff_rd_u32(ed + 24); - u32 eat_rva = coff_rd_u32(ed + 28); - u32 ent_rva = coff_rd_u32(ed + 32); - u32 ord_rva = coff_rd_u32(ed + 36); - /* Base (ed + 16) is the user-visible ordinal offset; the cfree linker - * matches imports by name, so we don't propagate it. 
*/ - - /* ---- DLL name (soname) ---- */ - if (name_rva) { - u64 name_off; - if (!rva_to_offset(shdrs, nsec, name_rva, len, &name_off)) - compiler_panic(c, no_loc(), - "read_coff_dso: DLL name RVA 0x%x out of range", - name_rva); - const char* dll_name; - u32 nlen = read_cstr(data, len, name_off, &dll_name); - if (nlen && soname_out) - *soname_out = pool_intern_slice(c->global, (Slice){ .s = dll_name, .len = nlen }); - } - - /* ---- resolve EAT / ENT / ordinal table once ---- */ - u64 eat_off = 0, ent_off = 0, ord_off = 0; - if (num_names) { - if (!rva_to_offset(shdrs, nsec, eat_rva, len, &eat_off)) - compiler_panic(c, no_loc(), - "read_coff_dso: EAT RVA 0x%x out of range", eat_rva); - if (!rva_to_offset(shdrs, nsec, ent_rva, len, &ent_off)) - compiler_panic(c, no_loc(), - "read_coff_dso: ENT RVA 0x%x out of range", ent_rva); - if (!rva_to_offset(shdrs, nsec, ord_rva, len, &ord_off)) - compiler_panic(c, no_loc(), - "read_coff_dso: ordinal table RVA 0x%x out of range", - ord_rva); - if (ent_off + (u64)num_names * 4u > len - || ord_off + (u64)num_names * 2u > len) - compiler_panic(c, no_loc(), - "read_coff_dso: ENT/ordinal table extends past file"); - if (eat_off + (u64)num_funcs * 4u > len) - compiler_panic(c, no_loc(), - "read_coff_dso: EAT extends past file"); - } - - /* ---- walk the ENT ---- - * Forwarders (EAT RVA inside [export_rva, export_rva + export_size)) - * still produce a symbol: cfree's linker doesn't follow the chain, - * but the import needs to be satisfiable so the OS loader can. */ - for (u32 i = 0; i < num_names; ++i) { - u32 nrva = coff_rd_u32(data + ent_off + (u64)i * 4u); - u16 ord = coff_rd_u16(data + ord_off + (u64)i * 2u); - if (ord >= num_funcs) continue; /* malformed; skip rather than panic */ - /* func_rva is fetched for forwarder classification only; cfree does - * not consume the address itself (DSO symbols are OBJ_SEC_NONE). 
*/ - u32 func_rva = coff_rd_u32(data + eat_off + (u64)ord * 4u); - (void)func_rva; /* see comment above re: forwarders */ - - u64 name_off; - if (!rva_to_offset(shdrs, nsec, nrva, len, &name_off)) continue; - const char* nm; - u32 nlen = read_cstr(data, len, name_off, &nm); - if (!nlen) continue; - - Sym sn = pool_intern_slice(c->global, (Slice){ .s = nm, .len = nlen }); - ObjSymId id = obj_symbol(ob, sn, SB_GLOBAL, SK_FUNC, - OBJ_SEC_NONE, 0, 0); - obj_sym_mark_referenced(ob, id); - } - - obj_finalize(ob); - return ob; -} diff --git a/src/obj/coff_reloc_aarch64.c b/src/obj/coff_reloc_aarch64.c @@ -1,96 +0,0 @@ -/* RelocKind <-> AArch64 PE/COFF reloc-type mapping. Mirror of - * elf_reloc_aarch64.c for PE/COFF. - * - * The ARM64 PE/COFF reloc set covers the common AArch64 patch sites: - * ADRP page-base / page-offset pairs, BRANCH26/19/14, ADDR32/64, plus - * the section-relative SECREL family which cfree does not model in v1. - * PAGEOFFSET_12L collapses all LDST*_ABS_LO12_NC widths into one wire - * code; the width is recoverable from the patched LDR/STR instruction - * encoding, so the reader picks the LDST64 form and the consumer can - * disambiguate later if it cares. ADDR32NB is image-relative; v1 - * collapses it to R_ABS32 and lets layout subtract the image base. 
*/ - -#include "obj/coff.h" - -u32 coff_aarch64_reloc_to(u32 kind /* RelocKind */) { - switch (kind) { - case R_NONE: - return IMAGE_REL_ARM64_ABSOLUTE; - case R_ABS64: - return IMAGE_REL_ARM64_ADDR64; - case R_ABS32: - return IMAGE_REL_ARM64_ADDR32; - case R_AARCH64_CALL26: - case R_AARCH64_JUMP26: - return IMAGE_REL_ARM64_BRANCH26; - case R_AARCH64_CONDBR19: - return IMAGE_REL_ARM64_BRANCH19; - case R_AARCH64_TSTBR14: - return IMAGE_REL_ARM64_BRANCH14; - case R_AARCH64_ADR_PREL_PG_HI21: - case R_AARCH64_ADR_PREL_PG_HI21_NC: - return IMAGE_REL_ARM64_PAGEBASE_REL21; - case R_AARCH64_ADR_PREL_LO21: - return IMAGE_REL_ARM64_REL21; - case R_AARCH64_ADD_ABS_LO12_NC: - return IMAGE_REL_ARM64_PAGEOFFSET_12A; - case R_AARCH64_LDST8_ABS_LO12_NC: - case R_AARCH64_LDST16_ABS_LO12_NC: - case R_AARCH64_LDST32_ABS_LO12_NC: - case R_AARCH64_LDST64_ABS_LO12_NC: - case R_AARCH64_LDST128_ABS_LO12_NC: - return IMAGE_REL_ARM64_PAGEOFFSET_12L; - case R_PC32: - case R_REL32: - return IMAGE_REL_ARM64_REL32; - case R_COFF_SECREL: - return IMAGE_REL_ARM64_SECREL; - case R_COFF_SECTION: - return IMAGE_REL_ARM64_SECTION; - case R_COFF_AARCH64_SECREL_LOW12A: - return IMAGE_REL_ARM64_SECREL_LOW12A; - case R_COFF_AARCH64_SECREL_HIGH12A: - return IMAGE_REL_ARM64_SECREL_HIGH12A; - default: - return IMAGE_REL_ARM64_ABSOLUTE; - } -} - -u32 coff_aarch64_reloc_from(u32 wire_type) { - switch (wire_type) { - case IMAGE_REL_ARM64_ABSOLUTE: - return R_NONE; - case IMAGE_REL_ARM64_ADDR64: - return R_ABS64; - case IMAGE_REL_ARM64_ADDR32: - return R_ABS32; - case IMAGE_REL_ARM64_ADDR32NB: - return R_ABS32; - case IMAGE_REL_ARM64_BRANCH26: - return R_AARCH64_CALL26; - case IMAGE_REL_ARM64_BRANCH19: - return R_AARCH64_CONDBR19; - case IMAGE_REL_ARM64_BRANCH14: - return R_AARCH64_TSTBR14; - case IMAGE_REL_ARM64_PAGEBASE_REL21: - return R_AARCH64_ADR_PREL_PG_HI21; - case IMAGE_REL_ARM64_REL21: - return R_AARCH64_ADR_PREL_LO21; - case IMAGE_REL_ARM64_PAGEOFFSET_12A: - return R_AARCH64_ADD_ABS_LO12_NC; - case 
IMAGE_REL_ARM64_PAGEOFFSET_12L: - return R_AARCH64_LDST64_ABS_LO12_NC; - case IMAGE_REL_ARM64_REL32: - return R_PC32; - case IMAGE_REL_ARM64_SECREL: - return R_COFF_SECREL; - case IMAGE_REL_ARM64_SECTION: - return R_COFF_SECTION; - case IMAGE_REL_ARM64_SECREL_LOW12A: - return R_COFF_AARCH64_SECREL_LOW12A; - case IMAGE_REL_ARM64_SECREL_HIGH12A: - return R_COFF_AARCH64_SECREL_HIGH12A; - default: - return (u32)-1; /* sentinel */ - } -} diff --git a/src/obj/coff_reloc_x86_64.c b/src/obj/coff_reloc_x86_64.c @@ -1,76 +0,0 @@ -/* RelocKind <-> x86_64 PE/COFF reloc-type mapping. Mirror of - * elf_reloc_x86_64.c for PE/COFF. - * - * PE/COFF's AMD64 reloc set is much narrower than ELF's: only ABSOLUTE, - * ADDR64, ADDR32, ADDR32NB, REL32 (with REL32_1..5 implicit-addend - * variants), plus a few section-relative forms cfree does not model in - * v1. We emit plain REL32 (4) for every PC-relative kind and let the - * explicit Reloc.addend ride in the patched bytes; on the read side - * REL32_1..5 collapse to R_PC32 (the reader applies the implicit - * addend itself). IMAGE_REL_AMD64_ABSOLUTE (== 0) doubles as the - * "unsupported" sentinel on the _to side, matching the ELF contract. 
*/ - -#include "obj/coff.h" - -u32 coff_x86_64_reloc_to(u32 kind /* RelocKind */) { - switch (kind) { - case R_NONE: - return IMAGE_REL_AMD64_ABSOLUTE; - case R_ABS64: - return IMAGE_REL_AMD64_ADDR64; - case R_ABS32: - return IMAGE_REL_AMD64_ADDR32; - case R_X64_32S: - return IMAGE_REL_AMD64_ADDR32NB; - case R_PC32: - case R_REL32: - case R_PLT32: - case R_X64_PLT32: - case R_X64_GOTPCREL: - case R_X64_GOTPCRELX: - case R_X64_REX_GOTPCRELX: - return IMAGE_REL_AMD64_REL32; - case R_COFF_SECREL: - return IMAGE_REL_AMD64_SECREL; - case R_COFF_SECTION: - return IMAGE_REL_AMD64_SECTION; - default: - return IMAGE_REL_AMD64_ABSOLUTE; - } -} - -u32 coff_x86_64_reloc_from(u32 wire_type) { - switch (wire_type) { - case IMAGE_REL_AMD64_ABSOLUTE: - return R_NONE; - case IMAGE_REL_AMD64_ADDR64: - return R_ABS64; - case IMAGE_REL_AMD64_ADDR32: - return R_ABS32; - case IMAGE_REL_AMD64_ADDR32NB: - return R_X64_32S; - case IMAGE_REL_AMD64_REL32: - case IMAGE_REL_AMD64_REL32_1: - case IMAGE_REL_AMD64_REL32_2: - case IMAGE_REL_AMD64_REL32_3: - case IMAGE_REL_AMD64_REL32_4: - case IMAGE_REL_AMD64_REL32_5: - return R_PC32; - case IMAGE_REL_AMD64_SECREL: - return R_COFF_SECREL; - case IMAGE_REL_AMD64_SECTION: - return R_COFF_SECTION; - /* SECREL7 (7-bit section-relative) appears in mingw-emitted archive - * members (intrinsic helpers, exception tables, DWARF). cfree - * doesn't currently apply or emit these, but panicking at read - * time would block ingesting any mingw archive whose non-import - * members carry .debug_info / .pdata. Map to R_NONE so the - * relocation slot is preserved structurally but treated as a - * no-op by the relocator; the member can still be dead-stripped - * when nothing references it. */ - case IMAGE_REL_AMD64_SECREL7: - return R_NONE; - default: - return (u32)-1; /* sentinel */ - } -} diff --git a/src/obj/elf.h b/src/obj/elf/elf.h diff --git a/src/obj/elf/emit.c b/src/obj/elf/emit.c @@ -0,0 +1,752 @@ +/* ELF ET_REL writer. 
Walks a finalized ObjBuilder and emits a 64-bit + * little-endian relocatable object via the supplied Writer. + * + * Layout strategy: + * 1. plan ELF section headers (one per obj section, plus synthesized + * .symtab / .strtab / .shstrtab and one .rela.<name> per obj section + * that carries relocations); + * 2. build .symtab + .strtab content (locals first — STT_SECTION + * synthesized for every input section, then ordinary locals, then + * globals/weaks); + * 3. build .rela.* content using the per-arch reloc map (selected + * by Compiler.target.arch); + * 4. build .shstrtab; + * 5. assign file offsets sequentially, respecting per-section + * addralign; + * 6. write Ehdr, then each section's bytes (seeking to its sh_offset), + * then the section header table. + * + * 64-bit little-endian only. Per-arch reloc tables (elf_reloc_<arch>.c) + * supply the RelocKind -> ELF type mapping; e_machine is selected from + * Compiler.target.arch. Big-endian / 32-bit ELF panic at entry. + * + * See doc/DESIGN.md §5.5 for the round-trip invariant: read_elf of this + * output must produce an ObjBuilder shape-equivalent to the input, + * modulo (a) section ordering and (b) the synthesized STT_SECTION + * symbols (which are visible to read_elf but were not in the input). */ + +#include <string.h> + +#include "core/arena.h" +#include "core/buf.h" +#include "core/heap.h" +#include "core/pool.h" +#include "core/slice.h" +#include "core/util.h" +#include "obj/elf/elf.h" +#include "obj/format.h" + +static SrcLoc no_loc(void) { + SrcLoc l = {0, 0, 0}; + return l; +} + +/* ---- per-ELF-section plan record ---- */ + +/* Internal section descriptor used during planning. Mirrors Elf64_Shdr + * but with an explicit pointer to the source bytes (either an obj + * Section's chunked Buf or a synthesized linear buffer). NOBITS sections + * have no source bytes and consume no file space. */ +typedef struct ElfSec { + /* Final shdr fields (little-endian-encoded at write time). 
*/ + u32 sh_name; /* offset into shstrtab */ + u32 sh_type; + u64 sh_flags; + u64 sh_addr; /* always 0 for ET_REL */ + u64 sh_offset; + u64 sh_size; + u32 sh_link; + u32 sh_info; + u64 sh_addralign; + u64 sh_entsize; + + /* Section name. The name string lives in scratch (synthesized) or in + * the global pool (obj-section names); buf-source is set for sections + * carrying obj-section bytes, raw_bytes for synthesized. */ + const char* name; + u32 name_len; + + const Buf* obj_bytes; /* one of these three is set: */ + const u8* raw_bytes; /* */ + int is_nobits; /* */ +} ElfSec; + +/* ---- emit ---- */ + +static u32 sec_flags_to_elf(u16 flags) { + u64 r = 0; + if (flags & SF_ALLOC) r |= SHF_ALLOC; + if (flags & SF_EXEC) r |= SHF_EXECINSTR; + if (flags & SF_WRITE) r |= SHF_WRITE; + if (flags & SF_TLS) r |= SHF_TLS; + if (flags & SF_MERGE) r |= SHF_MERGE; + if (flags & SF_STRINGS) r |= SHF_STRINGS; + if (flags & SF_GROUP) r |= SHF_GROUP; + if (flags & SF_LINK_ORDER) r |= SHF_LINK_ORDER; + if (flags & SF_RETAIN) r |= SHF_GNU_RETAIN; + return (u32)r; +} + +static u32 sec_sem_to_elf(u16 sem) { + switch (sem) { + case SSEM_PROGBITS: + return SHT_PROGBITS; + case SSEM_NOBITS: + return SHT_NOBITS; + case SSEM_SYMTAB: + return SHT_SYMTAB; + case SSEM_STRTAB: + return SHT_STRTAB; + case SSEM_RELA: + return SHT_RELA; + case SSEM_REL: + return SHT_REL; + case SSEM_NOTE: + return SHT_NOTE; + case SSEM_INIT_ARRAY: + return SHT_INIT_ARRAY; + case SSEM_FINI_ARRAY: + return SHT_FINI_ARRAY; + case SSEM_PREINIT_ARRAY: + return SHT_PREINIT_ARRAY; + case SSEM_GROUP: + return SHT_GROUP; + default: + return SHT_PROGBITS; + } +} + +static u8 sym_bind_to_elf(u16 bind) { + switch (bind) { + case SB_LOCAL: + return STB_LOCAL; + case SB_GLOBAL: + return STB_GLOBAL; + case SB_WEAK: + return STB_WEAK; + default: + return STB_LOCAL; + } +} + +static u8 sym_kind_to_elf(u16 kind) { + switch (kind) { + case SK_UNDEF: + return STT_NOTYPE; + case SK_FUNC: + return STT_FUNC; + case SK_OBJ: + return 
STT_OBJECT; + case SK_SECTION: + return STT_SECTION; + case SK_FILE: + return STT_FILE; + /* Tentative definitions: real ELF emitters (clang, gcc, GNU as) + * write these as STT_OBJECT with shndx=SHN_COMMON. STT_COMMON is + * a near-extinct convention that llvm-readelf renders as the + * literal type name "COMMON" — emitting it breaks roundtrip + * against any toolchain-produced .o. */ + case SK_COMMON: + return STT_OBJECT; + case SK_TLS: + return STT_TLS; + case SK_ABS: + return STT_NOTYPE; /* SHN_ABS, NOTYPE */ + case SK_NOTYPE: + return STT_NOTYPE; + case SK_IFUNC: + return STT_GNU_IFUNC; + default: + return STT_NOTYPE; + } +} + +static u8 sym_vis_to_elf(u8 vis) { + switch (vis) { + case SV_DEFAULT: + return STV_DEFAULT; + case SV_HIDDEN: + return STV_HIDDEN; + case SV_PROTECTED: + return STV_PROTECTED; + case SV_INTERNAL: + return STV_INTERNAL; + default: + return STV_DEFAULT; + } +} + +static u16 sym_shndx(const ObjSym* s, const u32* obj_to_elf, u32 nsec) { + if (s->kind == SK_COMMON) return (u16)SHN_COMMON; + if (s->kind == SK_ABS) return (u16)SHN_ABS; + /* STT_FILE conventionally carries SHN_ABS as its shndx — its value + * field is not an address. Match clang/binutils. */ + if (s->kind == SK_FILE) return (u16)SHN_ABS; + if (s->section_id == OBJ_SEC_NONE) return (u16)SHN_UNDEF; + if (s->section_id >= nsec) return (u16)SHN_UNDEF; + return (u16)obj_to_elf[s->section_id]; +} + +static const char* sym_to_str(Compiler* c, Sym n, u32* len_out) { + Slice sl = pool_slice(c->global, n); + const char* s = sl.s; + if (!s) { + *len_out = 0; + return ""; + } + *len_out = (u32)sl.len; + return s; +} + +/* Append `len` bytes of `s` followed by a single NUL to `b`, return + * the offset at which `s` was placed. + * + * If `s` already exists at some offset (as a NUL-terminated substring + * starting at any offset), reuse that offset — clang/binutils both + * dedupe trivially identical strings, and matching the convention + * keeps our strtab the same size as theirs. 
The dedupe is linear in + * the strtab; section + symbol counts are small enough that this is + * fine without a hash. */ +static u32 strtab_add(Buf* b, const char* s, u32 len) { + /* Empty string: always at offset 0 (the leading NUL). */ + if (len == 0) return 0; + + /* Linear search for an existing copy. We must scan chunk-by-chunk + * because Buf is segmented; flatten to a temp scratch buffer first + * if non-empty and search there. For our tiny strtabs, the cost is + * dominated by the writes anyway. */ + u32 total = buf_pos(b); + if (total > len) { + /* Flatten just to search — not optimal but the strtab here is + * always small (low kilobytes at most). */ + u8 stack[256]; + u8* tmp = + total <= sizeof stack ? stack : (u8*)b->heap->alloc(b->heap, total, 1); + if (tmp) { + buf_flatten(b, tmp); + for (u32 i = 0; i + len < total; ++i) { + if (tmp[i + len] == 0 && memcmp(tmp + i, s, len) == 0) { + if (tmp != stack) b->heap->free(b->heap, tmp, total); + return i; + } + } + if (tmp != stack) b->heap->free(b->heap, tmp, total); + } + } + + u32 off = total; + buf_write(b, s, len); + { + u8 z = 0; + buf_write(b, &z, 1); + } + return off; +} + +void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) { + Heap* h = (Heap*)c->ctx->heap; + + /* Run the tombstone sweep before any iteration: cascades removed + * sections into their defining symbols, drops dangling relocs, + * compacts groups, and absorbs the historical UNDEF prune. After this + * call every direct ID-based access below must skip entries whose + * `removed` bit is set. */ + obj_sweep_dead(ob); + + /* ---- target validation ------------------------------------------ */ + const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_ELF); + const ObjElfArchOps* elf = + fmt && fmt->elf_arch ? 
fmt->elf_arch(c->target.arch) : NULL; + u32 e_machine; + u32 (*reloc_to)(u32); + if (!elf || !elf->reloc_to) { + compiler_panic(c, no_loc(), "emit_elf: unsupported target arch %u", + (u32)c->target.arch); + } + e_machine = elf->e_machine; + reloc_to = elf->reloc_to; + if (c->target.big_endian) { + compiler_panic(c, no_loc(), "emit_elf: big-endian ELF not supported"); + } + if (c->target.ptr_size != 8) { + compiler_panic(c, no_loc(), "emit_elf: ptr_size %u (expected 8)", + (u32)c->target.ptr_size); + } + + /* ---- pass 1: plan ELF section list ------------------------------ */ + + u32 nobjsec = obj_section_count(ob); + + u32 nobjgrp = obj_group_count(ob); + /* Upper bound on ELF section count: + * 1 (SHN_UNDEF) + * + nobjsec - 1 (one ELF entry per real obj section) + * + nobjsec - 1 (worst case: a .rela.<name> per obj section) + * + nobjgrp - 1 (one synthesized SHT_GROUP per ObjGroup) + * + 3 (.symtab, .strtab, .shstrtab) + */ + u32 max_secs = + 1 + (nobjsec - 1) + (nobjsec - 1) + (nobjgrp ? nobjgrp - 1 : 0) + 3; + if (max_secs < 4) max_secs = 4; + ElfSec* secs = arena_array(c->scratch, ElfSec, max_secs); + u32 nsecs = 0; + memset(&secs[nsecs++], 0, sizeof secs[0]); /* index 0 = SHN_UNDEF */ + + /* Map obj section id -> ELF section index. */ + u32* obj_to_elf = arena_zarray(c->scratch, u32, nobjsec); + + for (u32 i = 1; i < nobjsec; ++i) { + const Section* s = obj_section_get(ob, i); + if (s->removed) continue; /* tombstone — see obj_sweep_dead */ + ElfSec* es = &secs[nsecs]; + memset(es, 0, sizeof *es); + u32 nlen; + es->name = sym_to_str(c, s->name, &nlen); + es->name_len = nlen; + /* Honor format-specific overrides preserved by the reader for + * sh_type/sh_flags bits the canonical SecSem/SecFlag enums + * don't model (e.g. SHT_LLVM_ADDRSIG, SHF_EXCLUDE). */ + es->sh_type = (s->ext_kind == OBJ_EXT_ELF && s->ext_type) + ? 
s->ext_type + : sec_sem_to_elf(s->sem); + es->sh_flags = sec_flags_to_elf(s->flags); + if (s->ext_kind == OBJ_EXT_ELF) es->sh_flags |= s->ext_flags; + es->sh_addr = 0; + es->sh_addralign = s->align ? s->align : 1; + es->sh_entsize = s->entsize; + es->sh_link = 0; + es->sh_info = 0; + if (s->sem == SSEM_NOBITS) { + es->is_nobits = 1; + es->sh_size = s->bss_size; + } else { + es->obj_bytes = &s->bytes; + es->sh_size = s->bytes.total; + } + obj_to_elf[i] = nsecs++; + } + + /* ---- pass 2: build .symtab + .strtab content -------------------- */ + + /* .strtab: leading NUL byte. Then a name per emitted symbol. */ + Buf strtab; + buf_init(&strtab, h); + { + u8 z = 0; + buf_write(&strtab, &z, 1); + } + + /* The .symtab is built into a contiguous arena buffer of fixed-size + * 24-byte records. We don't know the count up front; bound by + * (nobjsec section symbols) + (obj symbol count). */ + u32 nobjsym = 0; + { + ObjSymIter* it = obj_symiter_new(ob); + ObjSymEntry e; + while (obj_symiter_next(it, &e)) ++nobjsym; + obj_symiter_free(it); + } + u32 max_syms = 1 + (nobjsec - 1) + nobjsym; + u8* symtab = (u8*)arena_alloc(c->scratch, (size_t)ELF64_SYM_SIZE * max_syms, + _Alignof(u64)); + u32 nsyms = 0; + memset(&symtab[nsyms * ELF64_SYM_SIZE], 0, ELF64_SYM_SIZE); + nsyms = 1; /* index 0: STN_UNDEF */ + +/* Helper to emit one Elf64_Sym record at index `idx` into symtab. 
*/ +#define WRITE_SYM(idx, st_name, st_info, st_other, st_shndx, st_value, \ + st_size) \ + do { \ + u8* slot = &symtab[(idx) * ELF64_SYM_SIZE]; \ + slot[0] = (u8)((st_name)); \ + slot[1] = (u8)((st_name) >> 8); \ + slot[2] = (u8)((st_name) >> 16); \ + slot[3] = (u8)((st_name) >> 24); \ + slot[4] = (u8)((st_info)); \ + slot[5] = (u8)((st_other)); \ + slot[6] = (u8)((st_shndx)); \ + slot[7] = (u8)((st_shndx) >> 8); \ + for (int _b = 0; _b < 8; ++_b) \ + slot[8 + _b] = (u8)((u64)(st_value) >> (_b * 8)); \ + for (int _b = 0; _b < 8; ++_b) \ + slot[16 + _b] = (u8)((u64)(st_size) >> (_b * 8)); \ + } while (0) + + /* No automatic STT_SECTION synthesis. Section symbols are emitted + * iff they are present in the input ObjBuilder (typically as + * SK_SECTION ObjSyms preserved by read_elf, or added explicitly by + * a hand-built caller that needs to reference a section by sym). + * This matches clang's output: only sections referenced by section + * symbols carry one. */ + + /* Map obj symbol id -> elf symbol index. */ + u32* sym_to_elf = arena_zarray(c->scratch, u32, nobjsym + 2); + + /* Two passes over obj symbols: locals, then globals/weak. */ + for (int pass = 0; pass < 2; ++pass) { + ObjSymIter* it = obj_symiter_new(ob); + ObjSymEntry e; + while (obj_symiter_next(it, &e)) { + const ObjSym* s = e.sym; + if (s->removed) continue; /* spurious-UNDEF prune + explicit removal */ + int is_local = (s->bind == SB_LOCAL); + if ((pass == 0) != is_local) continue; + u32 nlen; + const char* nm = sym_to_str(c, s->name, &nlen); + u32 nameoff = nlen ? strtab_add(&strtab, nm, nlen) : 0; + u8 info = + ELF64_ST_INFO(sym_bind_to_elf(s->bind), sym_kind_to_elf(s->kind)); + u8 other = sym_vis_to_elf(s->vis); + u16 shndx = sym_shndx(s, obj_to_elf, nobjsec); + u64 value = (s->kind == SK_COMMON) ? 
s->common_align : s->value; + WRITE_SYM(nsyms, nameoff, info, other, shndx, value, s->size); + sym_to_elf[e.id] = nsyms; + nsyms++; + } + obj_symiter_free(it); + } +#undef WRITE_SYM + + /* sh_info on .symtab is the index of the first non-local symbol. + * Locals = 1 (STN_UNDEF) + count of input-side LOCAL obj symbols. */ + u32 nlocals = 1; + { + ObjSymIter* it = obj_symiter_new(ob); + ObjSymEntry e; + while (obj_symiter_next(it, &e)) { + if (e.sym->removed) continue; + if (e.sym->bind == SB_LOCAL) ++nlocals; + } + obj_symiter_free(it); + } + + /* Append .symtab + .strtab + .shstrtab planning records. + * sh_link/sh_info for .symtab and .rela.* are filled in once we know + * each section's elf index. */ + u32 idx_symtab = 0, idx_strtab = 0, idx_shstrtab = 0; + + /* ---- pass 2.5: synthesize SHT_GROUP sections from ObjGroups ---- + * Append one SHT_GROUP section per ObjGroup. The body is a 4-byte LE + * flags word followed by the elf section index of each member. + * Placed before relas so the file layout has data sections, then + * groups, then relas/symtab/strtab — matching clang's ordering and + * keeping data-section offsets independent of group presence. */ + u32* group_elf_idx = + nobjgrp > 1 ? arena_array(c->scratch, u32, nobjgrp) : NULL; + if (group_elf_idx) memset(group_elf_idx, 0, sizeof(u32) * nobjgrp); + for (u32 gi = 1; gi < nobjgrp; ++gi) { + const ObjGroup* g = obj_group_get(ob, gi); + if (!g || g->removed) continue; + + u32 body_size = 4u + 4u * g->nsections; + u8* body = (u8*)arena_alloc(c->scratch, body_size, _Alignof(u32)); + u32 gflags = g->flags ? g->flags : 1u; /* GRP_COMDAT default */ + body[0] = (u8)(gflags); + body[1] = (u8)(gflags >> 8); + body[2] = (u8)(gflags >> 16); + body[3] = (u8)(gflags >> 24); + for (u32 j = 0; j < g->nsections; ++j) { + ObjSecId sid = g->sections[j]; + u32 eidx = (sid && sid < nobjsec) ? 
obj_to_elf[sid] : 0; + u8* slot = body + 4 + j * 4; + slot[0] = (u8)(eidx); + slot[1] = (u8)(eidx >> 8); + slot[2] = (u8)(eidx >> 16); + slot[3] = (u8)(eidx >> 24); + } + + u32 nlen; + const char* gname = sym_to_str(c, g->name, &nlen); + if (nlen == 0) { + gname = ".group"; + nlen = 6; + } + + ElfSec* es = &secs[nsecs]; + memset(es, 0, sizeof *es); + es->name = gname; + es->name_len = nlen; + es->sh_type = SHT_GROUP; + es->sh_flags = 0; + es->sh_addralign = 4; + es->sh_entsize = 4; + es->sh_info = (g->signature && g->signature < nobjsym + 2) + ? sym_to_elf[g->signature] + : 0; + /* sh_link patched below once idx_symtab is known. */ + es->raw_bytes = body; + es->sh_size = body_size; + group_elf_idx[gi] = nsecs; + nsecs++; + } + + /* ---- pass 3: build .rela.<name> contents ------------------------ */ + + /* Allocate one .rela section per obj section that has any relocs. */ + u32 total_relocs = obj_reloc_total(ob); + + typedef struct RelaPlan { + u32 obj_section; /* obj section the rela applies to */ + u8* bytes; /* arena-allocated rela bytes */ + u32 size; /* bytes count = nrelocs * 24 */ + } RelaPlan; + + RelaPlan* rela_plans = arena_zarray(c->scratch, RelaPlan, nobjsec); + u32 nrela_plans = 0; + + for (u32 si = 1; si < nobjsec; ++si) { + const Section* host = obj_section_get(ob, si); + if (!host || host->removed) continue; + u32 nr = obj_reloc_count(ob, si); + if (!nr) continue; + u8* buf = (u8*)arena_alloc(c->scratch, (size_t)ELF64_RELA_SIZE * nr, + _Alignof(u64)); + u32 j = 0; + for (u32 i = 0; i < total_relocs; ++i) { + const Reloc* r = obj_reloc_at(ob, i); + if (r->removed) continue; + if (r->section_id != si) continue; + u32 etype = reloc_to(r->kind); + if (etype == ELF_R_AARCH64_NONE /* == ELF_R_X86_64_NONE == 0 */ && + r->kind != R_NONE) { + compiler_panic(c, no_loc(), + "emit_elf: unsupported relocation kind %u for arch %u", + (u32)r->kind, (u32)c->target.arch); + } + u32 sym_elf_idx; + if (r->sym == OBJ_SYM_NONE) { + /* Reloc against a section: use the 
synthesized + * STT_SECTION symbol if the obj reloc carries a + * section_id-equivalent; otherwise 0. */ + sym_elf_idx = 0; + } else { + sym_elf_idx = sym_to_elf[r->sym]; + } + u8* slot = &buf[j * ELF64_RELA_SIZE]; + for (int b = 0; b < 8; ++b) slot[b] = (u8)((u64)r->offset >> (b * 8)); + u64 info = ELF64_R_INFO(sym_elf_idx, etype); + for (int b = 0; b < 8; ++b) slot[8 + b] = (u8)(info >> (b * 8)); + for (int b = 0; b < 8; ++b) + slot[16 + b] = (u8)((u64)r->addend >> (b * 8)); + ++j; + } + rela_plans[nrela_plans].obj_section = si; + rela_plans[nrela_plans].bytes = buf; + rela_plans[nrela_plans].size = nr * ELF64_RELA_SIZE; + nrela_plans++; + } + + /* Append ElfSec entries for each .rela.<name>. Names are ".rela" + + * the obj section name; allocate in scratch. */ + u32* rela_elf_idx = arena_array(c->scratch, u32, nrela_plans + 1); + for (u32 ri = 0; ri < nrela_plans; ++ri) { + u32 si = rela_plans[ri].obj_section; + const Section* s = obj_section_get(ob, si); + u32 base_len; + const char* base = sym_to_str(c, s->name, &base_len); + u32 nlen = 5 + base_len; /* ".rela" + base */ + char* nm = (char*)arena_alloc(c->scratch, nlen + 1, 1); + memcpy(nm, ".rela", 5); + memcpy(nm + 5, base, base_len); + nm[nlen] = 0; + + ElfSec* es = &secs[nsecs]; + memset(es, 0, sizeof *es); + es->name = nm; + es->name_len = nlen; + es->sh_type = SHT_RELA; + es->sh_flags = SHF_INFO_LINK; + es->sh_addralign = 8; + es->sh_entsize = ELF64_RELA_SIZE; + es->sh_info = obj_to_elf[si]; /* section the relas apply to */ + /* sh_link filled below once we know symtab's elf index. */ + es->raw_bytes = rela_plans[ri].bytes; + es->sh_size = rela_plans[ri].size; + rela_elf_idx[ri] = nsecs; + nsecs++; + } + + /* Append .symtab. 
*/ + { + ElfSec* es = &secs[nsecs]; + memset(es, 0, sizeof *es); + es->name = ".symtab"; + es->name_len = 7; + es->sh_type = SHT_SYMTAB; + es->sh_flags = 0; + es->sh_addralign = 8; + es->sh_entsize = ELF64_SYM_SIZE; + es->raw_bytes = symtab; + es->sh_size = (u64)nsyms * ELF64_SYM_SIZE; + es->sh_info = nlocals; /* first non-local symbol */ + idx_symtab = nsecs; + nsecs++; + } + + /* Patch sh_link on each .rela section now that we have idx_symtab. */ + for (u32 ri = 0; ri < nrela_plans; ++ri) { + secs[rela_elf_idx[ri]].sh_link = idx_symtab; + } + /* SHT_GROUP also points its sh_link at .symtab (the symtab the + * signature symbol's index in sh_info refers to). */ + for (u32 gi = 1; gi < nobjgrp; ++gi) { + if (group_elf_idx && group_elf_idx[gi]) { + secs[group_elf_idx[gi]].sh_link = idx_symtab; + } + } + + /* ---- pass 4: append section names to the same strtab and emit it. + * + * clang reuses .strtab for both symbol names and section names — + * e_shstrndx and .symtab.sh_link both point at it. Match that + * convention: continue appending into `strtab` (which already + * contains the symbol names), then emit one STRTAB section. */ + + /* secs[0] (SHN_UNDEF) carries name "" → offset 0. */ + secs[0].sh_name = 0; + for (u32 i = 1; i < nsecs; ++i) { + secs[i].sh_name = strtab_add(&strtab, secs[i].name, secs[i].name_len); + } + + /* Append the .strtab section record itself; its own name lands in + * the same buffer (so the strtab is self-describing). 
*/ + { + const char* nm = ".strtab"; + u32 nlen = 7; + u32 nameoff = strtab_add(&strtab, nm, nlen); + u32 sz = buf_pos(&strtab); + u8* flat = (u8*)arena_alloc(c->scratch, sz, 1); + buf_flatten(&strtab, flat); + buf_fini(&strtab); + + ElfSec* es = &secs[nsecs]; + memset(es, 0, sizeof *es); + es->name = nm; + es->name_len = nlen; + es->sh_name = nameoff; + es->sh_type = SHT_STRTAB; + es->sh_addralign = 1; + es->raw_bytes = flat; + es->sh_size = sz; + idx_strtab = nsecs; + idx_shstrtab = nsecs; /* same section serves both roles */ + nsecs++; + } + secs[idx_symtab].sh_link = idx_strtab; + + /* ---- pass 5: assign file offsets -------------------------------- */ + + u64 cur = ELF64_EHDR_SIZE; + for (u32 i = 1; i < nsecs; ++i) { + ElfSec* es = &secs[i]; + if (es->is_nobits) { + /* sh_offset for NOBITS is conventionally where the next + * non-NOBITS section begins; we set it to cur without + * advancing. */ + es->sh_offset = cur; + continue; + } + u64 a = es->sh_addralign ? es->sh_addralign : 1; + cur = ALIGN_UP(cur, a); + es->sh_offset = cur; + cur += es->sh_size; + } + cur = ALIGN_UP(cur, (u64)8); + u64 e_shoff = cur; + + /* ---- pass 6: write Ehdr ----------------------------------------- */ + + u8 ident[EI_NIDENT] = {0}; + ident[EI_MAG0] = ELFMAG0; + ident[EI_MAG1] = ELFMAG1; + ident[EI_MAG2] = ELFMAG2; + ident[EI_MAG3] = ELFMAG3; + ident[EI_CLASS] = ELFCLASS64; + ident[EI_DATA] = ELFDATA2LSB; + ident[EI_VERSION] = EV_CURRENT; + /* SysV is the canonical OSABI for relocatable AArch64 .o; clang and + * GNU ld both emit it for Linux targets. Linking does not key off + * EI_OSABI for plain AArch64 ELF — it's e_machine that matters. + * + * Exception: GNU extensions (STT_GNU_IFUNC, SHF_GNU_RETAIN, ...) + * require EI_OSABI=ELFOSABI_GNU. Clang sets it for any TU using a + * GNU-flavored marker; we mirror that so roundtrip is byte-stable. 
*/ + ident[EI_OSABI] = ELFOSABI_NONE; + { + ObjSymIter* it = obj_symiter_new(ob); + ObjSymEntry e; + u32 nsec = obj_section_count(ob), si; + while (obj_symiter_next(it, &e)) { + if (e.sym->removed) continue; + if (e.sym->kind == SK_IFUNC) { + ident[EI_OSABI] = ELFOSABI_GNU; + break; + } + } + obj_symiter_free(it); + if (ident[EI_OSABI] != ELFOSABI_GNU) { + for (si = 1; si < nsec; ++si) { + const Section* sec = obj_section_get(ob, si); + if (sec && !sec->removed && (sec->flags & SF_RETAIN)) { + ident[EI_OSABI] = ELFOSABI_GNU; + break; + } + } + } + } + /* e_flags: prefer the value preserved from a prior read (round-trip); + * else synthesize a sensible per-arch default. RV64 cfree targets the + * Linux psABI's lp64d soft-relax convention (RVC + double-float ABI). */ + u32 e_flags; + if (!obj_get_elf_e_flags(ob, &e_flags)) e_flags = elf->e_flags; + + cfree_writer_seek(w, 0); + cfree_writer_write(w, ident, EI_NIDENT); + elf_wr_u16(w, ET_REL); + elf_wr_u16(w, (u16)e_machine); + elf_wr_u32(w, EV_CURRENT); + elf_wr_u64(w, 0); /* e_entry */ + elf_wr_u64(w, 0); /* e_phoff */ + elf_wr_u64(w, e_shoff); /* e_shoff */ + elf_wr_u32(w, e_flags); /* e_flags */ + elf_wr_u16(w, ELF64_EHDR_SIZE); /* e_ehsize */ + elf_wr_u16(w, 0); /* e_phentsize */ + elf_wr_u16(w, 0); /* e_phnum */ + elf_wr_u16(w, ELF64_SHDR_SIZE); /* e_shentsize */ + elf_wr_u16(w, (u16)nsecs); /* e_shnum */ + elf_wr_u16(w, (u16)idx_shstrtab); /* e_shstrndx */ + + /* ---- pass 7: write each section's bytes ------------------------- */ + + for (u32 i = 1; i < nsecs; ++i) { + ElfSec* es = &secs[i]; + if (es->is_nobits || es->sh_size == 0) continue; + cfree_writer_seek(w, es->sh_offset); + if (es->obj_bytes) { + u32 sz = es->obj_bytes->total; + u8* tmp = (u8*)h->alloc(h, sz ? sz : 1, 1); + if (sz) buf_flatten(es->obj_bytes, tmp); + cfree_writer_write(w, tmp, sz); + h->free(h, tmp, sz ? 
sz : 1); + } else if (es->raw_bytes) { + cfree_writer_write(w, es->raw_bytes, (size_t)es->sh_size); + } + } + + /* ---- pass 8: write section header table ------------------------- */ + + cfree_writer_seek(w, e_shoff); + for (u32 i = 0; i < nsecs; ++i) { + const ElfSec* es = &secs[i]; + elf_wr_u32(w, es->sh_name); + elf_wr_u32(w, es->sh_type); + elf_wr_u64(w, es->sh_flags); + elf_wr_u64(w, es->sh_addr); + elf_wr_u64(w, es->sh_offset); + elf_wr_u64(w, es->sh_size); + elf_wr_u32(w, es->sh_link); + elf_wr_u32(w, es->sh_info); + elf_wr_u64(w, es->sh_addralign); + elf_wr_u64(w, es->sh_entsize); + } +} diff --git a/src/obj/elf/link.c b/src/obj/elf/link.c @@ -0,0 +1,1421 @@ +/* link_emit_elf: write a static ET_EXEC ELF64 image to the + * caller-provided Writer. + * + * 64-bit little-endian only. The per-arch ELF reloc-type tables in + * obj/elf_reloc_<arch>.c handle RelocKind <-> ELF translation; this + * file gets e_machine from the link arch descriptor. + * + * File layout (in write order): + * + * [headers PT_LOAD, PF_R, mapped at IMAGE_BASE] + * Ehdr64 + * Phdr64[nphdr] -- one per loaded segment + headers + + * PT_NOTE .note.gnu.build-id -- 12 + 16 = 28 bytes + * (deterministic 16-byte id) pad to PAGE + * + * [PT_LOAD per kept image segment, in img->segments order] + * segment bytes (padded to its file_offset) + * + * [non-allocatable sections, file-only] + * .symtab -- ELF64_SYM_SIZE * nsyms + * .strtab -- NUL-led blob + * .shstrtab -- NUL-led blob + * + * [section header table at e_shoff] + * Shdr64[nshdr] + * + * Section header schema (for nm / objdump -t / gdb consumption): + * + * 0 SHN_UNDEF (zero entry) + * N one shdr per loaded sub-region: .text/.rodata/.data/.bss as + * the corresponding RX/R/RW segments materialize (.bss split + * out as the trailing memsz>filesz tail of the RW segment). 
+ * 1 .note.gnu.build-id (allocatable, in headers PT_LOAD) + * 1 .symtab (sh_link -> .strtab; sh_info = first non-local idx) + * 1 .strtab + * 1 .shstrtab (Ehdr64.e_shstrndx) + * + * Build-id is computed deterministically over the post-relocation + * segment bytes (FNV-1a 64 over each segment, mixed into a 128-bit + * accumulator). The 16-byte digest is written into the note before the + * note is emitted to the Writer. + * + * The image's image-relative addresses on entry are bumped by + * align_up(headers_size, PAGE) before relocs are applied, exactly as + * before — segment bytes / symbol vaddrs land at their final IMAGE_BASE + * absolute addresses by the time relocs run. */ + +#include "link/link.h" + +#include <string.h> + +#include "core/heap.h" +#include "core/pool.h" +#include "core/slice.h" +#include "core/util.h" +#include "core/vec.h" +#include "link/link_arch.h" +#include "link/link_internal.h" +#include "obj/elf/elf.h" +#include "obj/format.h" + +static SrcLoc no_loc(void) { + SrcLoc l = {0, 0, 0}; + return l; +} + +/* ---- ELF64 wire structs (subset) ---- */ + +#define EI_NIDENT 16 + +typedef struct __attribute__((packed)) Ehdr64 { + u8 e_ident[EI_NIDENT]; + u16 e_type; + u16 e_machine; + u32 e_version; + u64 e_entry; + u64 e_phoff; + u64 e_shoff; + u32 e_flags; + u16 e_ehsize; + u16 e_phentsize; + u16 e_phnum; + u16 e_shentsize; + u16 e_shnum; + u16 e_shstrndx; +} Ehdr64; + +typedef struct __attribute__((packed)) Phdr64 { + u32 p_type; + u32 p_flags; + u64 p_offset; + u64 p_vaddr; + u64 p_paddr; + u64 p_filesz; + u64 p_memsz; + u64 p_align; +} Phdr64; + +typedef struct __attribute__((packed)) Shdr64 { + u32 sh_name; + u32 sh_type; + u64 sh_flags; + u64 sh_addr; + u64 sh_offset; + u64 sh_size; + u32 sh_link; + u32 sh_info; + u64 sh_addralign; + u64 sh_entsize; +} Shdr64; + +#define PT_NOTE 4 +#define PT_TLS 7 + +/* Static ET_EXEC base. ET_DYN (PIE) uses 0 — the loader picks the + * runtime base. 
The active value lives in `img_base` below; the macro + * stays for the static path's hard-coded vaddrs. */ +#define IMAGE_BASE_STATIC 0x400000ULL + +#define BUILD_ID_DESC_LEN 16u +#define NOTE_NAME_GNU "GNU" +#define NOTE_NAME_GNU_LEN 4u /* "GNU\0" */ +#define NOTE_BUILD_ID_TYPE 3u +#define BUILD_ID_NOTE_BYTES (12u + NOTE_NAME_GNU_LEN + BUILD_ID_DESC_LEN) + +/* ---- byte writer helpers ---- */ + +static void write_bytes(Writer* w, const void* data, size_t n) { + w->write(w, data, n); +} + +static void write_zeroes(Writer* w, size_t n) { + static const u8 zeroes[256] = {0}; + while (n) { + size_t step = n > sizeof(zeroes) ? sizeof(zeroes) : n; + w->write(w, zeroes, step); + n -= step; + } +} + +static u32 perms_to_pflags(u32 secflags) { + u32 f = PF_R; + if (secflags & SF_EXEC) f |= PF_X; + if (secflags & SF_WRITE) f |= PF_W; + return f; +} + +/* Scripted-layout post-pass: vaddrs are already final (the script + * pinned them via `. = …`), so only file offsets need to bump to + * leave room for ehdr+phdrs. Mirror of shift_image_addresses but + * touches only the file dimension. 
*/ +static void shift_image_file_offsets(LinkImage* img, u64 delta) { + u32 i; + for (i = 0; i < img->nsegments; ++i) img->segments[i].file_offset += delta; + for (i = 0; i < img->nsections; ++i) img->sections[i].file_offset += delta; + for (i = 0; i < LinkRelocs_count(&img->relocs); ++i) + LinkRelocs_at(&img->relocs, i)->write_file_offset += delta; +} + +static void shift_image_addresses(LinkImage* img, u64 delta) { + u32 i; + for (i = 0; i < img->nsegments; ++i) { + img->segments[i].file_offset += delta; + img->segments[i].vaddr += delta; + } + for (i = 0; i < img->nsections; ++i) { + img->sections[i].file_offset += delta; + img->sections[i].vaddr += delta; + } + for (i = 0; i < LinkRelocs_count(&img->relocs); ++i) { + LinkRelocs_at(&img->relocs, i)->write_file_offset += delta; + LinkRelocs_at(&img->relocs, i)->write_vaddr += delta; + } + for (i = 0; i < LinkSyms_count(&img->syms); ++i) { + LinkSymbol* s = LinkSyms_at(&img->syms, i); + if (s->kind == SK_ABS) continue; + if (!s->defined) continue; + s->vaddr += delta; + } + /* tls_vaddr lives in the same image-relative coordinate system as + * the segments it tracks, so it bumps with them. */ + if (img->tls_memsz) img->tls_vaddr += delta; + /* Dyn-link state mirrors a few segment / section vaddrs and pre- + * populated DynRela.r_offset values from layout_dyn. Bump them so + * the post-shift .rela.plt / .dynamic emit and apply_all_relocs see + * the right addresses (sym_plt_vaddr is read to redirect CALL26 + * against imports). 
*/ + if (img->dyn) { + LinkDynState* dyn = img->dyn; + if (dyn->plt_vaddr) dyn->plt_vaddr += delta; + if (dyn->got_plt_vaddr) dyn->got_plt_vaddr += delta; + if (dyn->dynamic_vaddr) dyn->dynamic_vaddr += delta; + if (dyn->sym_plt_vaddr) { + u32 j; + for (j = 0; j < dyn->sym_dynidx_size; ++j) + if (dyn->sym_plt_vaddr[j]) dyn->sym_plt_vaddr[j] += delta; + } + if (dyn->rela_plt) { + u32 j; + for (j = 0; j < dyn->nrela_plt; ++j) dyn->rela_plt[j].r_offset += delta; + } + /* rela_dyn is populated by apply_all_relocs (which runs after this + * shift), so its records are already in post-shift coordinates. */ + } +} + +/* AArch64 ELF ABI: the per-thread TLS block starts at TP + 16 bytes + * (the TCB sits ahead of the TLS image). RISC-V psABI normally points + * tp at the start of the TLS image; the cfree harness's start.c + * places a 16-byte TCB ahead of .tdata and biases tp accordingly, so + * the TPREL offset for both arches is (target - tls_vaddr) + 16. */ +#define TLS_TCB_SIZE 16ull + +static int reloc_is_tlsle(RelocKind k) { + return k == R_AARCH64_TLSLE_ADD_TPREL_HI12 || + k == R_AARCH64_TLSLE_ADD_TPREL_LO12_NC || k == R_RV_TPREL_HI20 || + k == R_RV_TPREL_LO12_I || k == R_RV_TPREL_LO12_S; +} + +/* x86_64 SysV ABI: TLS variant II — the per-thread TLS image sits at + * *negative* offsets from %fs (which points at the TCB). start.c + * lays out [tdata | tbss | TCB] and arch_prctl(ARCH_SET_FS, &TCB), so + * a symbol at offset X within the TLS image is at fs-relative offset + * (X - tls_memsz). The two ELF reloc kinds R_X86_64_TPOFF32/_TPOFF64 + * encode that signed offset directly at the reloc site (no TCB bias — + * variant II's TCB sits *after* the image, so TPOFF is negative). */ +static int reloc_is_x64_tlsle(RelocKind k) { + return k == R_X64_TPOFF32 || k == R_X64_TPOFF64; +} + +static int reloc_is_abs(RelocKind k) { return k == R_ABS32 || k == R_ABS64; } + +/* Function-call relocs that may route through the PLT when the target + * is imported. 
aarch64 CALL26/JUMP26, x86_64 PLT32, and risc-v CALL_PLT + * (which cfree maps to R_PLT32) all carry the "call this address; if + * it's not resolvable here use the PLT trampoline" contract; the apply + * pass overwrites S with the PLT entry vaddr in that case. */ +static int reloc_is_branch26(RelocKind k) { + return k == R_AARCH64_CALL26 || k == R_AARCH64_JUMP26 || k == R_X64_PLT32 || + k == R_PLT32 || k == R_RV_CALL; +} + +static void emit_dyn_record(LinkImage* img, u64 site_vaddr, u32 reloc_type, + u32 dynidx, i64 addend) { + LinkDynState* dyn = img->dyn; + if (!dyn || !dyn->rela_dyn) return; + if (dyn->nrela_dyn >= dyn->cap_rela_dyn) { + compiler_panic(img->c, no_loc(), + "link: too many .rela.dyn records (%u >= %u); raise " + "cap_rela_dyn in layout_dyn", + dyn->nrela_dyn, dyn->cap_rela_dyn); + } + DynRela* r = &dyn->rela_dyn[dyn->nrela_dyn++]; + r->r_offset = site_vaddr; + r->r_info = ELF64_R_INFO((u64)dynidx, reloc_type); + r->r_addend = addend; +} + +static const ObjElfArchOps* elf_arch_or_panic(Compiler* c, const char* where) { + const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_ELF); + const ObjElfArchOps* arch = + fmt && fmt->elf_arch ? fmt->elf_arch(c->target.arch) : NULL; + if (!arch) + compiler_panic(c, no_loc(), "%.*s: no ELF arch descriptor", + SLICE_ARG(slice_from_cstr(where))); + return arch; +} + +static void emit_relative_record(LinkImage* img, u64 site_vaddr, u64 addend) { + const ObjElfArchOps* arch = elf_arch_or_panic(img->c, "link"); + emit_dyn_record(img, site_vaddr, arch->r_relative, 0, (i64)addend); +} + +static void emit_globdat_record(LinkImage* img, u64 site_vaddr, u32 dynidx, + i64 addend) { + const ObjElfArchOps* arch = elf_arch_or_panic(img->c, "link"); + emit_dyn_record(img, site_vaddr, arch->r_glob_dat, dynidx, addend); +} + +/* RISC-V PCREL_LO12_* references the address of an AUIPC carrying the + * paired PCREL_HI20. 
Given the AUIPC's site vaddr (post-shift), find + * its PCREL_HI20 reloc and compute the displacement that AUIPC + * encoded — the LO12 then takes the low 12 bits of the same disp. + * + * Linear scan over img->relocs is fine in practice: kernel images and + * cg cases produce at most a few hundred relocs total. */ +static i64 rv_pcrel_lo12_disp(LinkImage* img, u64 auipc_vaddr, u64 img_base) { + u32 i; + for (i = 0; i < LinkRelocs_count(&img->relocs); ++i) { + const LinkRelocApply* hi = LinkRelocs_at(&img->relocs, i); + const LinkSymbol* hi_tgt; + u64 hi_S, hi_P; + if (hi->kind != R_RV_PCREL_HI20 && hi->kind != R_RV_GOT_HI20) continue; + if (hi->write_vaddr + img_base != auipc_vaddr) continue; + hi_tgt = LinkSyms_at(&img->syms, hi->target - 1); + hi_S = (hi_tgt->kind == SK_ABS) ? hi_tgt->vaddr : hi_tgt->vaddr + img_base; + hi_P = hi->write_vaddr + img_base; + return (i64)hi_S + hi->addend - (i64)hi_P; + } + compiler_panic(img->c, no_loc(), + "link: PCREL_LO12 at 0x%llx has no paired PCREL_HI20", + (unsigned long long)auipc_vaddr); + return 0; +} + +static void apply_all_relocs(LinkImage* img, u64 img_base) { + u32 i; + int pie = img->pie; + for (i = 0; i < LinkRelocs_count(&img->relocs); ++i) { + LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); + const LinkSymbol* tgt = LinkSyms_at(&img->syms, r->target - 1); + const LinkSection* sec = &img->sections[r->link_section_id - 1]; + const LinkSegment* seg = &img->segments[sec->segment_id - 1]; + u64 S, P; + u8* P_bytes; + if (reloc_is_tlsle(r->kind)) { + /* S is the target's TP-relative offset: distance from the + * TLS image start plus the 16-byte TCB. Both vaddrs are + * in the same (post-shift, image-relative) coordinate + * system, so img_base cancels out. */ + S = (tgt->vaddr - img->tls_vaddr) + TLS_TCB_SIZE; + } else if (reloc_is_x64_tlsle(r->kind)) { + /* x86_64 variant II: TP points just past the TLS image, so a + * symbol at offset X within the image is at TP-relative offset + * (X - tls_memsz). 
Cast through i64/u64 so the reloc apply + * writes the full 32- or 64-bit signed value. */ + i64 off = (i64)(tgt->vaddr - img->tls_vaddr) - (i64)img->tls_memsz; + S = (u64)off; + } else if (r->kind == R_RV_PCREL_LO12_I || r->kind == R_RV_PCREL_LO12_S) { + /* PCREL_LO12: rewrite S so that link_reloc_apply's existing + * LO12_I/LO12_S encoder produces the right low 12 bits of the + * paired AUIPC's PC-relative displacement. The reloc's own + * addend is unused; signed lo12 = disp & 0xfff. */ + P = r->write_vaddr + img_base; + P_bytes = img->segment_bytes[seg->id - 1] + + (size_t)(r->write_file_offset - seg->file_offset); + { + i64 disp = rv_pcrel_lo12_disp(img, tgt->vaddr + img_base, img_base); + RelocKind alias = + (r->kind == R_RV_PCREL_LO12_I) ? R_RV_LO12_I : R_RV_LO12_S; + link_reloc_apply(img->c, alias, P_bytes, (u64)disp, 0, P); + } + continue; + } else { + S = tgt->vaddr + img_base; + if (tgt->kind == SK_ABS) S = tgt->vaddr; + } + P = r->write_vaddr + img_base; + P_bytes = img->segment_bytes[seg->id - 1] + + (size_t)(r->write_file_offset - seg->file_offset); + + /* Imported target: redirect / rewrite per reloc kind (Phase 5). + * + * - CALL26 / JUMP26: target the import's PLT entry. The PLT stub + * reads .got.plt[3+i], which the loader pre-fills via JUMP_SLOT + * (.rela.plt). S becomes the PLT-entry vaddr; the existing + * apply path computes the disp from there. + * - R_ABS{32,64}: leave the patch site at zero and emit a + * GLOB_DAT record so the loader writes the resolved address + * into the site at load time. This covers both + * layout_got-emitted .got slot fills (target = import) and any + * direct absolute reference in user data (e.g. a function + * pointer initializer). 
+ * - GOT-page / LO12-NC against an import: emit_reloc_records has + * already redirected the target from the import to the + * synthetic .got slot symbol, so the apply path here sees the + * slot, not the import — nothing special needed; the slot's + * own R_ABS64 fill against the (vaddr=0) import will trip the + * abs-import branch above and emit GLOB_DAT. + * + * Anything else against an imported symbol (e.g. PREL19 / ADR + * etc.) is rare in real binaries and would need its own + * dynamic-reloc kind; panic loudly so a future test that needs + * it announces itself. */ + if (tgt->imported) { + /* `tgt` may be a per-input shadow LinkSymbol — resolve_undefs + * stamps `imported = 1` on every undef matched by name, but + * collect_imports only stashes plt_vaddr / dynidx on the + * canonical entry registered in img->globals. Resolve to the + * canonical id before indexing the dyn-state arrays. */ + LinkSymId canon_id = tgt->id; + if (tgt->name != 0) { + LinkSymId hit = symhash_get(&img->globals, tgt->name); + if (hit != LINK_SYM_NONE) canon_id = hit; + } + u32 dynidx = (img->dyn && canon_id < img->dyn->sym_dynidx_size) + ? img->dyn->sym_dynidx[canon_id] + : 0u; + if (reloc_is_branch26(r->kind)) { + u64 plt_v = (img->dyn && canon_id < img->dyn->sym_dynidx_size) + ? img->dyn->sym_plt_vaddr[canon_id] + : 0u; + if (plt_v == 0) + compiler_panic(img->c, no_loc(), + "link: imported sym has no PLT entry (CALL26)"); + S = plt_v + img_base; + link_reloc_apply(img->c, r->kind, P_bytes, S, r->addend, P); + continue; + } + if (reloc_is_abs(r->kind)) { + if (dynidx == 0) + compiler_panic(img->c, no_loc(), + "link: imported sym has no .dynsym entry"); + emit_globdat_record(img, r->write_vaddr, dynidx, r->addend); + /* Site bytes are irrelevant: the loader's GLOB_DAT writes + * (sym_value + r_addend) into r_offset before user code runs, + * overwriting whatever's there. Leaving the existing zero + * fill saves a write. */ + continue; + } + { + Slice nm_s = + tgt->name ? 
pool_slice(img->c->global, tgt->name) : SLICE_NULL; + const char* nm = nm_s.s ? nm_s.s : ""; + size_t nl = nm_s.len; + compiler_panic( + img->c, no_loc(), + "link: unhandled reloc kind %u against imported symbol '%.*s'", + (unsigned)r->kind, (int)nl, nm); + } + } + + /* PIE: an absolute reloc against a defined non-imported symbol + * stays image-relative in the file (the loader adds load-base via + * a synthesized R_AARCH64_RELATIVE). img_base is 0 for PIE so + * S above is already image-relative — the apply writes that into + * the site, and the RELATIVE record tells the loader to add + * load_base on top. */ + if (pie && reloc_is_abs(r->kind) && tgt->defined && tgt->kind != SK_ABS) { + emit_relative_record(img, r->write_vaddr, tgt->vaddr); + } + link_reloc_apply(img->c, r->kind, P_bytes, S, r->addend, P); + } +} + +/* The build-id payload is a format-agnostic image identity hash — + * see link_image_id_compute in link_image_id.c. Mach-O wraps the + * same bytes in LC_UUID; ELF wraps them in a .note.gnu.build-id. */ + +/* ---- string-table builder ---- */ + +typedef struct StrBuilder { + Heap* heap; + u8* data; + u32 len; + u32 cap; +} StrBuilder; + +static void strb_init(StrBuilder* s, Heap* h, u32 reserve) { + s->heap = h; + s->cap = reserve > 16u ? reserve : 16u; + s->data = (u8*)h->alloc(h, s->cap, 1); + if (!s->data) s->cap = 0; + s->len = 0; + if (s->cap) { + s->data[0] = 0; + s->len = 1; + } /* leading NUL */ +} + +static void strb_fini(StrBuilder* s) { + if (s->data) s->heap->free(s->heap, s->data, s->cap); + s->data = NULL; + s->cap = s->len = 0; +} + +static void strb_grow(StrBuilder* s, u32 need) { + (void)VEC_GROW(s->heap, s->data, s->cap, need); +} + +static u32 strb_add(StrBuilder* s, const char* str, u32 slen) { + u32 off; + if (slen == 0) return 0; + /* Linear dedup: scan existing data for a matching NUL-terminated + * substring. Strtabs are small enough to make this acceptable. 
*/ + if (s->len > slen) { + u32 i; + for (i = 0; i + slen < s->len; ++i) { + if (s->data[i + slen] == 0 && memcmp(s->data + i, str, slen) == 0) + return i; + } + } + off = s->len; + strb_grow(s, s->len + slen + 1u); + memcpy(s->data + s->len, str, slen); + s->data[s->len + slen] = 0; + s->len += slen + 1u; + return off; +} + +static u32 strb_add_cstr(StrBuilder* s, const char* str) { + return strb_add(s, str, (u32)slice_from_cstr(str).len); +} + +/* ---- symtab builder ---- */ + +typedef struct SymRec { + u32 st_name; + u8 st_info; + u8 st_other; + u16 st_shndx; + u64 st_value; + u64 st_size; +} SymRec; + +static u8 sym_kind_to_st_type(u8 kind) { + switch (kind) { + case SK_FUNC: + return STT_FUNC; + case SK_OBJ: + return STT_OBJECT; + case SK_SECTION: + return STT_SECTION; + case SK_FILE: + return STT_FILE; + case SK_TLS: + return STT_TLS; + case SK_IFUNC: + return STT_GNU_IFUNC; + case SK_NOTYPE: + case SK_ABS: + case SK_UNDEF: + default: + return STT_NOTYPE; + } +} + +static u8 sym_bind_to_st_bind(u8 bind) { + switch (bind) { + case SB_GLOBAL: + return STB_GLOBAL; + case SB_WEAK: + return STB_WEAK; + case SB_LOCAL: + default: + return STB_LOCAL; + } +} + +/* Produces one Elf64_Sym record on the wire from a SymRec. 
*/ +static void write_sym_rec(Writer* w, const SymRec* r) { + u8 buf[ELF64_SYM_SIZE]; + buf[0] = (u8)(r->st_name); + buf[1] = (u8)(r->st_name >> 8); + buf[2] = (u8)(r->st_name >> 16); + buf[3] = (u8)(r->st_name >> 24); + buf[4] = r->st_info; + buf[5] = r->st_other; + buf[6] = (u8)(r->st_shndx); + buf[7] = (u8)(r->st_shndx >> 8); + { + u32 i; + for (i = 0; i < 8; ++i) buf[8 + i] = (u8)(r->st_value >> (i * 8)); + for (i = 0; i < 8; ++i) buf[16 + i] = (u8)(r->st_size >> (i * 8)); + } + write_bytes(w, buf, sizeof buf); +} + +/* ---- section header layout ---- * + * + * Per-segment cuts: each kept image segment contributes 1 .text/.rodata + * shdr for its file portion, plus a separate .bss shdr for the trailing + * NOBITS portion of an RW segment (memsz > filesz). The headers PT_LOAD + * contributes a single .note.gnu.build-id shdr. Trailing non-alloc + * shdrs: .symtab .strtab .shstrtab (always 3). */ + +typedef struct OutShdr { + u32 shdr_idx; /* 1-based; assigned during planning */ + LinkSegmentId segment_id; + Sym name; + u16 sem; /* SecSem from source LinkSection */ + u32 flags; /* SF_* from source LinkSection */ + u32 align; + u64 vaddr; + u64 file_offset; + u64 size; + int is_nobits; +} OutShdr; + +static u16 sym_shndx_for(const LinkSymbol* s, const OutShdr* outshdrs, + u32 noutshdr) { + if (!s->defined) return SHN_UNDEF; + if (s->kind == SK_ABS) return SHN_ABS; + if (s->kind == SK_FILE) return SHN_ABS; + if (s->kind == SK_COMMON) return SHN_COMMON; + /* Find an output shdr whose [vaddr, vaddr+size) covers s->vaddr. + * Boundary symbols match at the upper edge. 
*/ + { + u32 i; + for (i = 0; i < noutshdr; ++i) { + u64 lo = outshdrs[i].vaddr; + u64 hi = lo + outshdrs[i].size; + if (s->vaddr >= lo && s->vaddr <= hi) return (u16)outshdrs[i].shdr_idx; + } + } + return SHN_ABS; +} + +static u32 sec_sem_to_sht(u16 sem) { + switch (sem) { + case SSEM_PROGBITS: + return SHT_PROGBITS; + case SSEM_NOBITS: + return SHT_NOBITS; + case SSEM_NOTE: + return SHT_NOTE; + case SSEM_INIT_ARRAY: + return SHT_INIT_ARRAY; + case SSEM_FINI_ARRAY: + return SHT_FINI_ARRAY; + case SSEM_PREINIT_ARRAY: + return SHT_PREINIT_ARRAY; + default: + return SHT_PROGBITS; + } +} + +static u64 sec_flags_to_shf(u32 flags) { + u64 r = 0; + if (flags & SF_ALLOC) r |= SHF_ALLOC; + if (flags & SF_EXEC) r |= SHF_EXECINSTR; + if (flags & SF_WRITE) r |= SHF_WRITE; + if (flags & SF_TLS) r |= SHF_TLS; + if (flags & SF_MERGE) r |= SHF_MERGE; + if (flags & SF_STRINGS) r |= SHF_STRINGS; + if (flags & SF_LINK_ORDER) r |= SHF_LINK_ORDER; + if (flags & SF_RETAIN) r |= SHF_GNU_RETAIN; + return r; +} + +void link_emit_elf(LinkImage* img, Writer* w) { + Heap* heap = img->heap; + Compiler* c = img->c; + const ObjElfArchOps* arch = elf_arch_or_panic(c, "link_emit_elf"); + u32 e_machine = arch->e_machine; + if (img->entry_sym == LINK_SYM_NONE) + compiler_panic(c, no_loc(), "link_emit_elf: no resolved entry symbol"); + /* IFUNC trampolines: layout_iplt builds the .iplt stubs + .igot.plt + * slots and (when emit_static_exe was set) synthesizes a + * .init_array entry that calls __cfree_ifunc_init at startup. The + * rt member walks .iplt.pairs and fills each slot before user code + * runs. The ELF writer doesn't have to do anything special here. */ + + /* PIE / ET_DYN: img_base is 0 (the loader picks the runtime base; + * absolute relocs against internal symbols are emitted as + * R_AARCH64_RELATIVE in .rela.dyn). Otherwise classic ET_EXEC at + * IMAGE_BASE_STATIC. + * + * Scripted: the linker script pinned absolute vaddrs (e.g. + * `. 
= 0x40080000`); img_base stays 0 and the headers PT_LOAD / + * build-id note are dropped — the script's image is consumed by a + * raw loader (qemu -kernel, a bootloader) that doesn't need a + * self-describing memory image. */ + int pie = img->pie; + int scripted = img->scripted; + u64 img_base = (pie || scripted) ? 0ULL : IMAGE_BASE_STATIC; + + /* ---- plan number of program headers ---- + * + * 1 headers PT_LOAD + nsegments PT_LOAD + 1 PT_NOTE (build-id) + * + 1 PT_TLS when this image carries any TLS sections. + * + 4 dyn phdrs (PT_PHDR / PT_INTERP / PT_DYNAMIC / PT_GNU_STACK) on PIE. + * + * Scripted images skip the headers PT_LOAD and PT_NOTE: phdrs are + * just the per-segment PT_LOADs. */ + u32 has_tls = img->tls_memsz ? 1u : 0u; + u32 nphdr_extra_dyn = pie ? 4u : 0u; + u32 nphdr_headers = scripted ? 0u : 1u; + u32 nphdr_buildid = scripted ? 0u : 1u; + u32 nphdr_total = nphdr_headers + img->nsegments + nphdr_buildid + has_tls + + nphdr_extra_dyn; + u64 build_id_note_bytes = scripted ? 0ULL : BUILD_ID_NOTE_BYTES; + u64 headers_size = + sizeof(Ehdr64) + (u64)nphdr_total * sizeof(Phdr64) + build_id_note_bytes; + u64 headers_load = ALIGN_UP(headers_size, (u64)PAGE_SIZE); + + /* The build-id note lives inside the headers PT_LOAD at this offset. */ + u64 build_id_off = sizeof(Ehdr64) + (u64)nphdr_total * sizeof(Phdr64); + u64 build_id_addr = img_base + build_id_off; + + /* ---- shift image addresses, apply relocations ---- + * + * Must happen before segshdrs/symtab construction so they observe + * post-shift vaddrs (the values that will land in the file). */ + if (scripted) + shift_image_file_offsets(img, headers_load); + else + shift_image_addresses(img, headers_load); + apply_all_relocs(img, img_base); + + /* ---- write .dynamic body + re-serialize .rela.dyn (PIE only) ---- + * + * Both depend on post-shift vaddrs. .dynamic embeds image-relative + * pointers to .dynsym/.dynstr/.gnu.hash/.rela.dyn/.rela.plt/.got.plt + * (the loader adds load_base at runtime). 
.rela.dyn picked up + * RELATIVE records during apply_all_relocs; rewrite the section + * bytes to include them. */ + if (pie && img->dyn) { + LinkDynState* dyn = img->dyn; + const LinkSection* sec_dynamic = &img->sections[dyn->sec_dynamic - 1]; + const LinkSection* sec_dynsym = &img->sections[dyn->sec_dynsym - 1]; + const LinkSection* sec_dynstr = &img->sections[dyn->sec_dynstr - 1]; + const LinkSection* sec_gnuhash = &img->sections[dyn->sec_gnu_hash - 1]; + const LinkSection* sec_reladyn = &img->sections[dyn->sec_rela_dyn - 1]; + const LinkSection* sec_relaplt = (dyn->sec_rela_plt != LINK_SEC_NONE) + ? &img->sections[dyn->sec_rela_plt - 1] + : NULL; + const LinkSection* sec_gotplt = (dyn->sec_got_plt != LINK_SEC_NONE) + ? &img->sections[dyn->sec_got_plt - 1] + : NULL; + const LinkSegment* dseg = &img->segments[sec_dynamic->segment_id - 1]; + u8* dyn_bytes_at = img->segment_bytes[dseg->id - 1] + + (size_t)(sec_dynamic->file_offset - dseg->file_offset); + + /* Build DT_* entries in order. Layout matches count_dynamic_entries. */ + u32 written = 0; + u8* p = dyn_bytes_at; +#define DT_PUT(TAG, VAL) \ + do { \ + wr_u64_le(p, (u64)(TAG)); \ + wr_u64_le(p + 8, (u64)(VAL)); \ + p += 16; \ + written++; \ + } while (0) + + /* DT_NEEDED entries — d_un.d_val is the offset of the soname + * within .dynstr. The dynstr was built in layout_dyn with + * dedup; look each soname up by name to compute its offset. */ + { + u32 ni; + for (ni = 0; ni < dyn->nneeded; ++ni) { + Sym soname = dyn->needed[ni]; + Slice nm_s = pool_slice(c->global, soname); + const char* nm = nm_s.s; + size_t namelen = nm_s.len; + /* Linear search dynstr for this name. */ + u32 off = 0; + if (nm && namelen) { + u32 si; + for (si = 0; si + namelen < dyn->dynstr_len; ++si) { + if (dyn->dynstr[si + namelen] == 0 && + memcmp(dyn->dynstr + si, nm, namelen) == 0) { + off = si; + break; + } + } + /* Should always be present — collect_needed populated dynstr + * via build_dynsym? 
Actually build_dynsym only added import + * names. We need to also add NEEDED sonames. */ + if (off == 0) { + /* Fallback: append to dynstr. Phase 4 layout_dyn pre-sized + * .dynstr exactly to its current content; appending here + * would overflow the section. Instead, panic with a clear + * message — the soname was supposed to be added during + * layout. */ + compiler_panic(c, no_loc(), + "link_emit_elf: DT_NEEDED soname missing from " + ".dynstr"); + } + } + DT_PUT(DT_NEEDED, off); + } + } + + DT_PUT(DT_STRTAB, img_base + sec_dynstr->vaddr); + DT_PUT(DT_STRSZ, sec_dynstr->size); + DT_PUT(DT_SYMTAB, img_base + sec_dynsym->vaddr); + DT_PUT(DT_SYMENT, 24); + DT_PUT(DT_GNU_HASH, img_base + sec_gnuhash->vaddr); + /* DT_PLT* / DT_JMPREL only make sense when there's a PLT. Emitting + * them with size=0 / vaddr=0 (or pointing past the end of any + * PT_LOAD) trips llvm-readelf's "address not in any segment" check + * and confuses some loaders' DT walk. */ + if (dyn->nrela_plt) { + DT_PUT(DT_PLTGOT, sec_gotplt ? (img_base + sec_gotplt->vaddr) : 0); + DT_PUT(DT_PLTRELSZ, sec_relaplt ? sec_relaplt->size : 0); + DT_PUT(DT_PLTREL, DT_RELA); + DT_PUT(DT_JMPREL, sec_relaplt ? (img_base + sec_relaplt->vaddr) : 0); + } + if (dyn->cap_rela_dyn) { + DT_PUT(DT_RELA, img_base + sec_reladyn->vaddr); + DT_PUT(DT_RELASZ, sec_reladyn->size); + DT_PUT(DT_RELAENT, 24); + } + DT_PUT(DT_FLAGS_1, DF_1_NOW); + DT_PUT(DT_NULL, 0); +#undef DT_PUT + + /* Pad any pre-allocated tail with DT_NULL. */ + while (written < dyn->ndyn_entries) { + wr_u64_le(p, 0); + wr_u64_le(p + 8, 0); + p += 16; + written++; + } + + /* Re-serialize .rela.dyn body. GLOB_DAT records (imports against + * .got slots) and RELATIVE records (PIE internal abs64 fixups) + * are both populated during apply_all_relocs; .rela.dyn was empty + * coming out of layout_dyn. Trailing capacity stays zero — + * readers stop at the first R_AARCH64_NONE record. 
*/ + { + const LinkSegment* rdseg = &img->segments[sec_reladyn->segment_id - 1]; + u8* rd_bytes = img->segment_bytes[rdseg->id - 1] + + (size_t)(sec_reladyn->file_offset - rdseg->file_offset); + u32 i; + for (i = 0; i < dyn->nrela_dyn; ++i) { + const DynRela* rr = &dyn->rela_dyn[i]; + u8* rp = rd_bytes + (u64)i * ELF64_RELA_SIZE; + wr_u64_le(rp + 0, rr->r_offset); + wr_u64_le(rp + 8, rr->r_info); + wr_u64_le(rp + 16, (u64)rr->r_addend); + } + } + + /* Re-serialize .rela.plt body. JUMP_SLOT records were written by + * layout_dyn at pre-shift vaddrs; shift_image_addresses bumped + * dyn->rela_plt[i].r_offset along with the rest, so the post-shift + * values match the .got.plt slot vaddrs the loader will patch. */ + if (sec_relaplt && dyn->nrela_plt) { + const LinkSegment* rpseg = &img->segments[sec_relaplt->segment_id - 1]; + u8* rp_bytes = img->segment_bytes[rpseg->id - 1] + + (size_t)(sec_relaplt->file_offset - rpseg->file_offset); + u32 i; + for (i = 0; i < dyn->nrela_plt; ++i) { + const DynRela* rr = &dyn->rela_plt[i]; + u8* rp = rp_bytes + (u64)i * ELF64_RELA_SIZE; + wr_u64_le(rp + 0, rr->r_offset); + wr_u64_le(rp + 8, rr->r_info); + wr_u64_le(rp + 16, (u64)rr->r_addend); + } + } + + /* Re-write .got.plt[0] = &.dynamic with the post-shift vaddr. + * layout_dyn wrote the pre-shift value into the segment bytes; + * shift_image_addresses bumped dyn->dynamic_vaddr so we can refill + * the slot here. Slots 1 and 2 (link_map cookie, + * _dl_runtime_resolve) are loader-owned for lazy binding; under + * DF_1_NOW they're never read so leaving them zero is fine. */ + if (sec_gotplt && dyn->dynamic_vaddr) { + const LinkSegment* gpseg = &img->segments[sec_gotplt->segment_id - 1]; + u8* gp_bytes = img->segment_bytes[gpseg->id - 1] + + (size_t)(sec_gotplt->file_offset - gpseg->file_offset); + wr_u64_le(gp_bytes, dyn->dynamic_vaddr); + } + } + + /* ---- compute build-id (post-reloc, deterministic) ---- + * + * Format-agnostic — Mach-O LC_UUID will hash the same bytes. 
*/ + u8 build_id[BUILD_ID_DESC_LEN]; + link_image_id_compute(img, build_id); + + /* ---- plan section headers covering loaded segments ---- + * + * Worst case: 1 file shdr per segment + 1 .bss shdr if RW has a tail. + * shdr indices: 0=NULL, 1..nsegshdr=these, then build-id/symtab/... + */ + /* Walk img->sections sorted by (segment_id, vaddr) and merge into + * one OutShdr per (segment_id, name) run. layout already places + * same-name sections adjacent within a segment, so a stable + * by-vaddr sort followed by run-length grouping captures it. */ + OutShdr* outshdrs; + u32 noutshdr = 0; + u32 outshdr_cap = img->nsections + 1u; + outshdrs = (OutShdr*)heap->alloc(heap, sizeof(*outshdrs) * outshdr_cap, + _Alignof(OutShdr)); + if (!outshdrs) compiler_panic(c, no_loc(), "link_emit_elf: oom on outshdrs"); + memset(outshdrs, 0, sizeof(*outshdrs) * outshdr_cap); + { + /* Build a sort index over LinkSection ids by (segment_id, vaddr). */ + u32* order = (u32*)heap->alloc(heap, sizeof(u32) * (img->nsections + 1u), + _Alignof(u32)); + if (!order && img->nsections) + compiler_panic(c, no_loc(), "link_emit_elf: oom on shdr sort"); + u32 i, j; + for (i = 0; i < img->nsections; ++i) order[i] = i; + /* Insertion sort — section count is small. */ + for (i = 1; i < img->nsections; ++i) { + u32 cur = order[i]; + const LinkSection* a = &img->sections[cur]; + j = i; + while (j > 0) { + const LinkSection* b = &img->sections[order[j - 1]]; + if ((b->segment_id < a->segment_id) || + (b->segment_id == a->segment_id && b->vaddr <= a->vaddr)) + break; + order[j] = order[j - 1]; + --j; + } + order[j] = cur; + } + for (i = 0; i < img->nsections; ++i) { + const LinkSection* ls = &img->sections[order[i]]; + OutShdr* tail = noutshdr ? 
&outshdrs[noutshdr - 1] : NULL; + int merge = tail && tail->segment_id == ls->segment_id && + tail->name == ls->name && + tail->is_nobits == (ls->sem == SSEM_NOBITS); + if (merge) { + u64 end = ls->vaddr + ls->size; + u64 prev_end = tail->vaddr + tail->size; + if (end > prev_end) tail->size = end - tail->vaddr; + if (ls->align > tail->align) tail->align = ls->align; + } else { + OutShdr* o = &outshdrs[noutshdr]; + o->shdr_idx = 1u + noutshdr; + o->segment_id = ls->segment_id; + o->name = ls->name; + o->sem = ls->sem; + o->flags = ls->flags; + o->align = ls->align; + o->vaddr = ls->vaddr; + o->file_offset = ls->file_offset; + o->size = ls->size; + o->is_nobits = (ls->sem == SSEM_NOBITS); + noutshdr++; + } + } + heap->free(heap, order, sizeof(u32) * (img->nsections + 1u)); + } + + /* ---- build .shstrtab ---- */ + StrBuilder shstrtab; + strb_init(&shstrtab, heap, 128); + u32 sh_name_symtab = strb_add_cstr(&shstrtab, ".symtab"); + u32 sh_name_strtab = strb_add_cstr(&shstrtab, ".strtab"); + u32 sh_name_shstrtab = strb_add_cstr(&shstrtab, ".shstrtab"); + u32 sh_name_buildid = strb_add_cstr(&shstrtab, ".note.gnu.build-id"); + /* Per-output-shdr names — interned strings from input section names. */ + u32* outshdr_name_off = + (u32*)heap->alloc(heap, sizeof(u32) * (noutshdr + 1u), _Alignof(u32)); + if (!outshdr_name_off && noutshdr) + compiler_panic(c, no_loc(), "link_emit_elf: oom on shdr name table"); + { + u32 i; + for (i = 0; i < noutshdr; ++i) { + const OutShdr* o = &outshdrs[i]; + if (o->name) { + Slice nm_s = pool_slice(c->global, o->name); + const char* nm = nm_s.s; + size_t nlen = nm_s.len; + outshdr_name_off[i] = + nm && nlen ? 
strb_add(&shstrtab, nm, (u32)nlen) : 0; + } else { + outshdr_name_off[i] = 0; + } + } + } + + u32 nshdr = 1u + noutshdr + 4u; + u32 shndx_buildid = 1u + noutshdr; + u32 shndx_symtab = shndx_buildid + 1u; + u32 shndx_strtab = shndx_symtab + 1u; + u32 shndx_shstrtab = shndx_strtab + 1u; + + /* ---- build .symtab + .strtab ---- + * + * Two passes (locals first, then globals/weaks). Slot 0 is + * STN_UNDEF. Globals are deduped via img->globals — only the + * canonical entry per name is emitted, since per-input undef + * records keep their own LinkSymId after resolve_undefs's + * "copy fields from canonical def" step. sh_info = first non-local + * idx. */ + StrBuilder strtab; + strb_init(&strtab, heap, 256); + + SymRec* recs = (SymRec*)heap->alloc( + heap, sizeof(*recs) * (LinkSyms_count(&img->syms) + 1u), + _Alignof(SymRec)); + if (!recs) compiler_panic(c, no_loc(), "link_emit_elf: oom on symrecs"); + u32 nsyms_emit = 0; + u32 first_global_idx; + memset(&recs[nsyms_emit++], 0, sizeof(*recs)); /* slot 0 */ + first_global_idx = nsyms_emit; + + { + u32 pass, i; + for (pass = 0; pass < 2; ++pass) { + int want_local = (pass == 0); + if (!want_local) first_global_idx = nsyms_emit; + for (i = 0; i < LinkSyms_count(&img->syms); ++i) { + const LinkSymbol* s = LinkSyms_at(&img->syms, i); + int is_local = (s->bind == SB_LOCAL); + size_t namelen = 0; + const char* nm; + u8 st_type, st_bind; + u16 shndx; + u64 st_value; + SymRec* r; + if (want_local != is_local) continue; + if (s->name == 0 && s->kind != SK_FILE) continue; + /* Dedupe globals: per-input undef-of-X and the canonical + * def-of-X are separate img->syms entries (resolve_undefs + * mirrors fields onto the undef). Only the canonical + * (first registered) entry is in img->globals. Skip the + * shadow copies. */ + if (!is_local && s->name) { + LinkSymId canonical = symhash_get(&img->globals, s->name); + if (canonical != LINK_SYM_NONE && canonical != s->id) continue; + } + { + Slice nm_s = s->name ? 
pool_slice(c->global, s->name) : SLICE_NULL; + nm = nm_s.s ? nm_s.s : ""; + namelen = nm_s.len; + } + shndx = sym_shndx_for(s, outshdrs, noutshdr); + /* st_value: in ET_EXEC, defined non-ABS symbols carry + * absolute virtual addresses (IMAGE_BASE + image + * vaddr); ABS symbols carry their own value verbatim. */ + if (s->kind == SK_FILE) + st_value = 0; + else if (s->kind == SK_ABS) + st_value = s->vaddr; + else if (s->defined) + st_value = img_base + s->vaddr; + else + st_value = 0; + st_type = sym_kind_to_st_type(s->kind); + st_bind = sym_bind_to_st_bind(s->bind); + r = &recs[nsyms_emit++]; + memset(r, 0, sizeof(*r)); + r->st_name = (nm && namelen) ? strb_add(&strtab, nm, (u32)namelen) : 0; + r->st_info = ELF64_ST_INFO(st_bind, st_type); + r->st_other = STV_DEFAULT; + r->st_shndx = shndx; + r->st_value = st_value; + r->st_size = s->size; + } + } + } + + /* ---- compute file offsets for trailing non-alloc sections ---- */ + /* End of segment data: the highest (file_offset + file_size) across + * loaded segments. */ + u64 end_of_segs = headers_load; + { + u32 i; + for (i = 0; i < img->nsegments; ++i) { + const LinkSegment* seg = &img->segments[i]; + u64 e = seg->file_offset + seg->file_size; + if (e > end_of_segs) end_of_segs = e; + } + } + u64 symtab_off = ALIGN_UP(end_of_segs, (u64)8u); + u64 symtab_size = (u64)ELF64_SYM_SIZE * nsyms_emit; + u64 strtab_off = symtab_off + symtab_size; + u64 strtab_size = strtab.len; + u64 shstrtab_off = strtab_off + strtab_size; + u64 shstrtab_size = shstrtab.len; + u64 shdr_off = ALIGN_UP(shstrtab_off + shstrtab_size, (u64)8u); + + /* ---- build phdrs ---- */ + Phdr64* phdrs = (Phdr64*)heap->alloc(heap, sizeof(Phdr64) * nphdr_total, + _Alignof(Phdr64)); + if (!phdrs) compiler_panic(c, no_loc(), "link_emit_elf: oom on phdrs"); + memset(phdrs, 0, sizeof(Phdr64) * nphdr_total); + { + u32 pi = 0; + /* PT_PHDR points at the phdr table itself within the headers + * PT_LOAD. 
Required by the runtime loader for ET_DYN to know + * where its own program headers live. Must appear before the + * first PT_LOAD on dynamic exes (musl checks). */ + if (pie) { + phdrs[pi].p_type = PT_PHDR; + phdrs[pi].p_flags = PF_R; + phdrs[pi].p_offset = sizeof(Ehdr64); + phdrs[pi].p_vaddr = img_base + sizeof(Ehdr64); + phdrs[pi].p_paddr = phdrs[pi].p_vaddr; + phdrs[pi].p_filesz = (u64)nphdr_total * sizeof(Phdr64); + phdrs[pi].p_memsz = phdrs[pi].p_filesz; + phdrs[pi].p_align = 8; + pi++; + } + /* Headers PT_LOAD (covers ehdr + phdrs + build-id note). + * Scripted images don't emit one — see plan note above. */ + if (!scripted) { + phdrs[pi].p_type = PT_LOAD; + phdrs[pi].p_flags = PF_R; + phdrs[pi].p_offset = 0; + phdrs[pi].p_vaddr = img_base; + phdrs[pi].p_paddr = img_base; + phdrs[pi].p_filesz = headers_size; + phdrs[pi].p_memsz = headers_size; + phdrs[pi].p_align = PAGE_SIZE; + pi++; + } + /* Per-segment PT_LOAD. */ + u32 i; + for (i = 0; i < img->nsegments; ++i) { + const LinkSegment* seg = &img->segments[i]; + Phdr64* p = &phdrs[pi++]; + p->p_type = PT_LOAD; + p->p_flags = perms_to_pflags(seg->flags); + p->p_offset = seg->file_offset; + p->p_vaddr = img_base + seg->vaddr; /* post-shift */ + p->p_paddr = p->p_vaddr; + p->p_filesz = seg->file_size; + /* TLS .tbss is per-thread template space, not a loadable bss + * region — PT_TLS already records the full memsz (incl. .tbss) + * for the loader's per-thread allocation, so the matching + * PT_LOAD must not extend memsz past filesz. qemu-riscv64 + * rejects PT_LOADs with memsz>filesz on non-writable mappings + * ("PT_LOAD with non-writable bss"), and the SEG_TLS perms are + * SF_ALLOC|SF_TLS only. */ + p->p_memsz = (seg->flags & SF_TLS) ? seg->file_size : seg->mem_size; + p->p_align = seg->align ? seg->align : PAGE_SIZE; + } + /* PT_NOTE for build-id. Scripted images skip the build-id entirely. 
*/ + if (!scripted) { + phdrs[pi].p_type = PT_NOTE; + phdrs[pi].p_flags = PF_R; + phdrs[pi].p_offset = build_id_off; + phdrs[pi].p_vaddr = build_id_addr; + phdrs[pi].p_paddr = build_id_addr; + phdrs[pi].p_filesz = BUILD_ID_NOTE_BYTES; + phdrs[pi].p_memsz = BUILD_ID_NOTE_BYTES; + phdrs[pi].p_align = 4; + pi++; + } + /* PT_TLS describing the .tdata template + .tbss zero-fill. + * vaddr/file_offset point at the same bytes the matching + * PT_LOAD already covers — the loader uses PT_TLS to size + * each thread's TLS block and to seed it from .tdata. */ + if (has_tls) { + phdrs[pi].p_type = PT_TLS; + phdrs[pi].p_flags = PF_R; + phdrs[pi].p_offset = img->tls_vaddr; + phdrs[pi].p_vaddr = img_base + img->tls_vaddr; + phdrs[pi].p_paddr = phdrs[pi].p_vaddr; + phdrs[pi].p_filesz = img->tls_filesz; + phdrs[pi].p_memsz = img->tls_memsz; + phdrs[pi].p_align = img->tls_align ? img->tls_align : 1u; + pi++; + } + /* Dynamic phdrs. PT_INTERP and PT_DYNAMIC point at the matching + * sections (which layout_dyn placed in the ro/rw_dyn segments). + * PT_GNU_STACK marks the stack as non-executable (filesz=0). 
*/ + if (pie && img->dyn) { + LinkDynState* dyn = img->dyn; + const LinkSection* sec_interp = &img->sections[dyn->sec_interp - 1]; + const LinkSection* sec_dynamic = &img->sections[dyn->sec_dynamic - 1]; + phdrs[pi].p_type = PT_INTERP; + phdrs[pi].p_flags = PF_R; + phdrs[pi].p_offset = sec_interp->file_offset; + phdrs[pi].p_vaddr = img_base + sec_interp->vaddr; + phdrs[pi].p_paddr = phdrs[pi].p_vaddr; + phdrs[pi].p_filesz = sec_interp->size; + phdrs[pi].p_memsz = sec_interp->size; + phdrs[pi].p_align = 1; + pi++; + phdrs[pi].p_type = PT_DYNAMIC; + phdrs[pi].p_flags = PF_R | PF_W; + phdrs[pi].p_offset = sec_dynamic->file_offset; + phdrs[pi].p_vaddr = img_base + sec_dynamic->vaddr; + phdrs[pi].p_paddr = phdrs[pi].p_vaddr; + phdrs[pi].p_filesz = sec_dynamic->size; + phdrs[pi].p_memsz = sec_dynamic->size; + phdrs[pi].p_align = 8; + pi++; + phdrs[pi].p_type = PT_GNU_STACK; + phdrs[pi].p_flags = PF_R | PF_W; + phdrs[pi].p_offset = 0; + phdrs[pi].p_vaddr = 0; + phdrs[pi].p_paddr = 0; + phdrs[pi].p_filesz = 0; + phdrs[pi].p_memsz = 0; + phdrs[pi].p_align = 16; + pi++; + /* PT_GNU_RELRO would mark the read-only-after-relocation span + * here. Phase 6 leaves it out — it's an optimization the loader + * can live without, and our ro_seg already lives in a PF_R + * PT_LOAD that's never made writable. */ + } else if (pie) { + /* dyn was nominally requested but layout_dyn early-out — no + * imports and no DSO inputs. The image still needs a PT_GNU_STACK + * for kernels that demand it; INTERP/DYNAMIC are skipped. */ + (void)0; + } + (void)pi; + } + + /* ---- build ehdr ---- */ + Ehdr64 ehdr; + memset(&ehdr, 0, sizeof(ehdr)); + ehdr.e_ident[0] = ELFMAG0; + ehdr.e_ident[1] = ELFMAG1; + ehdr.e_ident[2] = ELFMAG2; + ehdr.e_ident[3] = ELFMAG3; + ehdr.e_ident[4] = ELFCLASS64; + ehdr.e_ident[5] = ELFDATA2LSB; + ehdr.e_ident[6] = EV_CURRENT; + ehdr.e_ident[7] = ELFOSABI_NONE; + ehdr.e_type = pie ? 
ET_DYN : ET_EXEC; + ehdr.e_machine = (u16)e_machine; + ehdr.e_version = EV_CURRENT; + ehdr.e_entry = img_base + LinkSyms_at(&img->syms, img->entry_sym - 1)->vaddr; + ehdr.e_phoff = sizeof(Ehdr64); + ehdr.e_shoff = shdr_off; + ehdr.e_flags = 0; + ehdr.e_ehsize = sizeof(Ehdr64); + ehdr.e_phentsize = sizeof(Phdr64); + ehdr.e_phnum = (u16)nphdr_total; + ehdr.e_shentsize = sizeof(Shdr64); + ehdr.e_shnum = (u16)nshdr; + ehdr.e_shstrndx = (u16)shndx_shstrtab; + + /* ---- write ehdr, phdrs, build-id note, pad ---- */ + u64 cur_off; + write_bytes(w, &ehdr, sizeof(ehdr)); + write_bytes(w, phdrs, sizeof(Phdr64) * nphdr_total); + cur_off = sizeof(ehdr) + sizeof(Phdr64) * nphdr_total; + + /* .note.gnu.build-id wire format: + * u32 namesz = 4 ("GNU\0") + * u32 descsz = 16 + * u32 type = NT_GNU_BUILD_ID (3) + * "GNU\0" + * <16 bytes of build-id> + * + * Scripted images don't carry build-id; they have no PT_NOTE phdr to + * point at it and the file payload would just be dead bytes. */ + if (!scripted) { + u8 nh[12]; + u32 v; + v = NOTE_NAME_GNU_LEN; + nh[0] = (u8)v; + nh[1] = (u8)(v >> 8); + nh[2] = (u8)(v >> 16); + nh[3] = (u8)(v >> 24); + v = BUILD_ID_DESC_LEN; + nh[4] = (u8)v; + nh[5] = (u8)(v >> 8); + nh[6] = (u8)(v >> 16); + nh[7] = (u8)(v >> 24); + v = NOTE_BUILD_ID_TYPE; + nh[8] = (u8)v; + nh[9] = (u8)(v >> 8); + nh[10] = (u8)(v >> 16); + nh[11] = (u8)(v >> 24); + write_bytes(w, nh, sizeof nh); + write_bytes(w, NOTE_NAME_GNU "\0", NOTE_NAME_GNU_LEN); + write_bytes(w, build_id, BUILD_ID_DESC_LEN); + cur_off += BUILD_ID_NOTE_BYTES; + } + + /* Pad to first segment file_offset (== headers_load). 
*/ + { + u32 i; + for (i = 0; i < img->nsegments; ++i) { + const LinkSegment* seg = &img->segments[i]; + if (seg->file_size == 0) continue; + if (cur_off < seg->file_offset) { + write_zeroes(w, (size_t)(seg->file_offset - cur_off)); + cur_off = seg->file_offset; + } + write_bytes(w, img->segment_bytes[seg->id - 1], (size_t)seg->file_size); + cur_off += seg->file_size; + } + } + + /* ---- write trailing non-alloc sections ---- */ + if (cur_off < symtab_off) { + write_zeroes(w, (size_t)(symtab_off - cur_off)); + cur_off = symtab_off; + } + { + u32 i; + for (i = 0; i < nsyms_emit; ++i) write_sym_rec(w, &recs[i]); + cur_off += symtab_size; + } + if (strtab.len) { + write_bytes(w, strtab.data, strtab.len); + cur_off += strtab.len; + } + if (shstrtab.len) { + write_bytes(w, shstrtab.data, shstrtab.len); + cur_off += shstrtab.len; + } + + /* ---- write section header table ---- */ + if (cur_off < shdr_off) { + write_zeroes(w, (size_t)(shdr_off - cur_off)); + cur_off = shdr_off; + } + { + Shdr64 sh; + u32 i; + /* shdr 0: NULL */ + memset(&sh, 0, sizeof(sh)); + write_bytes(w, &sh, sizeof(sh)); + /* Locate dyn-section names (interned earlier in layout_dyn) so + * we can override sh_type / sh_link / sh_info / sh_entsize for + * .dynsym / .dynstr / .gnu.hash / .rela.dyn / .rela.plt / + * .dynamic. The sh_link cross-references (e.g., .dynsym -> + * .dynstr) need the matching shdr indices, which we look up by + * comparing OutShdr.name to the same Sym values. 
*/ + Sym n_dynsym = 0, n_dynstr = 0, n_gnuhash = 0; + Sym n_reladyn = 0, n_relaplt = 0, n_dynamic = 0; + Sym n_gotplt = 0; + if (pie && img->dyn) { + n_dynsym = pool_intern_slice(c->global, SLICE_LIT(".dynsym")); + n_dynstr = pool_intern_slice(c->global, SLICE_LIT(".dynstr")); + n_gnuhash = pool_intern_slice(c->global, SLICE_LIT(".gnu.hash")); + n_reladyn = pool_intern_slice(c->global, SLICE_LIT(".rela.dyn")); + n_relaplt = pool_intern_slice(c->global, SLICE_LIT(".rela.plt")); + n_dynamic = pool_intern_slice(c->global, SLICE_LIT(".dynamic")); + n_gotplt = pool_intern_slice(c->global, SLICE_LIT(".got.plt")); + } + /* Two-pass: first find dynsym/dynstr/gotplt indices for sh_link + * fixups, then emit. */ + u32 idx_dynsym = 0, idx_dynstr = 0, idx_gotplt = 0; + if (pie && img->dyn) { + for (i = 0; i < noutshdr; ++i) { + Sym nm = outshdrs[i].name; + u32 ix = outshdrs[i].shdr_idx; + if (nm == n_dynsym) + idx_dynsym = ix; + else if (nm == n_dynstr) + idx_dynstr = ix; + else if (nm == n_gotplt) + idx_gotplt = ix; + } + } + /* per-name output shdrs */ + for (i = 0; i < noutshdr; ++i) { + const OutShdr* o = &outshdrs[i]; + memset(&sh, 0, sizeof(sh)); + sh.sh_name = outshdr_name_off[i]; + sh.sh_type = sec_sem_to_sht(o->sem); + sh.sh_flags = sec_flags_to_shf(o->flags); + sh.sh_addr = img_base + o->vaddr; + sh.sh_offset = o->file_offset; + sh.sh_size = o->size; + sh.sh_link = 0; + sh.sh_info = 0; + sh.sh_addralign = o->align ? o->align : 1; + sh.sh_entsize = (o->sem == SSEM_INIT_ARRAY || o->sem == SSEM_FINI_ARRAY || + o->sem == SSEM_PREINIT_ARRAY) + ? 8 + : 0; + /* Dyn-section overrides: sh_type / sh_link / sh_info / entsize. 
*/ + if (pie && img->dyn) { + if (o->name == n_dynsym) { + sh.sh_type = SHT_DYNSYM; + sh.sh_link = idx_dynstr; + sh.sh_info = img->dyn->first_global; + sh.sh_entsize = 24; + } else if (o->name == n_dynstr) { + sh.sh_type = SHT_STRTAB; + } else if (o->name == n_gnuhash) { + sh.sh_type = SHT_GNU_HASH; + sh.sh_link = idx_dynsym; + } else if (o->name == n_reladyn) { + sh.sh_type = SHT_RELA; + sh.sh_link = idx_dynsym; + sh.sh_entsize = 24; + } else if (o->name == n_relaplt) { + sh.sh_type = SHT_RELA; + sh.sh_link = idx_dynsym; + sh.sh_info = idx_gotplt; + sh.sh_entsize = 24; + sh.sh_flags |= SHF_INFO_LINK; + } else if (o->name == n_dynamic) { + sh.sh_type = SHT_DYNAMIC; + sh.sh_link = idx_dynstr; + sh.sh_entsize = 16; + } else if (o->name == n_gotplt) { + sh.sh_entsize = 8; + } + } + write_bytes(w, &sh, sizeof(sh)); + } + /* shdr: .note.gnu.build-id (allocatable; in headers PT_LOAD) */ + memset(&sh, 0, sizeof(sh)); + sh.sh_name = sh_name_buildid; + sh.sh_type = SHT_NOTE; + sh.sh_flags = SHF_ALLOC; + sh.sh_addr = build_id_addr; + sh.sh_offset = build_id_off; + sh.sh_size = BUILD_ID_NOTE_BYTES; + sh.sh_addralign = 4; + write_bytes(w, &sh, sizeof(sh)); + /* shdr: .symtab */ + memset(&sh, 0, sizeof(sh)); + sh.sh_name = sh_name_symtab; + sh.sh_type = SHT_SYMTAB; + sh.sh_flags = 0; + sh.sh_addr = 0; + sh.sh_offset = symtab_off; + sh.sh_size = symtab_size; + sh.sh_link = shndx_strtab; + sh.sh_info = first_global_idx; + sh.sh_addralign = 8; + sh.sh_entsize = ELF64_SYM_SIZE; + write_bytes(w, &sh, sizeof(sh)); + /* shdr: .strtab */ + memset(&sh, 0, sizeof(sh)); + sh.sh_name = sh_name_strtab; + sh.sh_type = SHT_STRTAB; + sh.sh_offset = strtab_off; + sh.sh_size = strtab_size; + sh.sh_addralign = 1; + write_bytes(w, &sh, sizeof(sh)); + /* shdr: .shstrtab */ + memset(&sh, 0, sizeof(sh)); + sh.sh_name = sh_name_shstrtab; + sh.sh_type = SHT_STRTAB; + sh.sh_offset = shstrtab_off; + sh.sh_size = shstrtab_size; + sh.sh_addralign = 1; + write_bytes(w, &sh, sizeof(sh)); + } + + 
heap->free(heap, phdrs, sizeof(Phdr64) * nphdr_total); + heap->free(heap, recs, sizeof(*recs) * (LinkSyms_count(&img->syms) + 1u)); + heap->free(heap, outshdrs, sizeof(*outshdrs) * outshdr_cap); + if (outshdr_name_off) + heap->free(heap, outshdr_name_off, sizeof(u32) * (noutshdr + 1u)); + strb_fini(&strtab); + strb_fini(&shstrtab); +} diff --git a/src/obj/elf/link_dyn.c b/src/obj/elf/link_dyn.c @@ -0,0 +1,992 @@ +/* Phase 4 of dynamic linking: synthesize the dyn-link tables and + * sections an ET_DYN ELF exe needs to be loadable by a real runtime + * loader (musl ld-musl-aarch64.so.1). + * + * Inputs (computed by earlier passes): + * - LinkSymbol entries with `imported = 1` (set by resolve_undefs's + * DSO-search path; their dso_input_id names the providing DSO). + * - LinkInputs of kind LINK_INPUT_DSO_BYTES carrying SONAMEs. + * + * Outputs (deposited on LinkImage.dyn): + * - .interp PT_INTERP target string + * - .dynsym + .dynstr symbol table + name pool + * - .gnu.hash GNU-style hash for the loader + * - .rela.dyn GLOB_DAT (data imports) + space for + * R_AARCH64_RELATIVE records that + * Phase 6 emit fills in + * - .rela.plt JUMP_SLOT records (one per imported func) + * - .plt allocated; PLT0 and the per-import stubs are + * emitted by layout_dyn via the arch descriptor + * - .got.plt 3 reserved slots + 1 per PLT slot, + * allocated and zero-filled + * - .dynamic PT_DYNAMIC section, sized here; the body is + * written at emit time + * + * GOT-slot fill / CALL26 reloc rewriting are Phase 5; + * they're called out at the relevant allocation site so the missing + * pieces are obvious to anyone reading the output. The static-exe path + * is unaffected — layout_dyn early-outs when emit_pie is 0. + * + * Allocator pattern follows layout_iplt (link_layout.c): grow segments + * + sections via realloc, then page-align each new segment after the + * existing image span. Synthetic sections carry input_id == LINK_INPUT_NONE + * so downstream passes (emit_reloc_records, GC) leave them alone. 
+ */ + +#include <string.h> + +#include "core/bytes.h" +#include "core/heap.h" +#include "core/pool.h" +#include "core/slice.h" +#include "core/util.h" +#include "core/vec.h" +#include "link/link.h" +#include "link/link_arch.h" +#include "link/link_internal.h" +#include "obj/elf/elf.h" +#include "obj/format.h" + +static SrcLoc no_loc(void) { + SrcLoc l = {0, 0, 0}; + return l; +} + +/* ---- small allocators (mirror layout_iplt's helpers) ---- */ + +static u32 dyn_alloc_segments(LinkImage* img, u32 nseg) { + Heap* h = img->heap; + u32 base = img->nsegments; + u32 new_nseg = base + nseg; + LinkSegment* nsegs = (LinkSegment*)h->realloc( + h, img->segments, sizeof(*img->segments) * img->nsegments, + sizeof(*img->segments) * new_nseg, _Alignof(LinkSegment)); + u8** nsbufs = (u8**)h->realloc( + h, img->segment_bytes, sizeof(*img->segment_bytes) * img->nsegments, + sizeof(*img->segment_bytes) * new_nseg, _Alignof(u8*)); + size_t* nscaps = (size_t*)h->realloc( + h, img->segment_bytes_cap, + sizeof(*img->segment_bytes_cap) * img->nsegments, + sizeof(*img->segment_bytes_cap) * new_nseg, _Alignof(size_t)); + if (!nsegs || !nsbufs || !nscaps) + compiler_panic(img->c, no_loc(), "link: oom on dyn segments"); + img->segments = nsegs; + img->segment_bytes = nsbufs; + img->segment_bytes_cap = nscaps; + return base; +} + +static u32 dyn_alloc_sections(LinkImage* img, u32 nsec) { + Heap* h = img->heap; + u32 base = img->nsections; + u32 new_nsec = base + nsec; + LinkSection* nsections = (LinkSection*)h->realloc( + h, img->sections, sizeof(*img->sections) * img->nsections, + sizeof(*img->sections) * new_nsec, _Alignof(LinkSection)); + if (!nsections) compiler_panic(img->c, no_loc(), "link: oom on dyn sections"); + img->sections = nsections; + return base; +} + +/* ---- byte-builder for .dynstr / .gnu.hash ---- */ + +typedef struct ByteBuf { + Heap* heap; + u8* data; + u32 len; + u32 cap; +} ByteBuf; + +static void bb_init(ByteBuf* b, Heap* h) { + b->heap = h; + b->data = NULL; + 
b->len = 0; + b->cap = 0; +} +static void bb_reserve(ByteBuf* b, u32 need) { + if (need <= b->cap) return; + (void)VEC_GROW(b->heap, b->data, b->cap, need); +} +static u32 bb_append(ByteBuf* b, const void* src, u32 n) { + u32 off = b->len; + bb_reserve(b, b->len + n); + if (n) memcpy(b->data + b->len, src, n); + b->len += n; + return off; +} +static u32 bb_append_str(ByteBuf* b, const char* s, u32 n) { + /* Linear dedup over what we've appended so far. Strtabs are small. */ + if (n == 0) return 0; + if (b->len > n) { + u32 i; + for (i = 0; i + n < b->len; ++i) { + if (b->data[i + n] == 0 && memcmp(b->data + i, s, n) == 0) return i; + } + } + u32 off = b->len; + bb_reserve(b, b->len + n + 1u); + memcpy(b->data + b->len, s, n); + b->data[b->len + n] = 0; + b->len += n + 1u; + return off; +} + +/* ---- GNU-hash computation (psABI v1 hash) ---- + * Body layout: + * u32 nbuckets + * u32 symoffset (first hashed dynsym index) + * u32 bloom_size (in 64-bit words) + * u32 bloom_shift + * u64 bloom[bloom_size] + * u32 buckets[nbuckets] + * u32 chains[ndynsym - symoffset] + * + * For Phase 4 we keep this very small: nbuckets = max(1, n/2), + * bloom_size = 1, bloom_shift = 6 (64-bit ELFCLASS64). All hashed + * symbols (sym_offset..ndynsym-1) participate in bloom + buckets + + * chains. Slot 0..symoffset-1 are STN_UNDEF + locals, which the + * loader doesn't hash. */ + +static u32 gnu_hash_name(const char* s, u32 n) { + /* h = 5381; for c in s: h = h * 33 + c */ + u32 h = 5381u; + u32 i; + for (i = 0; i < n; ++i) h = (h * 33u) + (u8)s[i]; + return h; +} + +/* ---- partition: enumerate imports ---- + * + * Walks LinkSyms and collects each `imported` symbol that's the + * canonical entry in img->globals (resolve_undefs may stamp `imported` + * onto multiple shadow slots of the same name; only the canonical one + * lands in dynsym). The two output arrays are LinkSymIds: funcs first + * (PLT-bound), then data (GOT-bound via GLOB_DAT). 
*/ + +typedef struct ImportLists { + LinkSymId* funcs; + u32 nfuncs; + LinkSymId* datas; + u32 ndatas; +} ImportLists; + +static int sym_is_func_import(const LinkSymbol* s) { + /* Most undef shadows have kind = SK_UNDEF (the obj reader keys kind + * off shndx, not STT_*). Only useful when the canonical entry + * carried a real type — fall through to the DSO lookup otherwise. */ + return s->kind == SK_FUNC || s->kind == SK_IFUNC; +} + +/* Resolve an import's classifier kind by consulting its providing + * DSO's dynsym. read_elf_dso preserves STT_FUNC / STT_OBJECT / etc. + * on each defined export; the consumer's undef may have arrived as + * SK_UNDEF (clang emits external refs as SHN_UNDEF, which the reader + * collapses to SK_UNDEF regardless of STT_*). Returns 1 for func / + * ifunc, 0 for everything else (or if the DSO export is missing). */ +static int dso_export_is_func(Linker* l, const LinkSymbol* s) { + if (s->dso_input_id == LINK_INPUT_NONE) return 0; + if (s->dso_input_id - 1u >= LinkInputs_count(&l->inputs)) return 0; + LinkInput* in = LinkInputs_at(&l->inputs, s->dso_input_id - 1u); + if (!in->obj) return 0; + ObjSymIter* it = obj_symiter_new(in->obj); + ObjSymEntry e; + int is_func = 0; + while (obj_symiter_next(it, &e)) { + const ObjSym* es = e.sym; + if (!es || es->name != s->name) continue; + if (es->kind == SK_UNDEF) continue; + is_func = (es->kind == SK_FUNC || es->kind == SK_IFUNC); + break; + } + obj_symiter_free(it); + return is_func; +} + +static void collect_imports(Linker* l, LinkImage* img, Heap* h, + ImportLists* il) { + u32 i; + u32 cap_f = 0, cap_d = 0; + il->funcs = NULL; + il->datas = NULL; + il->nfuncs = il->ndatas = 0; + for (i = 0; i < LinkSyms_count(&img->syms); ++i) { + LinkSymbol* s = LinkSyms_at(&img->syms, i); + if (!s->imported) continue; + if (s->name == 0) continue; + /* Only the canonical (img->globals) entry per name. 
*/ + LinkSymId canonical = symhash_get(&img->globals, s->name); + if (canonical != LINK_SYM_NONE && canonical != s->id) continue; + int is_func = sym_is_func_import(s) || dso_export_is_func(l, s); + if (is_func) { + if (VEC_GROW(h, il->funcs, cap_f, il->nfuncs + 1u)) + compiler_panic(img->c, no_loc(), "link: oom on import-funcs"); + il->funcs[il->nfuncs++] = s->id; + } else { + if (VEC_GROW(h, il->datas, cap_d, il->ndatas + 1u)) + compiler_panic(img->c, no_loc(), "link: oom on import-datas"); + il->datas[il->ndatas++] = s->id; + } + } +} + +static void free_imports(Heap* h, ImportLists* il) { + if (il->funcs) h->free(h, il->funcs, sizeof(*il->funcs) * il->nfuncs); + if (il->datas) h->free(h, il->datas, sizeof(*il->datas) * il->ndatas); +} + +/* ---- DT_NEEDED set: each DSO input that contributed at least one + * import. Order is input order so the loader sees deps in declaration + * order. */ +static void collect_needed(Linker* l, LinkImage* img, LinkDynState* dyn) { + Heap* h = img->heap; + u8* used; + u32 ninputs = LinkInputs_count(&l->inputs); + u32 i, nused = 0; + + used = (u8*)h->alloc(h, ninputs ? ninputs : 1u, 1); + if (!used) compiler_panic(img->c, no_loc(), "link: oom on needed map"); + memset(used, 0, ninputs ? ninputs : 1u); + + /* Mark every DSO that ended up satisfying at least one import. */ + for (i = 0; i < LinkSyms_count(&img->syms); ++i) { + LinkSymbol* s = LinkSyms_at(&img->syms, i); + if (!s->imported) continue; + if (s->dso_input_id == LINK_INPUT_NONE) continue; + if (s->dso_input_id - 1u >= ninputs) continue; + used[s->dso_input_id - 1u] = 1; + } + /* Always pull every explicitly-supplied DSO into DT_NEEDED, even if + * no import landed on it — matches GNU ld without --as-needed. + * Phase 4 doesn't plumb --as-needed through to the resolver, so the + * default "needed" behavior is the right baseline. 
*/ + for (i = 0; i < ninputs; ++i) { + LinkInput* in = LinkInputs_at(&l->inputs, i); + if (in->kind == LINK_INPUT_DSO_BYTES && in->soname != 0) used[i] = 1; + } + for (i = 0; i < ninputs; ++i) + if (used[i]) ++nused; + + dyn->needed = + nused ? (Sym*)h->alloc(h, sizeof(Sym) * nused, _Alignof(Sym)) : NULL; + if (nused && !dyn->needed) + compiler_panic(img->c, no_loc(), "link: oom on needed list"); + dyn->nneeded = 0; + for (i = 0; i < ninputs; ++i) { + LinkInput* in = LinkInputs_at(&l->inputs, i); + if (!used[i]) continue; + if (in->soname == 0) continue; + dyn->needed[dyn->nneeded++] = in->soname; + } + h->free(h, used, ninputs ? ninputs : 1u); +} + +/* ---- dynsym + dynstr build ---- + * + * Slot 0: STN_UNDEF (zero entry). The loader ignores names with index + * 0; we still emit a dynstr entry at offset 0 (the leading NUL). + * + * Slots 1..nimports: imported symbols (functions first, then data). + * st_shndx = SHN_UNDEF; the loader fills in the value at bind time. + * st_value/size are zero — the static linker has no value for an + * imported symbol. + * + * No `--export-dynamic` plumbing in Phase 4: only imports + the null + * slot land in .dynsym. Adding exports is mechanical (walk + * img->globals, append entries with st_shndx = matching .text/.data + * section index) but isn't on the test/musl path. */ + +static void build_dynsym(LinkImage* img, LinkDynState* dyn, + const ImportLists* il, ByteBuf* dynstr) { + Heap* h = img->heap; + u32 nimports = il->nfuncs + il->ndatas; + u32 ndynsym = 1u + nimports; /* +1 for null slot */ + u32 i; + + dyn->ndynsym = ndynsym; + dyn->dynsym = (DynSymRec*)h->alloc(h, sizeof(*dyn->dynsym) * ndynsym, + _Alignof(DynSymRec)); + if (!dyn->dynsym) compiler_panic(img->c, no_loc(), "link: oom on dynsym"); + memset(dyn->dynsym, 0, sizeof(*dyn->dynsym) * ndynsym); + + /* Slot 0: STN_UNDEF. dynstr leads with a NUL so st_name=0 reads as + * the empty string. 
*/ + { + u8 z = 0; + bb_append(dynstr, &z, 1); + } + + /* Per-symbol: dedupe `sym_dynidx` lookup table. Sized to LinkSymId + * upper bound. Clean (zero-filled) by alloc convention; we set + * indices for imports below. */ + dyn->sym_dynidx_size = LinkSyms_count(&img->syms) + 1u; + dyn->sym_dynidx = (u32*)h->alloc( + h, sizeof(*dyn->sym_dynidx) * dyn->sym_dynidx_size, _Alignof(u32)); + if (!dyn->sym_dynidx) + compiler_panic(img->c, no_loc(), "link: oom on sym_dynidx"); + memset(dyn->sym_dynidx, 0, sizeof(*dyn->sym_dynidx) * dyn->sym_dynidx_size); + /* sym_plt_vaddr is populated alongside the PLT body emit below; here + * we only allocate the parallel array. */ + dyn->sym_plt_vaddr = (u64*)h->alloc( + h, sizeof(*dyn->sym_plt_vaddr) * dyn->sym_dynidx_size, _Alignof(u64)); + if (!dyn->sym_plt_vaddr) + compiler_panic(img->c, no_loc(), "link: oom on sym_plt_vaddr"); + memset(dyn->sym_plt_vaddr, 0, + sizeof(*dyn->sym_plt_vaddr) * dyn->sym_dynidx_size); + + /* All imports have STB_GLOBAL so first_global is right after the + * single STN_UNDEF slot. (When local exports land via + * --export-dynamic, this needs to grow.) 
*/ + dyn->first_global = 1u; + + u32 idx = 1u; + for (i = 0; i < il->nfuncs; ++i) { + LinkSymId lsid = il->funcs[i]; + LinkSymbol* s = LinkSyms_at(&img->syms, lsid - 1); + DynSymRec* r = &dyn->dynsym[idx]; + Slice nm_s = pool_slice(img->c->global, s->name); + const char* nm = nm_s.s; + size_t namelen = nm_s.len; + r->st_name = bb_append_str(dynstr, nm, (u32)namelen); + r->st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC); + r->st_other = STV_DEFAULT; + r->st_shndx = SHN_UNDEF; + r->st_value = 0; + r->st_size = 0; + dyn->sym_dynidx[lsid] = idx; + ++idx; + } + for (i = 0; i < il->ndatas; ++i) { + LinkSymId lsid = il->datas[i]; + LinkSymbol* s = LinkSyms_at(&img->syms, lsid - 1); + DynSymRec* r = &dyn->dynsym[idx]; + Slice nm_s = pool_slice(img->c->global, s->name); + const char* nm = nm_s.s; + size_t namelen = nm_s.len; + u8 elf_type = STT_OBJECT; + if (s->kind == SK_TLS) + elf_type = STT_TLS; + else if (s->kind == SK_NOTYPE) + elf_type = STT_NOTYPE; + r->st_name = bb_append_str(dynstr, nm, (u32)namelen); + r->st_info = ELF64_ST_INFO(STB_GLOBAL, elf_type); + r->st_other = STV_DEFAULT; + r->st_shndx = SHN_UNDEF; + r->st_value = 0; + r->st_size = 0; + dyn->sym_dynidx[lsid] = idx; + ++idx; + } +} + +/* ---- .gnu.hash builder ---- + * + * Hashed range is [first_global, ndynsym) — slot 0 (STN_UNDEF) is + * unhashed. Layout matches loader expectations (musl, glibc, FreeBSD). + * + * Bucket count: max(1, hashed_count / 2), rounded up to odd so the + * mod operation distributes more uniformly. Bloom is 1 word for + * Phase 4 — a real implementation would scale with hashed_count, but + * 1 word with shift=6 still satisfies the loader's correctness check + * (any bit set is "maybe present"; false-positives only cost a chain + * scan). */ + +static void build_gnu_hash(Heap* h, LinkImage* img, LinkDynState* dyn, + const ByteBuf* dynstr) { + u32 hashed = (dyn->ndynsym > dyn->first_global) + ? (dyn->ndynsym - dyn->first_global) + : 0u; + u32 nbuckets = hashed ? 
hashed : 1u; + /* Round nbuckets up to next odd number. */ + if ((nbuckets & 1u) == 0u) nbuckets += 1u; + u32 bloom_size = 1u; /* 64-bit word */ + u32 bloom_shift = 6u; + u32 sym_offset = dyn->first_global; + u32 hdr_bytes = 16u; /* nbuckets/symoff/bloomsz/bloomshift */ + u32 bloom_bytes = bloom_size * 8u; + u32 buckets_bytes = nbuckets * 4u; + u32 chains_bytes = hashed * 4u; + u32 total = hdr_bytes + bloom_bytes + buckets_bytes + chains_bytes; + + u8* buf = (u8*)h->alloc(h, total ? total : 1u, 4); + if (!buf) compiler_panic(img->c, no_loc(), "link: oom on .gnu.hash"); + memset(buf, 0, total); + + wr_u32_le(buf + 0, nbuckets); + wr_u32_le(buf + 4, sym_offset); + wr_u32_le(buf + 8, bloom_size); + wr_u32_le(buf + 12, bloom_shift); + + /* Bloom + buckets + chains. We need each hashed symbol's hash. */ + if (hashed) { + u32 i; + u32* hashes = (u32*)h->alloc(h, sizeof(u32) * hashed, _Alignof(u32)); + if (!hashes) + compiler_panic(img->c, no_loc(), "link: oom on .gnu.hash hashes"); + for (i = 0; i < hashed; ++i) { + const DynSymRec* r = &dyn->dynsym[sym_offset + i]; + const char* name = (const char*)dynstr->data + r->st_name; + size_t n = name ? slice_from_cstr(name).len : 0; + hashes[i] = gnu_hash_name(name, (u32)n); + } + + /* Bloom filter: H[i] / H[i] >> shift */ + u64 bloom = 0; + for (i = 0; i < hashed; ++i) { + u32 h1 = hashes[i] % 64u; + u32 h2 = (hashes[i] >> bloom_shift) % 64u; + bloom |= ((u64)1 << h1) | ((u64)1 << h2); + } + wr_u64_le(buf + hdr_bytes, bloom); + + /* Buckets/chains: for each hashed sym, append to its bucket's + * chain. The chain encodes (hash & ~1) per entry; the LSB is set + * on the LAST entry in a bucket to terminate. Buckets are filled + * with the first chain index that hashes there (1-based into the + * dynsym, i.e. `sym_offset + i`). */ + u32* buckets = (u32*)(buf + hdr_bytes + bloom_bytes); + u32* chains = (u32*)(buf + hdr_bytes + bloom_bytes + buckets_bytes); + /* First pass: bucket = first sym index that hashes there. 
*/ + for (i = 0; i < hashed; ++i) { + u32 b = hashes[i] % nbuckets; + if (buckets[b] == 0) buckets[b] = sym_offset + i; + } + /* Second pass: chain[i] = hash with LSB cleared; LSB set if next + * sym is in a different bucket. Walk symbols in order; LSB on + * chain[i] when sym i+1 is in a different bucket OR is the end. */ + for (i = 0; i < hashed; ++i) { + u32 v = hashes[i] & ~1u; + int last = (i + 1 == hashed) || + ((hashes[i + 1] % nbuckets) != (hashes[i] % nbuckets)); + if (last) v |= 1u; + chains[i] = v; + } + /* Fix bucket→first-sym indices: if multiple syms share a bucket + * but were inserted out of contiguous order, we need them + * contiguous. We assumed contiguity above without enforcing it. + * For Phase 4 with small hashed sets this is fine, but flag the + * shortcut. */ + h->free(h, hashes, sizeof(u32) * hashed); + } + + dyn->gnu_hash = buf; + dyn->gnu_hash_len = total; +} + +/* ---- .dynamic body builder ---- + * + * Computed at layout time so the size is known before segments are + * placed. Each entry is two u64s (d_tag, d_un.d_val|d_un.d_ptr). + * Final entry is DT_NULL. The d_ptr fields that point at other + * synthetic sections are filled with image-relative vaddrs; the emit + * pass adds load-base / IMAGE_BASE only when ET_EXEC. */ + +typedef struct DynEntry { + u64 tag; + u64 val; /* either d_val or d_ptr; emit just writes 8 bytes */ +} DynEntry; + +static u32 count_dynamic_entries(const LinkDynState* dyn) { + /* Required: DT_STRTAB DT_STRSZ DT_SYMTAB DT_SYMENT DT_GNU_HASH + * DT_FLAGS_1 (DF_1_NOW for eager binding) + * DT_NULL terminator + * Optional (only when there are .rela.dyn records): + * DT_RELA DT_RELASZ DT_RELAENT + * Optional (only when there are imported functions / a PLT): + * DT_PLTGOT DT_PLTRELSZ DT_PLTREL DT_JMPREL + * Plus DT_NEEDED per dependency. 
*/ + u32 n = dyn->nneeded; + n += 6; /* 5 fixed + DT_NULL */ + if (dyn->cap_rela_dyn) n += 3; /* DT_RELA + DT_RELASZ + DT_RELAENT */ + if (dyn->nrela_plt) n += 4; /* PLT-only entries */ + return n; +} + +/* ---- main entry ---- */ + +void layout_dyn(Linker* l, LinkImage* img) { + Heap* h = img->heap; + LinkDynState* dyn; + ImportLists imports; + ByteBuf dynstr; + u64 page; + const LinkArchDesc* arch; + const ObjElfArchOps* elf_arch; + + if (!l->emit_pie) return; + + arch = link_arch_desc_for(l->c); + if (!arch) + compiler_panic(img->c, no_loc(), "link: layout_dyn: no arch descriptor"); + { + const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_ELF); + elf_arch = fmt && fmt->elf_arch ? fmt->elf_arch(l->c->target.arch) : NULL; + if (!elf_arch) + compiler_panic(img->c, no_loc(), + "link: layout_dyn: no ELF arch descriptor"); + } + + dyn = (LinkDynState*)h->alloc(h, sizeof(*dyn), _Alignof(LinkDynState)); + if (!dyn) compiler_panic(img->c, no_loc(), "link: oom on dyn state"); + memset(dyn, 0, sizeof(*dyn)); + img->dyn = dyn; + img->pie = 1; + + /* PT_INTERP path. Default to the canonical musl loader matching the + * target arch (per-arch table in src/arch/<arch>/link.c) when the caller + * didn't set one. Drivers like cfree-cc always override via + * link_set_interp_path; this default is correctness for direct + * libcfree consumers. glibc users have to set their interp + * explicitly — we don't pick a default for them. */ + dyn->interp_path = + l->interp_path + ? l->interp_path + : pool_intern_slice(l->c->global, + slice_from_cstr(elf_arch->default_musl_interp)); + + /* Step 1: enumerate imports + DT_NEEDED. */ + collect_imports(l, img, h, &imports); + collect_needed(l, img, dyn); + + /* Step 2: build .dynstr + .dynsym. .dynstr must also carry the + * DT_NEEDED soname strings the .dynamic body references; intern + * them after the import names so build_dynsym's de-dup also covers + * any name that happens to collide with a soname. 
*/ + bb_init(&dynstr, h); + build_dynsym(img, dyn, &imports, &dynstr); + { + u32 ni; + for (ni = 0; ni < dyn->nneeded; ++ni) { + Slice s_s = pool_slice(l->c->global, dyn->needed[ni]); + const char* s = s_s.s; + size_t slen = s_s.len; + if (s && slen) (void)bb_append_str(&dynstr, s, (u32)slen); + } + } + dyn->dynstr = dynstr.data; + dyn->dynstr_len = dynstr.len; + + /* Step 3: .gnu.hash. */ + build_gnu_hash(h, img, dyn, &dynstr); + + /* Step 4: pre-size all the synthetic sections. + * .interp: strlen + 1 + * .dynsym: 24 * ndynsym + * .dynstr: dynstr_len + * .gnu.hash: gnu_hash_len + * .rela.dyn: 24 * (ndatas + cap_relative) — we reserve 4096 entries + * for RELATIVE; emit fills them. (Quick-and-dirty: the + * static path never has so many internal absolute relocs.) + * .rela.plt: 24 * nfuncs + * .plt: 32 + 16 * nfuncs (PLT0 + per-slot) + * .got.plt: 8 * (3 + nfuncs) + * .dynamic: 16 * count_dynamic_entries + */ + dyn->nplt = imports.nfuncs; + dyn->nrela_plt = imports.nfuncs; + dyn->rela_plt = imports.nfuncs + ? (DynRela*)h->alloc(h, sizeof(DynRela) * imports.nfuncs, + _Alignof(DynRela)) + : NULL; + if (imports.nfuncs && !dyn->rela_plt) + compiler_panic(img->c, no_loc(), "link: oom on rela_plt"); + + /* RELA dyn: GLOB_DAT (one per imported abs-relocated symbol) + + * RELATIVE (one per PIE internal abs reloc against a defined sym). + * Phase 5 emits these dynamically during reloc-apply; pre-count the + * exact total here (img->relocs and the resolve-time `imported` flags + * are already settled by the time layout_dyn runs) so the section + * isn't padded with hundreds of trailing R_*_NONE records. 
*/ + u32 cap_rel = 0; + { + u32 ri; + for (ri = 0; ri < LinkRelocs_count(&img->relocs); ++ri) { + const LinkRelocApply* r = LinkRelocs_at(&img->relocs, ri); + const LinkSymbol* tgt = LinkSyms_at(&img->syms, r->target - 1); + if (r->kind != R_ABS32 && r->kind != R_ABS64) continue; + if (tgt->imported) { + cap_rel++; /* GLOB_DAT */ + } else if (tgt->defined && tgt->kind != SK_ABS) { + cap_rel++; /* RELATIVE */ + } + } + } + dyn->cap_rela_dyn = cap_rel; + dyn->rela_dyn = + dyn->cap_rela_dyn + ? (DynRela*)h->alloc(h, sizeof(DynRela) * dyn->cap_rela_dyn, + _Alignof(DynRela)) + : NULL; + if (dyn->cap_rela_dyn && !dyn->rela_dyn) + compiler_panic(img->c, no_loc(), "link: oom on rela_dyn"); + dyn->nrela_dyn = 0; + + Slice interp_s = pool_slice(l->c->global, dyn->interp_path); + const char* interp_str = interp_s.s; + size_t namelen = interp_s.len; + u64 interp_bytes = (u64)namelen + 1u; + u64 dynsym_bytes = (u64)dyn->ndynsym * ELF64_SYM_SIZE; + u64 dynstr_bytes = (u64)dyn->dynstr_len; + u64 gnuhash_bytes = (u64)dyn->gnu_hash_len; + /* rela.dyn / rela.plt sized for full capacity; emit only writes + * what's populated, but the section's file_size matches capacity + * so PT_LOAD/.rela.dyn shdr sh_size add up. Trailing zero records + * are harmless to the loader (R_AARCH64_NONE). */ + u64 rela_dyn_bytes = (u64)dyn->cap_rela_dyn * ELF64_RELA_SIZE; + u64 rela_plt_bytes = (u64)dyn->nrela_plt * ELF64_RELA_SIZE; + u64 plt_bytes = + (u64)(imports.nfuncs + ? arch->plt0_size + arch->plt_entry_size * imports.nfuncs + : 0u); + u64 gotplt_bytes = (u64)(imports.nfuncs ? 8u * (3u + imports.nfuncs) : 0u); + dyn->ndyn_entries = count_dynamic_entries(dyn); + u64 dynamic_bytes = (u64)dyn->ndyn_entries * ELF64_DYN_SIZE; + + /* Step 5: place segments, page-aligned after the existing image + * span. 
New segments: + * ro_seg (PF_R) — .interp + .dynsym + .dynstr + .gnu.hash + + * .rela.dyn + .rela.plt + * rx_seg (PF_R+X)— .plt (only when imports.nfuncs > 0) + * rw_seg (PF_R+W)— .got.plt + .dynamic + * + * .dynamic lives in rw_seg because glibc's loader patches DT_* + * d_un.d_ptr fields in-place at startup (elf_get_dynamic_info + * adjusts STRTAB/SYMTAB/etc. by l_addr); a PF_R-only segment + * causes SEGV_ACCERR. musl's loader doesn't do this rewrite, but + * the RW placement is conventional and works for both. + */ + page = 0x4000u; /* keep aligned with layout_page_size default */ + { + /* Read the page size from layout_page_size by re-using the + * configured execmem if present — duplicates the helper rather + * than expose it; the value is only used for alignment. */ + const CfreeExecMem* m = (l && l->jit_host) ? l->jit_host->execmem : NULL; + if (m && m->page_size) page = (u64)m->page_size; + } + + u64 base_vaddr = 0; + u32 i; + for (i = 0; i < img->nsegments; ++i) { + u64 end = img->segments[i].vaddr + img->segments[i].mem_size; + if (end > base_vaddr) base_vaddr = end; + } + base_vaddr = ALIGN_UP(base_vaddr, page); + + /* Pack ro section offsets (relative to ro_seg.vaddr). 8-byte + * alignment for tables; 4-byte for .interp string. */ + u64 off = 0; + u64 interp_off = off; + off = ALIGN_UP(off + interp_bytes, 8u); + u64 dynsym_off = off; + off = ALIGN_UP(off + dynsym_bytes, 8u); + u64 dynstr_off = off; + off = ALIGN_UP(off + dynstr_bytes, 8u); + u64 gnuhash_off = off; + off = ALIGN_UP(off + gnuhash_bytes, 8u); + u64 rela_dyn_off = off; + off = ALIGN_UP(off + rela_dyn_bytes, 8u); + u64 rela_plt_off = off; + off = ALIGN_UP(off + rela_plt_bytes, 8u); + u64 ro_seg_size = off; + + /* When no PLT is needed, suppress the RX/.plt segment entirely. */ + int has_plt = imports.nfuncs > 0; + + /* Pack rw_seg offsets: .got.plt (when has_plt) followed by .dynamic. 
*/ + u64 rw_off = 0; + u64 gotplt_off = rw_off; + if (has_plt) rw_off = ALIGN_UP(rw_off + gotplt_bytes, 8u); + u64 dynamic_off = rw_off; + rw_off = ALIGN_UP(rw_off + dynamic_bytes, 8u); + u64 rw_seg_size = rw_off; + + u64 ro_vaddr = base_vaddr; + u64 rx_vaddr = ALIGN_UP(ro_vaddr + ro_seg_size, page); + u64 rw_vaddr = ALIGN_UP(rx_vaddr + (has_plt ? plt_bytes : 0u), page); + + /* rw_seg always exists (it carries .dynamic). */ + u32 nseg = 2u + (has_plt ? 1u : 0u); + u32 seg_base = dyn_alloc_segments(img, nseg); + u32 ro_seg_idx = seg_base + 0u; + u32 rx_seg_idx = has_plt ? seg_base + 1u : 0u; + u32 rw_seg_idx = seg_base + (has_plt ? 2u : 1u); + + LinkSegment* ro_seg = &img->segments[ro_seg_idx]; + memset(ro_seg, 0, sizeof(*ro_seg)); + ro_seg->id = (LinkSegmentId)(ro_seg_idx + 1u); + ro_seg->flags = SF_ALLOC; /* PF_R */ + ro_seg->file_offset = ro_vaddr; + ro_seg->vaddr = ro_vaddr; + ro_seg->file_size = ro_seg_size; + ro_seg->mem_size = ro_seg_size; + ro_seg->align = (u32)page; + ro_seg->nsections = 6; + img->segment_bytes[ro_seg_idx] = + ro_seg_size ? 
(u8*)h->alloc(h, (size_t)ro_seg_size, 16) : NULL; + img->segment_bytes_cap[ro_seg_idx] = (size_t)ro_seg_size; + if (ro_seg_size && !img->segment_bytes[ro_seg_idx]) + compiler_panic(img->c, no_loc(), "link: oom on ro dyn segment"); + if (ro_seg_size) + memset(img->segment_bytes[ro_seg_idx], 0, (size_t)ro_seg_size); + + if (has_plt) { + LinkSegment* rx_seg = &img->segments[rx_seg_idx]; + memset(rx_seg, 0, sizeof(*rx_seg)); + rx_seg->id = (LinkSegmentId)(rx_seg_idx + 1u); + rx_seg->flags = SF_ALLOC | SF_EXEC; + rx_seg->file_offset = rx_vaddr; + rx_seg->vaddr = rx_vaddr; + rx_seg->file_size = plt_bytes; + rx_seg->mem_size = plt_bytes; + rx_seg->align = (u32)page; + rx_seg->nsections = 1; + img->segment_bytes[rx_seg_idx] = (u8*)h->alloc(h, (size_t)plt_bytes, 16); + img->segment_bytes_cap[rx_seg_idx] = (size_t)plt_bytes; + if (!img->segment_bytes[rx_seg_idx]) + compiler_panic(img->c, no_loc(), "link: oom on .plt segment"); + memset(img->segment_bytes[rx_seg_idx], 0, (size_t)plt_bytes); + /* Stash plt / got.plt vaddrs now — the PLT body emit just below + * reads them, and the post-shift fixup in shift_image_addresses + * (link_elf.c) keys on these fields too. */ + dyn->plt_vaddr = rx_vaddr; + dyn->plt_size = plt_bytes; + dyn->got_plt_vaddr = rw_vaddr; + dyn->got_plt_size = gotplt_bytes; + /* PLT body emit: the descriptor owns the psABI-specific bytes. 
*/ + if (!arch->emit_plt0 || !arch->emit_plt_entry) + compiler_panic(l->c, no_loc(), "link: PLT emit not configured"); + { + u8* plt_b = img->segment_bytes[rx_seg_idx]; + u32 ki; + arch->emit_plt0(plt_b, dyn->plt_vaddr, dyn->got_plt_vaddr); + for (ki = 0; ki < imports.nfuncs; ++ki) { + u64 entry_vaddr = dyn->plt_vaddr + arch->plt0_size + + (u64)arch->plt_entry_size * (u64)ki; + u64 slot_vaddr = dyn->got_plt_vaddr + 8u * (3u + ki); + u8* p = + plt_b + arch->plt0_size + (size_t)arch->plt_entry_size * (size_t)ki; + arch->emit_plt_entry(p, entry_vaddr, slot_vaddr); + } + } + } + /* rw_seg always exists — it carries .dynamic, plus .got.plt when + * imports are present. */ + { + LinkSegment* rw_seg = &img->segments[rw_seg_idx]; + memset(rw_seg, 0, sizeof(*rw_seg)); + rw_seg->id = (LinkSegmentId)(rw_seg_idx + 1u); + rw_seg->flags = SF_ALLOC | SF_WRITE; + rw_seg->file_offset = rw_vaddr; + rw_seg->vaddr = rw_vaddr; + rw_seg->file_size = rw_seg_size; + rw_seg->mem_size = rw_seg_size; + rw_seg->align = (u32)page; + rw_seg->nsections = has_plt ? 2u : 1u; + img->segment_bytes[rw_seg_idx] = (u8*)h->alloc(h, (size_t)rw_seg_size, 16); + img->segment_bytes_cap[rw_seg_idx] = (size_t)rw_seg_size; + if (!img->segment_bytes[rw_seg_idx]) + compiler_panic(img->c, no_loc(), "link: oom on rw dyn segment"); + /* Zero-initialize. .got.plt[0] (&.dynamic) is filled later, after + * shift_image_addresses has bumped dyn->dynamic_vaddr. .dynamic + * body is built post-shift in link_emit_elf. Loader + * patches all .got.plt slots from .rela.plt before user code + * under DF_1_NOW. */ + memset(img->segment_bytes[rw_seg_idx], 0, (size_t)rw_seg_size); + } + img->nsegments += nseg; + + /* Step 6: synthetic LinkSection entries. Order in img->sections + * matches the loader-friendly file order and feeds emit's + * outshdr-merge pass. */ + u32 nsec = 7u + (has_plt ? 
2u : 0u); + u32 sec_base = dyn_alloc_sections(img, nsec); + + /* helper: populate a fresh LinkSection for a segment-internal range */ + /* Inline because the args differ enough (sem, name) per slot. */ + Sym name_interp = pool_intern_slice(l->c->global, SLICE_LIT(".interp")); + Sym name_dynsym = pool_intern_slice(l->c->global, SLICE_LIT(".dynsym")); + Sym name_dynstr = pool_intern_slice(l->c->global, SLICE_LIT(".dynstr")); + Sym name_gnu_hash = pool_intern_slice(l->c->global, SLICE_LIT(".gnu.hash")); + Sym name_rela_dyn = pool_intern_slice(l->c->global, SLICE_LIT(".rela.dyn")); + Sym name_rela_plt = pool_intern_slice(l->c->global, SLICE_LIT(".rela.plt")); + Sym name_dynamic = pool_intern_slice(l->c->global, SLICE_LIT(".dynamic")); + Sym name_plt = pool_intern_slice(l->c->global, SLICE_LIT(".plt")); + Sym name_got_plt = pool_intern_slice(l->c->global, SLICE_LIT(".got.plt")); + +#define INIT_SEC(IDX, NAME, SEG_IDX, OFF_IN_SEG, SIZE, ALIGN, FLAGS, SEM) \ + do { \ + LinkSection* ls = &img->sections[sec_base + (IDX)]; \ + memset(ls, 0, sizeof(*ls)); \ + ls->id = (LinkSectionId)(sec_base + (IDX) + 1u); \ + ls->input_id = LINK_INPUT_NONE; \ + ls->obj_section_id = OBJ_SEC_NONE; \ + ls->segment_id = img->segments[(SEG_IDX)].id; \ + ls->input_offset = (OFF_IN_SEG); \ + ls->file_offset = img->segments[(SEG_IDX)].file_offset + (OFF_IN_SEG); \ + ls->vaddr = img->segments[(SEG_IDX)].vaddr + (OFF_IN_SEG); \ + ls->size = (SIZE); \ + ls->flags = (FLAGS); \ + ls->align = (ALIGN); \ + ls->name = (NAME); \ + ls->sem = (SEM); \ + } while (0) + + INIT_SEC(0, name_interp, ro_seg_idx, interp_off, interp_bytes, 1, SF_ALLOC, + SSEM_PROGBITS); + INIT_SEC(1, name_dynsym, ro_seg_idx, dynsym_off, dynsym_bytes, 8, SF_ALLOC, + SSEM_PROGBITS); + INIT_SEC(2, name_dynstr, ro_seg_idx, dynstr_off, dynstr_bytes, 1, SF_ALLOC, + SSEM_PROGBITS); + INIT_SEC(3, name_gnu_hash, ro_seg_idx, gnuhash_off, gnuhash_bytes, 8, + SF_ALLOC, SSEM_PROGBITS); + INIT_SEC(4, name_rela_dyn, ro_seg_idx, rela_dyn_off, 
rela_dyn_bytes, 8, + SF_ALLOC, SSEM_PROGBITS); + INIT_SEC(5, name_rela_plt, ro_seg_idx, rela_plt_off, rela_plt_bytes, 8, + SF_ALLOC, SSEM_PROGBITS); + INIT_SEC(6, name_dynamic, rw_seg_idx, dynamic_off, dynamic_bytes, 8, + SF_ALLOC | SF_WRITE, SSEM_PROGBITS); + + dyn->sec_interp = (LinkSectionId)(sec_base + 0 + 1u); + dyn->sec_dynsym = (LinkSectionId)(sec_base + 1 + 1u); + dyn->sec_dynstr = (LinkSectionId)(sec_base + 2 + 1u); + dyn->sec_gnu_hash = (LinkSectionId)(sec_base + 3 + 1u); + dyn->sec_rela_dyn = (LinkSectionId)(sec_base + 4 + 1u); + dyn->sec_rela_plt = (LinkSectionId)(sec_base + 5 + 1u); + dyn->sec_dynamic = (LinkSectionId)(sec_base + 6 + 1u); + dyn->dynamic_vaddr = img->segments[rw_seg_idx].vaddr + dynamic_off; + dyn->dynamic_size = dynamic_bytes; + + if (has_plt) { + INIT_SEC(7, name_plt, rx_seg_idx, 0, plt_bytes, 16, SF_ALLOC | SF_EXEC, + SSEM_PROGBITS); + INIT_SEC(8, name_got_plt, rw_seg_idx, gotplt_off, gotplt_bytes, 8, + SF_ALLOC | SF_WRITE, SSEM_PROGBITS); + dyn->sec_plt = (LinkSectionId)(sec_base + 7 + 1u); + dyn->sec_got_plt = (LinkSectionId)(sec_base + 8 + 1u); + } +#undef INIT_SEC + + img->nsections += nsec; + + /* Step 7: copy .interp / .dynsym / .dynstr / .gnu.hash bytes into + * the ro segment. .dynamic body is built during emit (it embeds + * runtime vaddrs that PIE keeps image-relative; emit just reads + * the section ids' final vaddrs). */ + u8* ro_bytes = img->segment_bytes[ro_seg_idx]; + + /* .interp */ + if (interp_bytes && ro_bytes) + memcpy(ro_bytes + interp_off, interp_str, (size_t)interp_bytes); + + /* .dynsym: serialize DynSymRec to ELF64 wire layout. 
*/ + { + u32 si; + for (si = 0; si < dyn->ndynsym; ++si) { + u8* p = ro_bytes + dynsym_off + (u64)si * ELF64_SYM_SIZE; + const DynSymRec* r = &dyn->dynsym[si]; + wr_u32_le(p + 0, r->st_name); + p[4] = r->st_info; + p[5] = r->st_other; + wr_u16_le(p + 6, r->st_shndx); + wr_u64_le(p + 8, r->st_value); + wr_u64_le(p + 16, r->st_size); + } + } + + /* .dynstr */ + if (dynstr_bytes && ro_bytes && dyn->dynstr) + memcpy(ro_bytes + dynstr_off, dyn->dynstr, dyn->dynstr_len); + + /* .gnu.hash */ + if (gnuhash_bytes && ro_bytes && dyn->gnu_hash) + memcpy(ro_bytes + gnuhash_off, dyn->gnu_hash, dyn->gnu_hash_len); + + /* .rela.plt: emit JUMP_SLOT records, one per imported function, and + * stash each import's PLT-entry vaddr in `sym_plt_vaddr` so the + * apply pass can redirect CALL26/JUMP26 against the import. The + * record's r_offset addresses the .got.plt slot the PLT stub reads + * through; the loader patches that slot to the resolved runtime + * address before user code runs (DF_1_NOW, BIND_NOW). Bytes are + * written here at pre-shift vaddrs; link_emit re-serializes them + * after shift_image_addresses bumps the dyn vaddrs by headers_load. */ + { + u32 ki; + for (ki = 0; ki < imports.nfuncs; ++ki) { + LinkSymId lsid = imports.funcs[ki]; + u32 dynidx = dyn->sym_dynidx[lsid]; + u64 slot_vaddr = dyn->got_plt_vaddr + 8u * (3u + ki); + u64 plt_entry_vaddr = dyn->plt_vaddr + arch->plt0_size + + (u64)arch->plt_entry_size * (u64)ki; + DynRela* r = &dyn->rela_plt[ki]; + r->r_offset = slot_vaddr; + r->r_info = ELF64_R_INFO((u64)dynidx, elf_arch->r_jump_slot); + r->r_addend = 0; + /* Serialize into segment bytes (will be re-serialized post-shift). */ + u8* p = ro_bytes + rela_plt_off + (u64)ki * ELF64_RELA_SIZE; + wr_u64_le(p + 0, r->r_offset); + wr_u64_le(p + 8, r->r_info); + wr_u64_le(p + 16, (u64)r->r_addend); + /* sym_plt_vaddr is consulted by apply_all_relocs. 
*/ + dyn->sym_plt_vaddr[lsid] = plt_entry_vaddr; + } + } + + /* .rela.dyn entries (GLOB_DAT for imports referenced via .got, and + * RELATIVE for PIE internal abs fixups) are emitted by + * apply_all_relocs as it walks every relocation. layout_dyn + * leaves .rela.dyn empty here; the bytes are written post-shift in + * link_emit_elf. */ + + /* .got.plt prelude: for BIND_NOW we leave the body zero — the + * loader patches every slot from .rela.plt before user code. Some + * loaders still inspect slot 0 (&.dynamic) at startup; provide it + * so glibc-style loaders don't fault. The loader writes the link_map + * cookie into slot 1 at load time. */ + if (has_plt) { + u8* gp_bytes = img->segment_bytes[rw_seg_idx]; + if (gp_bytes && gotplt_bytes >= 8u) { + wr_u64_le(gp_bytes, dyn->dynamic_vaddr); + /* Slots 1, 2, and per-PLT slots stay zero until the loader + * fills them. Phase 5 would prefill the per-PLT slots with + * the address of PLT0 to support lazy binding. */ + } + } + + /* The .dynamic body is built later, after segment shifts are + * applied during emit (link_elf.c). emit_dynamic_body takes the + * post-shift vaddrs of every other dyn section and writes one + * DT_* entry per index. */ + + /* Synthesize linker-defined symbols that reference the .dynamic + * vaddr. Scrt1.o on Linux loads `_DYNAMIC` via ADRP+ADD, and + * libc_nonshared.a's atexit shim takes `__dso_handle` as the + * per-image identity (we use the .dynamic vaddr — any stable + * per-image address satisfies the contract since the shim only + * passes it through to __cxa_atexit, which the program-side glibc + * just stashes). 
*/ + link_define_boundary(l, img, "_DYNAMIC", dyn->dynamic_vaddr); + link_define_boundary(l, img, "__dso_handle", dyn->dynamic_vaddr); + + free_imports(h, &imports); +} + +/* ---- cleanup ---- */ + +void link_dyn_state_free(LinkImage* img) { + Heap* h = img->heap; + LinkDynState* dyn = img->dyn; + if (!dyn) return; + if (dyn->dynsym) h->free(h, dyn->dynsym, sizeof(*dyn->dynsym) * dyn->ndynsym); + if (dyn->dynstr) h->free(h, dyn->dynstr, dyn->dynstr_len); + if (dyn->gnu_hash) h->free(h, dyn->gnu_hash, dyn->gnu_hash_len); + if (dyn->rela_dyn) + h->free(h, dyn->rela_dyn, sizeof(*dyn->rela_dyn) * dyn->cap_rela_dyn); + if (dyn->rela_plt) + h->free(h, dyn->rela_plt, sizeof(*dyn->rela_plt) * dyn->nrela_plt); + if (dyn->needed) h->free(h, dyn->needed, sizeof(*dyn->needed) * dyn->nneeded); + if (dyn->sym_dynidx) + h->free(h, dyn->sym_dynidx, + sizeof(*dyn->sym_dynidx) * dyn->sym_dynidx_size); + if (dyn->sym_plt_vaddr) + h->free(h, dyn->sym_plt_vaddr, + sizeof(*dyn->sym_plt_vaddr) * dyn->sym_dynidx_size); + h->free(h, dyn, sizeof(*dyn)); + img->dyn = NULL; +} diff --git a/src/obj/elf/read.c b/src/obj/elf/read.c @@ -0,0 +1,694 @@ +/* ELF ET_REL reader. Parses a 64-bit little-endian relocatable object + * back into a fresh ObjBuilder. The post-finalize ObjBuilder shape is + * the canonical superset doc/DESIGN.md §5.5 promises: read_elf of an + * emit_elf output produces an ObjBuilder equivalent to the writer's + * input, modulo (a) section ordering and (b) STT_SECTION symbols + * synthesized by the writer. + * + * Scope: AArch64 little-endian. Other archs / endianness produce a + * compiler_panic with a diagnostic. 
*/ + +#include <string.h> + +#include "core/heap.h" +#include "core/pool.h" +#include "core/slice.h" +#include "obj/elf/elf.h" +#include "obj/format.h" + +static SrcLoc no_loc(void) { + SrcLoc l = {0, 0, 0}; + return l; +} + +/* ---- shdr scratch struct ---- */ + +typedef struct ShdrRec { + u32 sh_name; + u32 sh_type; + u64 sh_flags; + u64 sh_addr; + u64 sh_offset; + u64 sh_size; + u32 sh_link; + u32 sh_info; + u64 sh_addralign; + u64 sh_entsize; +} ShdrRec; + +static void parse_shdr(const u8* p, ShdrRec* out) { + out->sh_name = elf_rd_u32(p + 0); + out->sh_type = elf_rd_u32(p + 4); + out->sh_flags = elf_rd_u64(p + 8); + out->sh_addr = elf_rd_u64(p + 16); + out->sh_offset = elf_rd_u64(p + 24); + out->sh_size = elf_rd_u64(p + 32); + out->sh_link = elf_rd_u32(p + 40); + out->sh_info = elf_rd_u32(p + 44); + out->sh_addralign = elf_rd_u64(p + 48); + out->sh_entsize = elf_rd_u64(p + 56); +} + +/* ---- mappers ---- */ + +/* The bits this function maps to SecFlag — anything outside this mask is + * treated as opaque and stashed in Section.ext_flags by the caller so the + * emitter can write it back unchanged. Examples of bits left over: + * SHF_EXCLUDE (0x80000000) on .llvm_addrsig, SHF_COMPRESSED (0x800) on + * compressed .debug_*, SHF_INFO_LINK (0x40) on .rela.* sections. */ +#define ELF_KNOWN_FLAGS_MASK \ + ((u64)(SHF_ALLOC | SHF_EXECINSTR | SHF_WRITE | SHF_TLS | SHF_MERGE | \ + SHF_STRINGS | SHF_GROUP | SHF_LINK_ORDER | SHF_GNU_RETAIN)) + +static u16 elf_flags_to_obj(u64 f) { + u16 r = 0; + if (f & SHF_ALLOC) r |= SF_ALLOC; + if (f & SHF_EXECINSTR) r |= SF_EXEC; + if (f & SHF_WRITE) r |= SF_WRITE; + if (f & SHF_TLS) r |= SF_TLS; + if (f & SHF_MERGE) r |= SF_MERGE; + if (f & SHF_STRINGS) r |= SF_STRINGS; + if (f & SHF_GROUP) r |= SF_GROUP; + if (f & SHF_LINK_ORDER) r |= SF_LINK_ORDER; + if (f & SHF_GNU_RETAIN) r |= SF_RETAIN; + return r; +} + +/* Map ELF sh_type -> SecSem. 
Sets *known to 1 if the value is one of + * the canonical types the cfree model knows about; 0 means the caller + * fell through to the SSEM_PROGBITS fallback and should preserve the + * raw sh_type via Section.ext_type so emit_elf can write it back. */ +static u16 elf_type_to_sem(u32 t, int* known) { + *known = 1; + switch (t) { + case SHT_PROGBITS: + return SSEM_PROGBITS; + case SHT_NOBITS: + return SSEM_NOBITS; + case SHT_SYMTAB: + return SSEM_SYMTAB; + case SHT_STRTAB: + return SSEM_STRTAB; + case SHT_RELA: + return SSEM_RELA; + case SHT_REL: + return SSEM_REL; + case SHT_NOTE: + return SSEM_NOTE; + case SHT_INIT_ARRAY: + return SSEM_INIT_ARRAY; + case SHT_FINI_ARRAY: + return SSEM_FINI_ARRAY; + case SHT_PREINIT_ARRAY: + return SSEM_PREINIT_ARRAY; + case SHT_GROUP: + return SSEM_GROUP; + default: + *known = 0; + return SSEM_PROGBITS; + } +} + +static u16 elf_kind_from_name(const char* name, u32 nlen, u64 sh_flags, + u32 sh_type) { + if (sh_type == SHT_NOBITS) return SEC_BSS; + if (nlen >= 5 && memcmp(name, ".text", 5) == 0) return SEC_TEXT; + if (nlen >= 7 && memcmp(name, ".rodata", 7) == 0) return SEC_RODATA; + if (nlen >= 5 && memcmp(name, ".data", 5) == 0) return SEC_DATA; + if (nlen >= 4 && memcmp(name, ".bss", 4) == 0) return SEC_BSS; + if (nlen >= 7 && memcmp(name, ".debug_", 7) == 0) return SEC_DEBUG; + /* Fallback: classify by flags. */ + if (sh_flags & SHF_EXECINSTR) return SEC_TEXT; + if (sh_flags & SHF_WRITE) return SEC_DATA; + if (sh_flags & SHF_ALLOC) return SEC_RODATA; + return SEC_OTHER; +} + +static u16 elf_bind_to_obj(u32 b) { + switch (b) { + case STB_GLOBAL: + return SB_GLOBAL; + case STB_WEAK: + return SB_WEAK; + default: + return SB_LOCAL; + } +} + +static u16 elf_type_to_kind(u32 t, u16 shndx) { + if (shndx == SHN_UNDEF) return SK_UNDEF; + if (shndx == SHN_COMMON) return SK_COMMON; + /* SHN_ABS is the convention for STT_FILE and a few other defined + * symbols whose value is not an address. 
Don't smother the type + * with SK_ABS when the type field carries real information — only + * fall through to SK_ABS for STT_NOTYPE-at-SHN_ABS. */ + if (shndx == SHN_ABS && t == STT_NOTYPE) return SK_ABS; + switch (t) { + case STT_FUNC: + return SK_FUNC; + case STT_OBJECT: + return SK_OBJ; + case STT_SECTION: + return SK_SECTION; + case STT_FILE: + return SK_FILE; + case STT_TLS: + return SK_TLS; + case STT_COMMON: + return SK_COMMON; + case STT_GNU_IFUNC: + return SK_IFUNC; + default: + /* STT_NOTYPE on a defined symbol (e.g. AArch64 mapping symbols + * `$x` / `$d`, or assembly labels) round-trips as SK_NOTYPE. + * The linker keeps definedness keyed on SK_UNDEF; SK_NOTYPE is + * "defined but typeless". */ + return SK_NOTYPE; + } +} + +static u8 elf_other_to_vis(u32 other) { + switch (other & 3) { + case STV_HIDDEN: + return SV_HIDDEN; + case STV_PROTECTED: + return SV_PROTECTED; + case STV_INTERNAL: + return SV_INTERNAL; + default: + return SV_DEFAULT; + } +} + +/* Bounds-checked C-string slice from a strtab section. Returns "" on + * out-of-range so callers don't have to special-case it. `len_out` is + * set to the result's byte length. 
*/ +static const char* strtab_lookup(const u8* tab, u64 tab_size, u32 off, + u32* len_out) { + if (off >= tab_size) { + *len_out = 0; + return ""; + } + const char* s = (const char*)(tab + off); + u32 max = (u32)(tab_size - off); + u32 n = 0; + while (n < max && s[n] != '\0') ++n; + *len_out = n; + return s; +} + +ObjBuilder* read_elf(Compiler* c, const char* name, const u8* data, + size_t len) { + (void)name; + + if (len < ELF64_EHDR_SIZE) + compiler_panic(c, no_loc(), "read_elf: input shorter than ELF header"); + + if (data[EI_MAG0] != ELFMAG0 || data[EI_MAG1] != ELFMAG1 || + data[EI_MAG2] != ELFMAG2 || data[EI_MAG3] != ELFMAG3) + compiler_panic(c, no_loc(), "read_elf: bad ELF magic"); + + if (data[EI_CLASS] != ELFCLASS64) + compiler_panic(c, no_loc(), "read_elf: not ELFCLASS64 (got %u)", + data[EI_CLASS]); + if (data[EI_DATA] != ELFDATA2LSB) + compiler_panic(c, no_loc(), "read_elf: not ELFDATA2LSB (got %u)", + data[EI_DATA]); + + u16 e_type = elf_rd_u16(data + 16); + if (e_type != ET_REL) + compiler_panic( + c, no_loc(), + "read_elf: only ET_REL inputs are accepted by read_elf " + "(got e_type=%u); use read_elf_dso for ET_DYN shared objects", + (u32)e_type); + + u16 e_machine = elf_rd_u16(data + 18); + const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_ELF); + const ObjElfArchOps* arch = + fmt && fmt->elf_machine ? 
fmt->elf_machine(e_machine) : NULL; + u32 (*reloc_from)(u32); + if (!arch || !arch->reloc_from) { + compiler_panic(c, no_loc(), "read_elf: unsupported e_machine 0x%x", + (u32)e_machine); + } + reloc_from = arch->reloc_from; + + u64 e_shoff = elf_rd_u64(data + 40); + u32 e_flags = elf_rd_u32(data + 48); + u16 e_shentsize = elf_rd_u16(data + 58); + u16 e_shnum = elf_rd_u16(data + 60); + u16 e_shstrndx = elf_rd_u16(data + 62); + + if (e_shentsize != ELF64_SHDR_SIZE) + compiler_panic(c, no_loc(), "read_elf: unexpected e_shentsize %u", + (u32)e_shentsize); + if (e_shoff + (u64)e_shnum * ELF64_SHDR_SIZE > len) + compiler_panic(c, no_loc(), "read_elf: section header table out of range"); + if (e_shstrndx >= e_shnum) + compiler_panic(c, no_loc(), "read_elf: e_shstrndx %u >= e_shnum %u", + (u32)e_shstrndx, (u32)e_shnum); + + /* Parse all shdrs into scratch. */ + ShdrRec* shdrs = arena_array(c->scratch, ShdrRec, e_shnum); + for (u32 i = 0; i < e_shnum; ++i) + parse_shdr(data + e_shoff + (u64)i * ELF64_SHDR_SIZE, &shdrs[i]); + + const ShdrRec* shstr_sh = &shdrs[e_shstrndx]; + if (shstr_sh->sh_offset + shstr_sh->sh_size > len) + compiler_panic(c, no_loc(), "read_elf: .shstrtab out of range"); + const u8* shstrtab = data + shstr_sh->sh_offset; + u64 shstrtab_sz = shstr_sh->sh_size; + + /* Build the ObjBuilder. */ + ObjBuilder* ob = obj_new(c); + if (!ob) compiler_panic(c, no_loc(), "read_elf: obj_new failed"); + obj_set_elf_e_flags(ob, e_flags); + + /* elf_to_obj[shndx] -> ObjSecId, OBJ_SEC_NONE for skipped sections. */ + u32* elf_to_obj = arena_zarray(c->scratch, u32, e_shnum); + + /* Pass 1: create obj sections for every non-NULL shdr that carries + * load-bearing model state. SYMTAB / STRTAB / RELA / REL are + * consumed below for symbols and relocations and do NOT round-trip + * as obj sections — emit_elf re-synthesizes them from the + * ObjBuilder's symbols / strtab / relocs. The shstrtab is a STRTAB + * too, so it falls out the same way. 
*/ + for (u32 i = 1; i < e_shnum; ++i) { + const ShdrRec* sh = &shdrs[i]; + if (sh->sh_type == SHT_NULL) continue; + if (sh->sh_type == SHT_SYMTAB) continue; + if (sh->sh_type == SHT_STRTAB) continue; + if (sh->sh_type == SHT_RELA) continue; + if (sh->sh_type == SHT_REL) continue; + /* SHT_GROUP is consumed below into an ObjGroup record (signature + * symbol + member ObjSecIds). emit_elf re-synthesizes the group + * section bytes from the ObjGroup, using current section indices + * — so the original section's raw body would be stale anyway. */ + if (sh->sh_type == SHT_GROUP) continue; + + u32 nlen; + const char* nm = strtab_lookup(shstrtab, shstrtab_sz, sh->sh_name, &nlen); + Sym sym = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}); + + u16 sec_kind = elf_kind_from_name(nm, nlen, sh->sh_flags, sh->sh_type); + int type_known; + u16 sec_sem = elf_type_to_sem(sh->sh_type, &type_known); + u16 flags = elf_flags_to_obj(sh->sh_flags); + u32 align = sh->sh_addralign ? (u32)sh->sh_addralign : 1; + + ObjSecId id = + obj_section_ex(ob, sym, (SecKind)sec_kind, (SecSem)sec_sem, flags, + align, (u32)sh->sh_entsize, sh->sh_link, sh->sh_info); + if (id == OBJ_SEC_NONE) + compiler_panic(c, no_loc(), "read_elf: obj_section_ex failed for '%.*s'", + SLICE_ARG(((Slice){.s = nm, .len = nlen}))); + elf_to_obj[i] = id; + + /* Preserve format-specific bits the canonical SecSem/SecFlag + * mapping can't represent so emit_elf can write them back + * verbatim. ext_type only set when the sh_type fell through + * to the "unknown" path. */ + u32 leftover = (u32)(sh->sh_flags & ~ELF_KNOWN_FLAGS_MASK); + if (!type_known || leftover) { + obj_section_set_ext(ob, id, OBJ_EXT_ELF, type_known ? 0 : sh->sh_type, + leftover); + } + + /* Body bytes. 
*/ + if (sh->sh_type == SHT_NOBITS) { + obj_reserve_bss(ob, id, (u32)sh->sh_size, align); + } else if (sh->sh_size) { + if (sh->sh_offset + sh->sh_size > len) + compiler_panic(c, no_loc(), + "read_elf: section '%.*s' bytes out of range", + SLICE_ARG(((Slice){.s = nm, .len = nlen}))); + /* For SYMTAB/STRTAB/RELA we still copy the raw bytes — the + * post-finalize shape contract says these sections are + * present; emit_elf will regenerate them on re-emit, so the + * preserved bytes are informational rather than load-bearing. + */ + obj_write(ob, id, data + sh->sh_offset, (size_t)sh->sh_size); + } + } + + /* Pass 2: parse the .symtab into ObjSyms, building an + * elf_sym_idx -> ObjSymId table. There may be zero or one SYMTAB in + * an ET_REL; pick the first. */ + u32 symtab_shndx = 0; + for (u32 i = 1; i < e_shnum; ++i) { + if (shdrs[i].sh_type == SHT_SYMTAB) { + symtab_shndx = i; + break; + } + } + + u32 nsyms = 0; + u32* sym_elf_to_obj = NULL; + + if (symtab_shndx) { + const ShdrRec* sh = &shdrs[symtab_shndx]; + if (sh->sh_entsize != ELF64_SYM_SIZE) + compiler_panic(c, no_loc(), "read_elf: .symtab entsize %llu != %u", + (unsigned long long)sh->sh_entsize, (u32)ELF64_SYM_SIZE); + if (sh->sh_size % ELF64_SYM_SIZE) + compiler_panic(c, no_loc(), + "read_elf: .symtab size %llu not a multiple of %u", + (unsigned long long)sh->sh_size, (u32)ELF64_SYM_SIZE); + if (sh->sh_link >= e_shnum) + compiler_panic(c, no_loc(), "read_elf: .symtab sh_link %u out of range", + sh->sh_link); + const ShdrRec* str_sh = &shdrs[sh->sh_link]; + if (str_sh->sh_offset + str_sh->sh_size > len) + compiler_panic(c, no_loc(), "read_elf: .strtab out of range"); + const u8* strtab = data + str_sh->sh_offset; + u64 strtab_sz = str_sh->sh_size; + + nsyms = (u32)(sh->sh_size / ELF64_SYM_SIZE); + sym_elf_to_obj = arena_zarray(c->scratch, u32, nsyms ? 
nsyms : 1); + + const u8* base = data + sh->sh_offset; + for (u32 i = 1; i < nsyms; ++i) { /* skip index 0 */ + const u8* p = base + (u64)i * ELF64_SYM_SIZE; + u32 st_name = elf_rd_u32(p + 0); + u8 st_info = p[4]; + u8 st_other = p[5]; + u16 st_shndx = elf_rd_u16(p + 6); + u64 st_value = elf_rd_u64(p + 8); + u64 st_size = elf_rd_u64(p + 16); + + u32 nlen; + const char* nm = strtab_lookup(strtab, strtab_sz, st_name, &nlen); + Sym sn = nlen + ? pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}) + : 0; + + u32 e_bind = ELF64_ST_BIND(st_info); + u32 e_type = ELF64_ST_TYPE(st_info); + u16 bind = elf_bind_to_obj(e_bind); + u16 kind = elf_type_to_kind(e_type, st_shndx); + u8 vis = elf_other_to_vis(st_other); + + ObjSecId sec_id; + u64 value; + u64 cmnalign = 0; + if (st_shndx == SHN_UNDEF) { + sec_id = OBJ_SEC_NONE; + value = st_value; + } else if (st_shndx == SHN_ABS || st_shndx == SHN_COMMON) { + sec_id = OBJ_SEC_NONE; + value = st_value; + if (st_shndx == SHN_COMMON) cmnalign = st_value; + } else if (st_shndx < e_shnum) { + sec_id = elf_to_obj[st_shndx]; + value = st_value; + } else { + compiler_panic(c, no_loc(), "read_elf: symbol shndx %u out of range", + (u32)st_shndx); + sec_id = OBJ_SEC_NONE; + value = 0; /* unreachable */ + } + + ObjSymId id = + obj_symbol_ex(ob, sn, (SymBind)bind, (SymVis)vis, (SymKind)kind, + sec_id, value, st_size, cmnalign); + obj_sym_mark_referenced(ob, id); + sym_elf_to_obj[i] = id; + } + } + + /* Pass 3: parse each SHT_RELA / SHT_REL into ObjBuilder relocations + * targeting the section the rela header's sh_info points at. */ + for (u32 i = 1; i < e_shnum; ++i) { + const ShdrRec* sh = &shdrs[i]; + int is_rela = (sh->sh_type == SHT_RELA); + int is_rel = (sh->sh_type == SHT_REL); + if (!is_rela && !is_rel) continue; + + u32 entsize = is_rela ? 
ELF64_RELA_SIZE : 16; + if (sh->sh_entsize != entsize) + compiler_panic(c, no_loc(), "read_elf: rela entsize %llu != %u", + (unsigned long long)sh->sh_entsize, entsize); + if (sh->sh_info == 0 || sh->sh_info >= e_shnum) + compiler_panic(c, no_loc(), "read_elf: rela sh_info %u out of range", + sh->sh_info); + ObjSecId target = elf_to_obj[sh->sh_info]; + if (target == OBJ_SEC_NONE) continue; + + u32 nrec = (u32)(sh->sh_size / entsize); + const u8* base = data + sh->sh_offset; + for (u32 j = 0; j < nrec; ++j) { + const u8* p = base + (u64)j * entsize; + u64 r_offset = elf_rd_u64(p + 0); + u64 r_info = elf_rd_u64(p + 8); + i64 r_addend = is_rela ? (i64)elf_rd_u64(p + 16) : 0; + u32 esym = ELF64_R_SYM(r_info); + u32 etype = ELF64_R_TYPE(r_info); + + u32 kind = reloc_from(etype); + if (kind == (u32)-1) + compiler_panic(c, no_loc(), + "read_elf: unsupported reloc type %u for e_machine 0x%x", + etype, (u32)e_machine); + + ObjSymId target_sym = OBJ_SYM_NONE; + if (esym && sym_elf_to_obj && esym < nsyms) + target_sym = sym_elf_to_obj[esym]; + + obj_reloc_ex(ob, target, (u32)r_offset, (RelocKind)kind, target_sym, + r_addend, is_rela ? 1 : 0, 0); + } + } + + /* Pass 4: SHT_GROUP. Each GROUP section's body is a sequence of + * 4-byte LE indices: [flags, shndx, shndx, ...]. The signature is + * the symbol named by sh_link/sh_info convention (sh_link=symtab, + * sh_info=symbol index in that symtab). 
*/ + for (u32 i = 1; i < e_shnum; ++i) { + const ShdrRec* sh = &shdrs[i]; + if (sh->sh_type != SHT_GROUP) continue; + + if (sh->sh_size < 4 || (sh->sh_size % 4)) continue; + const u8* p = data + sh->sh_offset; + u32 flags = elf_rd_u32(p); + u32 nm_len; + const char* gnm = + strtab_lookup(shstrtab, shstrtab_sz, sh->sh_name, &nm_len); + Sym gname = pool_intern_slice(c->global, (Slice){.s = gnm, .len = nm_len}); + + ObjSymId signature = OBJ_SYM_NONE; + if (sym_elf_to_obj && sh->sh_info < nsyms) + signature = sym_elf_to_obj[sh->sh_info]; + + ObjGroupId gid = obj_group(ob, gname, signature, flags); + u32 n = (u32)(sh->sh_size / 4) - 1; + for (u32 j = 0; j < n; ++j) { + u32 shndx = elf_rd_u32(p + 4 + j * 4); + if (shndx < e_shnum && elf_to_obj[shndx] != OBJ_SEC_NONE) + obj_group_add_section(ob, gid, elf_to_obj[shndx]); + } + } + + obj_finalize(ob); + return ob; +} + +/* ---- ET_DYN (shared object) reader ---- + * + * Produces an ObjBuilder containing only the DSO's exported symbols + * (parsed from .dynsym, not .symtab). The DSO's sections, relocations, + * and groups are skipped — DSOs contribute no bytes to the output + * image. The DT_SONAME (if any) is interned and returned via + * `*soname_out` so the caller can record DT_NEEDED at link time. + * + * Symbol shape: each defined dynsym entry produces an ObjSym whose + * (bind, kind, vis) match the source. `section_id` is OBJ_SEC_NONE — + * the symbol's value is its DSO-internal vaddr, not meaningful to the + * consuming linker, so we record `value=0`. The linker layer + * (resolve_undefs) only consults the name and the defined-ness flag. + * + * Undefined dynsym entries (st_shndx==SHN_UNDEF) are imports the DSO + * itself has against other libraries; they're not relevant to a + * consumer that's linking against this DSO and are dropped. 
*/ + +static int parse_phdr(const u8* data, size_t len, u64 e_phoff, u16 e_phentsize, + u16 e_phnum, u32 want_type, u64* out_offset, + u64* out_filesz) { + u32 i; + if (e_phentsize != ELF64_PHDR_SIZE) return 0; + if (e_phoff + (u64)e_phnum * ELF64_PHDR_SIZE > len) return 0; + for (i = 0; i < e_phnum; ++i) { + const u8* p = data + e_phoff + (u64)i * ELF64_PHDR_SIZE; + u32 p_type = elf_rd_u32(p + 0); + if (p_type != want_type) continue; + *out_offset = elf_rd_u64(p + 8); + *out_filesz = elf_rd_u64(p + 32); + return 1; + } + return 0; +} + +ObjBuilder* read_elf_dso(Compiler* c, const char* name, const u8* data, + size_t len, Sym* soname_out) { + (void)name; + if (soname_out) *soname_out = 0; + + if (len < ELF64_EHDR_SIZE) + compiler_panic(c, no_loc(), "read_elf_dso: input shorter than ELF header"); + if (data[EI_MAG0] != ELFMAG0 || data[EI_MAG1] != ELFMAG1 || + data[EI_MAG2] != ELFMAG2 || data[EI_MAG3] != ELFMAG3) + compiler_panic(c, no_loc(), "read_elf_dso: bad ELF magic"); + if (data[EI_CLASS] != ELFCLASS64) + compiler_panic(c, no_loc(), "read_elf_dso: not ELFCLASS64"); + if (data[EI_DATA] != ELFDATA2LSB) + compiler_panic(c, no_loc(), "read_elf_dso: not ELFDATA2LSB"); + + u16 e_type = elf_rd_u16(data + 16); + if (e_type != ET_DYN) + compiler_panic(c, no_loc(), "read_elf_dso: expected ET_DYN, got e_type=%u", + (u32)e_type); + + u16 e_machine = elf_rd_u16(data + 18); + { + const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_ELF); + const ObjElfArchOps* arch = + fmt && fmt->elf_machine ? 
fmt->elf_machine(e_machine) : NULL; + if (!arch) + compiler_panic(c, no_loc(), "read_elf_dso: unsupported e_machine 0x%x", + (u32)e_machine); + } + + u64 e_phoff = elf_rd_u64(data + 32); + u64 e_shoff = elf_rd_u64(data + 40); + u16 e_phentsize = elf_rd_u16(data + 54); + u16 e_phnum = elf_rd_u16(data + 56); + u16 e_shentsize = elf_rd_u16(data + 58); + u16 e_shnum = elf_rd_u16(data + 60); + u16 e_shstrndx = elf_rd_u16(data + 62); + + if (e_shentsize != ELF64_SHDR_SIZE) + compiler_panic(c, no_loc(), "read_elf_dso: unexpected e_shentsize %u", + (u32)e_shentsize); + if (e_shoff + (u64)e_shnum * ELF64_SHDR_SIZE > len) + compiler_panic(c, no_loc(), + "read_elf_dso: section header table out of range"); + if (e_shstrndx >= e_shnum) + compiler_panic(c, no_loc(), "read_elf_dso: e_shstrndx out of range"); + + ShdrRec* shdrs = arena_array(c->scratch, ShdrRec, e_shnum); + for (u32 i = 0; i < e_shnum; ++i) + parse_shdr(data + e_shoff + (u64)i * ELF64_SHDR_SIZE, &shdrs[i]); + + /* Locate .dynsym (preferred over .symtab — a stripped DSO carries + * only .dynsym) and its associated strtab via sh_link. */ + u32 dynsym_idx = 0, dynamic_idx = 0; + for (u32 i = 1; i < e_shnum; ++i) { + if (shdrs[i].sh_type == SHT_DYNSYM && !dynsym_idx) dynsym_idx = i; + if (shdrs[i].sh_type == SHT_DYNAMIC && !dynamic_idx) dynamic_idx = i; + } + + if (!dynsym_idx) + compiler_panic(c, no_loc(), "read_elf_dso: no SHT_DYNSYM in shared object"); + + /* Parse PT_DYNAMIC for DT_SONAME. The .dynamic section gives us the + * dynstr to resolve the SONAME's offset; if there's no .dynamic + * section we fall back to scanning the PT_DYNAMIC segment. 
*/ + Sym soname = 0; + if (dynamic_idx) { + const ShdrRec* dsh = &shdrs[dynamic_idx]; + if (dsh->sh_link >= e_shnum) + compiler_panic(c, no_loc(), + "read_elf_dso: .dynamic sh_link %u out of range", + dsh->sh_link); + const ShdrRec* str_sh = &shdrs[dsh->sh_link]; + if (str_sh->sh_offset + str_sh->sh_size > len) + compiler_panic(c, no_loc(), "read_elf_dso: .dynamic strtab out of range"); + const u8* dynstr = data + str_sh->sh_offset; + u64 dynstr_sz = str_sh->sh_size; + + if (dsh->sh_offset + dsh->sh_size > len) + compiler_panic(c, no_loc(), "read_elf_dso: .dynamic body out of range"); + const u8* dynp = data + dsh->sh_offset; + u64 dynsz = dsh->sh_size; + /* DT entries are 16 bytes: (d_tag: u64, d_un: u64). */ + for (u64 off = 0; off + 16 <= dynsz; off += 16) { + u64 tag = elf_rd_u64(dynp + off); + u64 val = elf_rd_u64(dynp + off + 8); + if (tag == DT_NULL) break; + if (tag == DT_SONAME) { + u32 nlen; + const char* nm = strtab_lookup(dynstr, dynstr_sz, (u32)val, &nlen); + if (nlen) + soname = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}); + break; + } + } + } else if (e_phnum) { + /* Fallback: walk PT_DYNAMIC straight from program headers. We + * only need DT_SONAME, so skip if we can't find a strtab pointer + * inline (DT_STRTAB carries a vaddr, not a file offset — stripped + * DSOs without SHT_DYNAMIC are exceedingly rare in practice). */ + u64 dyn_off, dyn_sz; + (void)parse_phdr(data, len, e_phoff, e_phentsize, e_phnum, PT_DYNAMIC, + &dyn_off, &dyn_sz); + } + if (soname_out) *soname_out = soname; + + /* Now parse .dynsym. 
*/ + const ShdrRec* sh = &shdrs[dynsym_idx]; + if (sh->sh_entsize != ELF64_SYM_SIZE) + compiler_panic(c, no_loc(), "read_elf_dso: .dynsym entsize %llu != %u", + (unsigned long long)sh->sh_entsize, (u32)ELF64_SYM_SIZE); + if (sh->sh_size % ELF64_SYM_SIZE) + compiler_panic(c, no_loc(), + "read_elf_dso: .dynsym size not multiple of entry size"); + if (sh->sh_link >= e_shnum) + compiler_panic(c, no_loc(), "read_elf_dso: .dynsym sh_link out of range"); + const ShdrRec* str_sh = &shdrs[sh->sh_link]; + if (str_sh->sh_offset + str_sh->sh_size > len) + compiler_panic(c, no_loc(), "read_elf_dso: .dynstr out of range"); + const u8* strtab = data + str_sh->sh_offset; + u64 strtab_sz = str_sh->sh_size; + + ObjBuilder* ob = obj_new(c); + if (!ob) compiler_panic(c, no_loc(), "read_elf_dso: obj_new failed"); + + u32 nsyms = (u32)(sh->sh_size / ELF64_SYM_SIZE); + const u8* base = data + sh->sh_offset; + for (u32 i = 1; i < nsyms; ++i) { /* skip index 0 */ + const u8* p = base + (u64)i * ELF64_SYM_SIZE; + u32 st_name = elf_rd_u32(p + 0); + u8 st_info = p[4]; + u8 st_other = p[5]; + u16 st_shndx = elf_rd_u16(p + 6); + + /* Skip the DSO's own undefined imports — they don't satisfy any + * undef in our consumer. Locals (STB_LOCAL) likewise aren't + * exported and would only confuse the resolver. */ + if (st_shndx == SHN_UNDEF) continue; + u32 e_bind = ELF64_ST_BIND(st_info); + if (e_bind == STB_LOCAL) continue; + + u32 nlen; + const char* nm = strtab_lookup(strtab, strtab_sz, st_name, &nlen); + if (!nlen) continue; + Sym sn = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}); + + u32 e_type_field = ELF64_ST_TYPE(st_info); + u16 bind = elf_bind_to_obj(e_bind); + u16 kind = elf_type_to_kind(e_type_field, st_shndx); + u8 vis = elf_other_to_vis(st_other); + + /* DSO exports land as defined symbols in OBJ_SEC_NONE with + * value=0. The consumer treats them as imports — see + * resolve_undefs in src/link/link_layout.c. 
*/ + { + ObjSymId did = obj_symbol_ex(ob, sn, (SymBind)bind, (SymVis)vis, + (SymKind)kind, OBJ_SEC_NONE, 0, 0, 0); + obj_sym_mark_referenced(ob, did); + } + } + + obj_finalize(ob); + return ob; +} diff --git a/src/obj/elf/reloc_aarch64.c b/src/obj/elf/reloc_aarch64.c @@ -0,0 +1,182 @@ +/* RelocKind <-> AArch64 ELF reloc-type mapping. + * + * Cfree's RelocKind enum is arch-agnostic at its top (R_ABS, R_REL, R_PC + * variants) and arch-specific in its lower entries. On AArch64, R_REL and + * R_PC collapse to ELF_R_AARCH64_PREL32 / ELF_R_AARCH64_PREL64 — both + * mean "PC-relative relative to the symbol" once the linker has resolved + * final addresses. + * + * Returning 0 (ELF_R_AARCH64_NONE) for an unsupported kind is the signal + * to the caller to either panic (emit) or panic (read with diagnostic). */ + +#include "obj/elf/elf.h" + +u32 elf_aarch64_reloc_to(u32 kind /* RelocKind */) { + switch (kind) { + case R_NONE: + return ELF_R_AARCH64_NONE; + case R_ABS64: + return ELF_R_AARCH64_ABS64; + case R_ABS32: + return ELF_R_AARCH64_ABS32; + case R_PC64: + return ELF_R_AARCH64_PREL64; + case R_PC32: + return ELF_R_AARCH64_PREL32; + case R_REL64: + return ELF_R_AARCH64_PREL64; + case R_REL32: + return ELF_R_AARCH64_PREL32; + case R_AARCH64_JUMP26: + return ELF_R_AARCH64_JUMP26; + case R_AARCH64_CALL26: + return ELF_R_AARCH64_CALL26; + case R_AARCH64_CONDBR19: + return ELF_R_AARCH64_CONDBR19; + case R_AARCH64_TSTBR14: + return ELF_R_AARCH64_TSTBR14; + case R_AARCH64_LD_PREL_LO19: + return ELF_R_AARCH64_LD_PREL_LO19; + case R_AARCH64_ADR_PREL_LO21: + return ELF_R_AARCH64_ADR_PREL_LO21; + case R_AARCH64_ADR_PREL_PG_HI21: + return ELF_R_AARCH64_ADR_PREL_PG_HI21; + case R_AARCH64_ADR_PREL_PG_HI21_NC: + return ELF_R_AARCH64_ADR_PREL_PG_HI21_NC; + case R_AARCH64_ADD_ABS_LO12_NC: + return ELF_R_AARCH64_ADD_ABS_LO12_NC; + case R_AARCH64_ABS16: + return ELF_R_AARCH64_ABS16; + case R_AARCH64_PREL16: + return ELF_R_AARCH64_PREL16; + case R_AARCH64_LDST8_ABS_LO12_NC: + return 
ELF_R_AARCH64_LDST8_ABS_LO12_NC; + case R_AARCH64_LDST16_ABS_LO12_NC: + return ELF_R_AARCH64_LDST16_ABS_LO12_NC; + case R_AARCH64_LDST32_ABS_LO12_NC: + return ELF_R_AARCH64_LDST32_ABS_LO12_NC; + case R_AARCH64_LDST64_ABS_LO12_NC: + return ELF_R_AARCH64_LDST64_ABS_LO12_NC; + case R_AARCH64_LDST128_ABS_LO12_NC: + return ELF_R_AARCH64_LDST128_ABS_LO12_NC; + case R_AARCH64_ADR_GOT_PAGE: + return ELF_R_AARCH64_ADR_GOT_PAGE; + case R_AARCH64_LD64_GOT_LO12_NC: + return ELF_R_AARCH64_LD64_GOT_LO12_NC; + case R_AARCH64_TLSLE_ADD_TPREL_HI12: + return ELF_R_AARCH64_TLSLE_ADD_TPREL_HI12; + case R_AARCH64_TLSLE_ADD_TPREL_LO12: + return ELF_R_AARCH64_TLSLE_ADD_TPREL_LO12; + case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: + return ELF_R_AARCH64_TLSLE_ADD_TPREL_LO12_NC; + case R_AARCH64_TLSLE_LDST8_TPREL_LO12: + return ELF_R_AARCH64_TLSLE_LDST8_TPREL_LO12; + case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC: + return ELF_R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC; + case R_AARCH64_TLSLE_LDST16_TPREL_LO12: + return ELF_R_AARCH64_TLSLE_LDST16_TPREL_LO12; + case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC: + return ELF_R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC; + case R_AARCH64_TLSLE_LDST32_TPREL_LO12: + return ELF_R_AARCH64_TLSLE_LDST32_TPREL_LO12; + case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC: + return ELF_R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC; + case R_AARCH64_TLSLE_LDST64_TPREL_LO12: + return ELF_R_AARCH64_TLSLE_LDST64_TPREL_LO12; + case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC: + return ELF_R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC; + case R_AARCH64_GLOB_DAT: + return ELF_R_AARCH64_GLOB_DAT; + case R_AARCH64_JUMP_SLOT: + return ELF_R_AARCH64_JUMP_SLOT; + case R_AARCH64_RELATIVE: + return ELF_R_AARCH64_RELATIVE; + case R_AARCH64_COPY: + return ELF_R_AARCH64_COPY; + default: + return ELF_R_AARCH64_NONE; + } +} + +u32 elf_aarch64_reloc_from(u32 elf_type) { + switch (elf_type) { + case ELF_R_AARCH64_NONE: + return R_NONE; + case ELF_R_AARCH64_ABS64: + return R_ABS64; + case ELF_R_AARCH64_ABS32: + return R_ABS32; + case 
ELF_R_AARCH64_PREL64: + return R_PC64; + case ELF_R_AARCH64_PREL32: + return R_PC32; + case ELF_R_AARCH64_JUMP26: + return R_AARCH64_JUMP26; + case ELF_R_AARCH64_CALL26: + return R_AARCH64_CALL26; + case ELF_R_AARCH64_CONDBR19: + return R_AARCH64_CONDBR19; + case ELF_R_AARCH64_TSTBR14: + return R_AARCH64_TSTBR14; + case ELF_R_AARCH64_LD_PREL_LO19: + return R_AARCH64_LD_PREL_LO19; + case ELF_R_AARCH64_ADR_PREL_LO21: + return R_AARCH64_ADR_PREL_LO21; + case ELF_R_AARCH64_ADR_PREL_PG_HI21: + return R_AARCH64_ADR_PREL_PG_HI21; + case ELF_R_AARCH64_ADR_PREL_PG_HI21_NC: + return R_AARCH64_ADR_PREL_PG_HI21_NC; + case ELF_R_AARCH64_ADD_ABS_LO12_NC: + return R_AARCH64_ADD_ABS_LO12_NC; + case ELF_R_AARCH64_ABS16: + return R_AARCH64_ABS16; + case ELF_R_AARCH64_PREL16: + return R_AARCH64_PREL16; + case ELF_R_AARCH64_LDST8_ABS_LO12_NC: + return R_AARCH64_LDST8_ABS_LO12_NC; + case ELF_R_AARCH64_LDST16_ABS_LO12_NC: + return R_AARCH64_LDST16_ABS_LO12_NC; + case ELF_R_AARCH64_LDST32_ABS_LO12_NC: + return R_AARCH64_LDST32_ABS_LO12_NC; + case ELF_R_AARCH64_LDST64_ABS_LO12_NC: + return R_AARCH64_LDST64_ABS_LO12_NC; + case ELF_R_AARCH64_LDST128_ABS_LO12_NC: + return R_AARCH64_LDST128_ABS_LO12_NC; + case ELF_R_AARCH64_ADR_GOT_PAGE: + return R_AARCH64_ADR_GOT_PAGE; + case ELF_R_AARCH64_LD64_GOT_LO12_NC: + return R_AARCH64_LD64_GOT_LO12_NC; + case ELF_R_AARCH64_TLSLE_ADD_TPREL_HI12: + return R_AARCH64_TLSLE_ADD_TPREL_HI12; + case ELF_R_AARCH64_TLSLE_ADD_TPREL_LO12: + return R_AARCH64_TLSLE_ADD_TPREL_LO12; + case ELF_R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: + return R_AARCH64_TLSLE_ADD_TPREL_LO12_NC; + case ELF_R_AARCH64_TLSLE_LDST8_TPREL_LO12: + return R_AARCH64_TLSLE_LDST8_TPREL_LO12; + case ELF_R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC: + return R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC; + case ELF_R_AARCH64_TLSLE_LDST16_TPREL_LO12: + return R_AARCH64_TLSLE_LDST16_TPREL_LO12; + case ELF_R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC: + return R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC; + case 
ELF_R_AARCH64_TLSLE_LDST32_TPREL_LO12: + return R_AARCH64_TLSLE_LDST32_TPREL_LO12; + case ELF_R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC: + return R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC; + case ELF_R_AARCH64_TLSLE_LDST64_TPREL_LO12: + return R_AARCH64_TLSLE_LDST64_TPREL_LO12; + case ELF_R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC: + return R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC; + case ELF_R_AARCH64_GLOB_DAT: + return R_AARCH64_GLOB_DAT; + case ELF_R_AARCH64_JUMP_SLOT: + return R_AARCH64_JUMP_SLOT; + case ELF_R_AARCH64_RELATIVE: + return R_AARCH64_RELATIVE; + case ELF_R_AARCH64_COPY: + return R_AARCH64_COPY; + default: + return (u32)-1; /* sentinel */ + } +} diff --git a/src/obj/elf/reloc_riscv64.c b/src/obj/elf/reloc_riscv64.c @@ -0,0 +1,182 @@ +/* RelocKind <-> RISC-V ELF reloc-type mapping. + * + * Mirror of elf_reloc_x86_64.c for the RISC-V LP64 ABI. The arch- + * agnostic R_ABS / R_PC RelocKind entries fan out to the native + * RISC-V codes; the RISC-V-specific encodings (HI20/LO12, BRANCH, + * JAL, CALL, PCREL_*, TPREL_*, ADD/SUB/SET, RELAX, ALIGN, RVC_*) + * live in the lower band as R_RV_*. + * + * Returning ELF_R_RISCV_NONE for an unsupported kind is the signal + * to the caller to either panic (emit) or panic (read with diagnostic). 
*/ + +#include "obj/elf/elf.h" + +u32 elf_riscv64_reloc_to(u32 kind /* RelocKind */) { + switch (kind) { + case R_NONE: + return ELF_R_RISCV_NONE; + case R_ABS64: + return ELF_R_RISCV_64; + case R_ABS32: + return ELF_R_RISCV_32; + case R_PC32: + return ELF_R_RISCV_32_PCREL; + case R_RV_HI20: + return ELF_R_RISCV_HI20; + case R_RV_LO12_I: + return ELF_R_RISCV_LO12_I; + case R_RV_LO12_S: + return ELF_R_RISCV_LO12_S; + case R_RV_BRANCH: + return ELF_R_RISCV_BRANCH; + case R_RV_JAL: + return ELF_R_RISCV_JAL; + case R_RV_CALL: + return ELF_R_RISCV_CALL; + case R_PLT32: + return ELF_R_RISCV_CALL_PLT; + case R_RV_PCREL_HI20: + return ELF_R_RISCV_PCREL_HI20; + case R_RV_PCREL_LO12_I: + return ELF_R_RISCV_PCREL_LO12_I; + case R_RV_PCREL_LO12_S: + return ELF_R_RISCV_PCREL_LO12_S; + case R_RV_GOT_HI20: + return ELF_R_RISCV_GOT_HI20; + case R_RV_TLS_GOT_HI20: + return ELF_R_RISCV_TLS_GOT_HI20; + case R_RV_TPREL_HI20: + return ELF_R_RISCV_TPREL_HI20; + case R_RV_TPREL_LO12_I: + return ELF_R_RISCV_TPREL_LO12_I; + case R_RV_TPREL_LO12_S: + return ELF_R_RISCV_TPREL_LO12_S; + case R_RV_TPREL_ADD: + return ELF_R_RISCV_TPREL_ADD; + case R_RV_ADD8: + return ELF_R_RISCV_ADD8; + case R_RV_ADD16: + return ELF_R_RISCV_ADD16; + case R_RV_ADD32: + return ELF_R_RISCV_ADD32; + case R_RV_ADD64: + return ELF_R_RISCV_ADD64; + case R_RV_SUB8: + return ELF_R_RISCV_SUB8; + case R_RV_SUB16: + return ELF_R_RISCV_SUB16; + case R_RV_SUB32: + return ELF_R_RISCV_SUB32; + case R_RV_SUB64: + return ELF_R_RISCV_SUB64; + case R_RV_ALIGN: + return ELF_R_RISCV_ALIGN; + case R_RV_RVC_BRANCH: + return ELF_R_RISCV_RVC_BRANCH; + case R_RV_RVC_JUMP: + return ELF_R_RISCV_RVC_JUMP; + case R_RV_RELAX: + return ELF_R_RISCV_RELAX; + case R_RV_SUB6: + return ELF_R_RISCV_SUB6; + case R_RV_SET6: + return ELF_R_RISCV_SET6; + case R_RV_SET8: + return ELF_R_RISCV_SET8; + case R_RV_SET16: + return ELF_R_RISCV_SET16; + case R_RV_SET32: + return ELF_R_RISCV_SET32; + case R_RV_SET_ULEB128: + return ELF_R_RISCV_SET_ULEB128; + case 
R_RV_SUB_ULEB128: + return ELF_R_RISCV_SUB_ULEB128; + default: + return ELF_R_RISCV_NONE; + } +} + +u32 elf_riscv64_reloc_from(u32 elf_type) { + switch (elf_type) { + case ELF_R_RISCV_NONE: + return R_NONE; + case ELF_R_RISCV_64: + return R_ABS64; + case ELF_R_RISCV_32: + return R_ABS32; + case ELF_R_RISCV_32_PCREL: + return R_PC32; + case ELF_R_RISCV_HI20: + return R_RV_HI20; + case ELF_R_RISCV_LO12_I: + return R_RV_LO12_I; + case ELF_R_RISCV_LO12_S: + return R_RV_LO12_S; + case ELF_R_RISCV_BRANCH: + return R_RV_BRANCH; + case ELF_R_RISCV_JAL: + return R_RV_JAL; + case ELF_R_RISCV_CALL: + return R_RV_CALL; + case ELF_R_RISCV_CALL_PLT: + return R_PLT32; + case ELF_R_RISCV_PCREL_HI20: + return R_RV_PCREL_HI20; + case ELF_R_RISCV_PCREL_LO12_I: + return R_RV_PCREL_LO12_I; + case ELF_R_RISCV_PCREL_LO12_S: + return R_RV_PCREL_LO12_S; + case ELF_R_RISCV_GOT_HI20: + return R_RV_GOT_HI20; + case ELF_R_RISCV_TLS_GOT_HI20: + return R_RV_TLS_GOT_HI20; + case ELF_R_RISCV_TPREL_HI20: + return R_RV_TPREL_HI20; + case ELF_R_RISCV_TPREL_LO12_I: + return R_RV_TPREL_LO12_I; + case ELF_R_RISCV_TPREL_LO12_S: + return R_RV_TPREL_LO12_S; + case ELF_R_RISCV_TPREL_ADD: + return R_RV_TPREL_ADD; + case ELF_R_RISCV_ADD8: + return R_RV_ADD8; + case ELF_R_RISCV_ADD16: + return R_RV_ADD16; + case ELF_R_RISCV_ADD32: + return R_RV_ADD32; + case ELF_R_RISCV_ADD64: + return R_RV_ADD64; + case ELF_R_RISCV_SUB8: + return R_RV_SUB8; + case ELF_R_RISCV_SUB16: + return R_RV_SUB16; + case ELF_R_RISCV_SUB32: + return R_RV_SUB32; + case ELF_R_RISCV_SUB64: + return R_RV_SUB64; + case ELF_R_RISCV_ALIGN: + return R_RV_ALIGN; + case ELF_R_RISCV_RVC_BRANCH: + return R_RV_RVC_BRANCH; + case ELF_R_RISCV_RVC_JUMP: + return R_RV_RVC_JUMP; + case ELF_R_RISCV_RELAX: + return R_RV_RELAX; + case ELF_R_RISCV_SUB6: + return R_RV_SUB6; + case ELF_R_RISCV_SET6: + return R_RV_SET6; + case ELF_R_RISCV_SET8: + return R_RV_SET8; + case ELF_R_RISCV_SET16: + return R_RV_SET16; + case ELF_R_RISCV_SET32: + return R_RV_SET32; + 
case ELF_R_RISCV_SET_ULEB128: + return R_RV_SET_ULEB128; + case ELF_R_RISCV_SUB_ULEB128: + return R_RV_SUB_ULEB128; + default: + return (u32)-1; /* sentinel */ + } +} diff --git a/src/obj/elf/reloc_x86_64.c b/src/obj/elf/reloc_x86_64.c @@ -0,0 +1,134 @@ +/* RelocKind <-> x86_64 ELF reloc-type mapping. + * + * Mirror of elf_reloc_aarch64.c for the x86_64 SysV ABI. The arch- + * agnostic R_ABS / R_PC / R_REL RelocKind entries fan out to the + * native x86_64 codes; the x86_64-only encodings (R_X64_PC8, PLT32, + * GOTPCREL, dynamic-only entries) live in the lower band. + * + * Returning ELF_R_X86_64_NONE for an unsupported kind is the signal + * to the caller to either panic (emit) or panic (read with diagnostic). */ + +#include "obj/elf/elf.h" + +u32 elf_x86_64_reloc_to(u32 kind /* RelocKind */) { + switch (kind) { + case R_NONE: + return ELF_R_X86_64_NONE; + case R_ABS64: + return ELF_R_X86_64_64; + case R_ABS32: + return ELF_R_X86_64_32; + case R_X64_32S: + return ELF_R_X86_64_32S; + case R_PC32: + return ELF_R_X86_64_PC32; + case R_PC64: + return ELF_R_X86_64_PC64; + case R_REL32: + return ELF_R_X86_64_PC32; + case R_REL64: + return ELF_R_X86_64_PC64; + case R_X64_PC8: + return ELF_R_X86_64_PC8; + case R_PLT32: + case R_X64_PLT32: + return ELF_R_X86_64_PLT32; + case R_GOT32: + return ELF_R_X86_64_GOT32; + case R_X64_GOTPCREL: + return ELF_R_X86_64_GOTPCREL; + case R_X64_GOTPCRELX: + return ELF_R_X86_64_GOTPCRELX; + case R_X64_REX_GOTPCRELX: + return ELF_R_X86_64_REX_GOTPCRELX; + case R_X64_GOTPC32: + return ELF_R_X86_64_GOTPC32; + case R_X64_GOTOFF64: + return ELF_R_X86_64_GOTOFF64; + case R_X64_TPOFF32: + return ELF_R_X86_64_TPOFF32; + case R_X64_TPOFF64: + return ELF_R_X86_64_TPOFF64; + case R_X64_DTPOFF32: + return ELF_R_X86_64_DTPOFF32; + case R_X64_DTPMOD64: + return ELF_R_X86_64_DTPMOD64; + case R_X64_DTPOFF64: + return ELF_R_X86_64_DTPOFF64; + case R_X64_TLSGD: + return ELF_R_X86_64_TLSGD; + case R_X64_TLSLD: + return ELF_R_X86_64_TLSLD; + case 
R_X64_GOTTPOFF: + return ELF_R_X86_64_GOTTPOFF; + case R_X64_GLOB_DAT: + return ELF_R_X86_64_GLOB_DAT; + case R_X64_JUMP_SLOT: + return ELF_R_X86_64_JUMP_SLOT; + case R_X64_RELATIVE: + return ELF_R_X86_64_RELATIVE; + case R_X64_COPY: + return ELF_R_X86_64_COPY; + default: + return ELF_R_X86_64_NONE; + } +} + +u32 elf_x86_64_reloc_from(u32 elf_type) { + switch (elf_type) { + case ELF_R_X86_64_NONE: + return R_NONE; + case ELF_R_X86_64_64: + return R_ABS64; + case ELF_R_X86_64_32: + return R_ABS32; + case ELF_R_X86_64_32S: + return R_X64_32S; + case ELF_R_X86_64_PC32: + return R_PC32; + case ELF_R_X86_64_PC64: + return R_PC64; + case ELF_R_X86_64_PC8: + return R_X64_PC8; + case ELF_R_X86_64_PLT32: + return R_X64_PLT32; + case ELF_R_X86_64_GOT32: + return R_GOT32; + case ELF_R_X86_64_GOTPCREL: + return R_X64_GOTPCREL; + case ELF_R_X86_64_GOTPCRELX: + return R_X64_GOTPCRELX; + case ELF_R_X86_64_REX_GOTPCRELX: + return R_X64_REX_GOTPCRELX; + case ELF_R_X86_64_GOTPC32: + return R_X64_GOTPC32; + case ELF_R_X86_64_GOTOFF64: + return R_X64_GOTOFF64; + case ELF_R_X86_64_TPOFF32: + return R_X64_TPOFF32; + case ELF_R_X86_64_TPOFF64: + return R_X64_TPOFF64; + case ELF_R_X86_64_DTPOFF32: + return R_X64_DTPOFF32; + case ELF_R_X86_64_DTPMOD64: + return R_X64_DTPMOD64; + case ELF_R_X86_64_DTPOFF64: + return R_X64_DTPOFF64; + case ELF_R_X86_64_TLSGD: + return R_X64_TLSGD; + case ELF_R_X86_64_TLSLD: + return R_X64_TLSLD; + case ELF_R_X86_64_GOTTPOFF: + return R_X64_GOTTPOFF; + case ELF_R_X86_64_GLOB_DAT: + return R_X64_GLOB_DAT; + case ELF_R_X86_64_JUMP_SLOT: + return R_X64_JUMP_SLOT; + case ELF_R_X86_64_RELATIVE: + return R_X64_RELATIVE; + case ELF_R_X86_64_COPY: + return R_X64_COPY; + default: + return (u32)-1; /* sentinel */ + } +} diff --git a/src/obj/elf_emit.c b/src/obj/elf_emit.c @@ -1,751 +0,0 @@ -/* ELF ET_REL writer. Walks a finalized ObjBuilder and emits a 64-bit - * little-endian relocatable object via the supplied Writer. - * - * Layout strategy: - * 1. 
plan ELF section headers (one per obj section, plus synthesized - * .symtab / .strtab / .shstrtab and one .rela.<name> per obj section - * that carries relocations); - * 2. build .symtab + .strtab content (locals first — STT_SECTION - * synthesized for every input section, then ordinary locals, then - * globals/weaks); - * 3. build .rela.* content using the per-arch reloc map (selected - * by Compiler.target.arch); - * 4. build .shstrtab; - * 5. assign file offsets sequentially, respecting per-section - * addralign; - * 6. write Ehdr, then each section's bytes (seeking to its sh_offset), - * then the section header table. - * - * 64-bit little-endian only. Per-arch reloc tables (elf_reloc_<arch>.c) - * supply the RelocKind -> ELF type mapping; e_machine is selected from - * Compiler.target.arch. Big-endian / 32-bit ELF panic at entry. - * - * See doc/DESIGN.md §5.5 for the round-trip invariant: read_elf of this - * output must produce an ObjBuilder shape-equivalent to the input, - * modulo (a) section ordering and (b) the synthesized STT_SECTION - * symbols (which are visible to read_elf but were not in the input). */ - -#include <string.h> - -#include "arch/arch.h" -#include "core/arena.h" -#include "core/buf.h" -#include "core/heap.h" -#include "core/pool.h" -#include "core/slice.h" -#include "core/util.h" -#include "obj/elf.h" - -static SrcLoc no_loc(void) { - SrcLoc l = {0, 0, 0}; - return l; -} - -/* ---- per-ELF-section plan record ---- */ - -/* Internal section descriptor used during planning. Mirrors Elf64_Shdr - * but with an explicit pointer to the source bytes (either an obj - * Section's chunked Buf or a synthesized linear buffer). NOBITS sections - * have no source bytes and consume no file space. */ -typedef struct ElfSec { - /* Final shdr fields (little-endian-encoded at write time). 
*/ - u32 sh_name; /* offset into shstrtab */ - u32 sh_type; - u64 sh_flags; - u64 sh_addr; /* always 0 for ET_REL */ - u64 sh_offset; - u64 sh_size; - u32 sh_link; - u32 sh_info; - u64 sh_addralign; - u64 sh_entsize; - - /* Section name. The name string lives in scratch (synthesized) or in - * the global pool (obj-section names); buf-source is set for sections - * carrying obj-section bytes, raw_bytes for synthesized. */ - const char* name; - u32 name_len; - - const Buf* obj_bytes; /* one of these three is set: */ - const u8* raw_bytes; /* */ - int is_nobits; /* */ -} ElfSec; - -/* ---- emit ---- */ - -static u32 sec_flags_to_elf(u16 flags) { - u64 r = 0; - if (flags & SF_ALLOC) r |= SHF_ALLOC; - if (flags & SF_EXEC) r |= SHF_EXECINSTR; - if (flags & SF_WRITE) r |= SHF_WRITE; - if (flags & SF_TLS) r |= SHF_TLS; - if (flags & SF_MERGE) r |= SHF_MERGE; - if (flags & SF_STRINGS) r |= SHF_STRINGS; - if (flags & SF_GROUP) r |= SHF_GROUP; - if (flags & SF_LINK_ORDER) r |= SHF_LINK_ORDER; - if (flags & SF_RETAIN) r |= SHF_GNU_RETAIN; - return (u32)r; -} - -static u32 sec_sem_to_elf(u16 sem) { - switch (sem) { - case SSEM_PROGBITS: - return SHT_PROGBITS; - case SSEM_NOBITS: - return SHT_NOBITS; - case SSEM_SYMTAB: - return SHT_SYMTAB; - case SSEM_STRTAB: - return SHT_STRTAB; - case SSEM_RELA: - return SHT_RELA; - case SSEM_REL: - return SHT_REL; - case SSEM_NOTE: - return SHT_NOTE; - case SSEM_INIT_ARRAY: - return SHT_INIT_ARRAY; - case SSEM_FINI_ARRAY: - return SHT_FINI_ARRAY; - case SSEM_PREINIT_ARRAY: - return SHT_PREINIT_ARRAY; - case SSEM_GROUP: - return SHT_GROUP; - default: - return SHT_PROGBITS; - } -} - -static u8 sym_bind_to_elf(u16 bind) { - switch (bind) { - case SB_LOCAL: - return STB_LOCAL; - case SB_GLOBAL: - return STB_GLOBAL; - case SB_WEAK: - return STB_WEAK; - default: - return STB_LOCAL; - } -} - -static u8 sym_kind_to_elf(u16 kind) { - switch (kind) { - case SK_UNDEF: - return STT_NOTYPE; - case SK_FUNC: - return STT_FUNC; - case SK_OBJ: - return 
STT_OBJECT; - case SK_SECTION: - return STT_SECTION; - case SK_FILE: - return STT_FILE; - /* Tentative definitions: real ELF emitters (clang, gcc, GNU as) - * write these as STT_OBJECT with shndx=SHN_COMMON. STT_COMMON is - * a near-extinct convention that llvm-readelf renders as the - * literal type name "COMMON" — emitting it breaks roundtrip - * against any toolchain-produced .o. */ - case SK_COMMON: - return STT_OBJECT; - case SK_TLS: - return STT_TLS; - case SK_ABS: - return STT_NOTYPE; /* SHN_ABS, NOTYPE */ - case SK_NOTYPE: - return STT_NOTYPE; - case SK_IFUNC: - return STT_GNU_IFUNC; - default: - return STT_NOTYPE; - } -} - -static u8 sym_vis_to_elf(u8 vis) { - switch (vis) { - case SV_DEFAULT: - return STV_DEFAULT; - case SV_HIDDEN: - return STV_HIDDEN; - case SV_PROTECTED: - return STV_PROTECTED; - case SV_INTERNAL: - return STV_INTERNAL; - default: - return STV_DEFAULT; - } -} - -static u16 sym_shndx(const ObjSym* s, const u32* obj_to_elf, u32 nsec) { - if (s->kind == SK_COMMON) return (u16)SHN_COMMON; - if (s->kind == SK_ABS) return (u16)SHN_ABS; - /* STT_FILE conventionally carries SHN_ABS as its shndx — its value - * field is not an address. Match clang/binutils. */ - if (s->kind == SK_FILE) return (u16)SHN_ABS; - if (s->section_id == OBJ_SEC_NONE) return (u16)SHN_UNDEF; - if (s->section_id >= nsec) return (u16)SHN_UNDEF; - return (u16)obj_to_elf[s->section_id]; -} - -static const char* sym_to_str(Compiler* c, Sym n, u32* len_out) { - Slice sl = pool_slice(c->global, n); - const char* s = sl.s; - if (!s) { - *len_out = 0; - return ""; - } - *len_out = (u32)sl.len; - return s; -} - -/* Append `len` bytes of `s` followed by a single NUL to `b`, return - * the offset at which `s` was placed. - * - * If `s` already exists at some offset (as a NUL-terminated substring - * starting at any offset), reuse that offset — clang/binutils both - * dedupe trivially identical strings, and matching the convention - * keeps our strtab the same size as theirs. 
The dedupe is linear in - * the strtab; section + symbol counts are small enough that this is - * fine without a hash. */ -static u32 strtab_add(Buf* b, const char* s, u32 len) { - /* Empty string: always at offset 0 (the leading NUL). */ - if (len == 0) return 0; - - /* Linear search for an existing copy. We must scan chunk-by-chunk - * because Buf is segmented; flatten to a temp scratch buffer first - * if non-empty and search there. For our tiny strtabs, the cost is - * dominated by the writes anyway. */ - u32 total = buf_pos(b); - if (total > len) { - /* Flatten just to search — not optimal but the strtab here is - * always small (low kilobytes at most). */ - u8 stack[256]; - u8* tmp = - total <= sizeof stack ? stack : (u8*)b->heap->alloc(b->heap, total, 1); - if (tmp) { - buf_flatten(b, tmp); - for (u32 i = 0; i + len < total; ++i) { - if (tmp[i + len] == 0 && memcmp(tmp + i, s, len) == 0) { - if (tmp != stack) b->heap->free(b->heap, tmp, total); - return i; - } - } - if (tmp != stack) b->heap->free(b->heap, tmp, total); - } - } - - u32 off = total; - buf_write(b, s, len); - { - u8 z = 0; - buf_write(b, &z, 1); - } - return off; -} - -void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) { - Heap* h = (Heap*)c->ctx->heap; - - /* Run the tombstone sweep before any iteration: cascades removed - * sections into their defining symbols, drops dangling relocs, - * compacts groups, and absorbs the historical UNDEF prune. After this - * call every direct ID-based access below must skip entries whose - * `removed` bit is set. */ - obj_sweep_dead(ob); - - /* ---- target validation ------------------------------------------ */ - const ArchImpl* arch = arch_for_compiler(c); - const ArchElfOps* elf = arch ? 
arch->elf : NULL; - u32 e_machine; - u32 (*reloc_to)(u32); - if (!elf || !elf->reloc_to) { - compiler_panic(c, no_loc(), "emit_elf: unsupported target arch %u", - (u32)c->target.arch); - } - e_machine = elf->e_machine; - reloc_to = elf->reloc_to; - if (c->target.big_endian) { - compiler_panic(c, no_loc(), "emit_elf: big-endian ELF not supported"); - } - if (c->target.ptr_size != 8) { - compiler_panic(c, no_loc(), "emit_elf: ptr_size %u (expected 8)", - (u32)c->target.ptr_size); - } - - /* ---- pass 1: plan ELF section list ------------------------------ */ - - u32 nobjsec = obj_section_count(ob); - - u32 nobjgrp = obj_group_count(ob); - /* Upper bound on ELF section count: - * 1 (SHN_UNDEF) - * + nobjsec - 1 (one ELF entry per real obj section) - * + nobjsec - 1 (worst case: a .rela.<name> per obj section) - * + nobjgrp - 1 (one synthesized SHT_GROUP per ObjGroup) - * + 3 (.symtab, .strtab, .shstrtab) - */ - u32 max_secs = - 1 + (nobjsec - 1) + (nobjsec - 1) + (nobjgrp ? nobjgrp - 1 : 0) + 3; - if (max_secs < 4) max_secs = 4; - ElfSec* secs = arena_array(c->scratch, ElfSec, max_secs); - u32 nsecs = 0; - memset(&secs[nsecs++], 0, sizeof secs[0]); /* index 0 = SHN_UNDEF */ - - /* Map obj section id -> ELF section index. */ - u32* obj_to_elf = arena_zarray(c->scratch, u32, nobjsec); - - for (u32 i = 1; i < nobjsec; ++i) { - const Section* s = obj_section_get(ob, i); - if (s->removed) continue; /* tombstone — see obj_sweep_dead */ - ElfSec* es = &secs[nsecs]; - memset(es, 0, sizeof *es); - u32 nlen; - es->name = sym_to_str(c, s->name, &nlen); - es->name_len = nlen; - /* Honor format-specific overrides preserved by the reader for - * sh_type/sh_flags bits the canonical SecSem/SecFlag enums - * don't model (e.g. SHT_LLVM_ADDRSIG, SHF_EXCLUDE). */ - es->sh_type = (s->ext_kind == OBJ_EXT_ELF && s->ext_type) - ? 
s->ext_type - : sec_sem_to_elf(s->sem); - es->sh_flags = sec_flags_to_elf(s->flags); - if (s->ext_kind == OBJ_EXT_ELF) es->sh_flags |= s->ext_flags; - es->sh_addr = 0; - es->sh_addralign = s->align ? s->align : 1; - es->sh_entsize = s->entsize; - es->sh_link = 0; - es->sh_info = 0; - if (s->sem == SSEM_NOBITS) { - es->is_nobits = 1; - es->sh_size = s->bss_size; - } else { - es->obj_bytes = &s->bytes; - es->sh_size = s->bytes.total; - } - obj_to_elf[i] = nsecs++; - } - - /* ---- pass 2: build .symtab + .strtab content -------------------- */ - - /* .strtab: leading NUL byte. Then a name per emitted symbol. */ - Buf strtab; - buf_init(&strtab, h); - { - u8 z = 0; - buf_write(&strtab, &z, 1); - } - - /* The .symtab is built into a contiguous arena buffer of fixed-size - * 24-byte records. We don't know the count up front; bound by - * (nobjsec section symbols) + (obj symbol count). */ - u32 nobjsym = 0; - { - ObjSymIter* it = obj_symiter_new(ob); - ObjSymEntry e; - while (obj_symiter_next(it, &e)) ++nobjsym; - obj_symiter_free(it); - } - u32 max_syms = 1 + (nobjsec - 1) + nobjsym; - u8* symtab = (u8*)arena_alloc(c->scratch, (size_t)ELF64_SYM_SIZE * max_syms, - _Alignof(u64)); - u32 nsyms = 0; - memset(&symtab[nsyms * ELF64_SYM_SIZE], 0, ELF64_SYM_SIZE); - nsyms = 1; /* index 0: STN_UNDEF */ - -/* Helper to emit one Elf64_Sym record at index `idx` into symtab. 
*/ -#define WRITE_SYM(idx, st_name, st_info, st_other, st_shndx, st_value, \ - st_size) \ - do { \ - u8* slot = &symtab[(idx) * ELF64_SYM_SIZE]; \ - slot[0] = (u8)((st_name)); \ - slot[1] = (u8)((st_name) >> 8); \ - slot[2] = (u8)((st_name) >> 16); \ - slot[3] = (u8)((st_name) >> 24); \ - slot[4] = (u8)((st_info)); \ - slot[5] = (u8)((st_other)); \ - slot[6] = (u8)((st_shndx)); \ - slot[7] = (u8)((st_shndx) >> 8); \ - for (int _b = 0; _b < 8; ++_b) \ - slot[8 + _b] = (u8)((u64)(st_value) >> (_b * 8)); \ - for (int _b = 0; _b < 8; ++_b) \ - slot[16 + _b] = (u8)((u64)(st_size) >> (_b * 8)); \ - } while (0) - - /* No automatic STT_SECTION synthesis. Section symbols are emitted - * iff they are present in the input ObjBuilder (typically as - * SK_SECTION ObjSyms preserved by read_elf, or added explicitly by - * a hand-built caller that needs to reference a section by sym). - * This matches clang's output: only sections referenced by section - * symbols carry one. */ - - /* Map obj symbol id -> elf symbol index. */ - u32* sym_to_elf = arena_zarray(c->scratch, u32, nobjsym + 2); - - /* Two passes over obj symbols: locals, then globals/weak. */ - for (int pass = 0; pass < 2; ++pass) { - ObjSymIter* it = obj_symiter_new(ob); - ObjSymEntry e; - while (obj_symiter_next(it, &e)) { - const ObjSym* s = e.sym; - if (s->removed) continue; /* spurious-UNDEF prune + explicit removal */ - int is_local = (s->bind == SB_LOCAL); - if ((pass == 0) != is_local) continue; - u32 nlen; - const char* nm = sym_to_str(c, s->name, &nlen); - u32 nameoff = nlen ? strtab_add(&strtab, nm, nlen) : 0; - u8 info = - ELF64_ST_INFO(sym_bind_to_elf(s->bind), sym_kind_to_elf(s->kind)); - u8 other = sym_vis_to_elf(s->vis); - u16 shndx = sym_shndx(s, obj_to_elf, nobjsec); - u64 value = (s->kind == SK_COMMON) ? 
s->common_align : s->value; - WRITE_SYM(nsyms, nameoff, info, other, shndx, value, s->size); - sym_to_elf[e.id] = nsyms; - nsyms++; - } - obj_symiter_free(it); - } -#undef WRITE_SYM - - /* sh_info on .symtab is the index of the first non-local symbol. - * Locals = 1 (STN_UNDEF) + count of input-side LOCAL obj symbols. */ - u32 nlocals = 1; - { - ObjSymIter* it = obj_symiter_new(ob); - ObjSymEntry e; - while (obj_symiter_next(it, &e)) { - if (e.sym->removed) continue; - if (e.sym->bind == SB_LOCAL) ++nlocals; - } - obj_symiter_free(it); - } - - /* Append .symtab + .strtab + .shstrtab planning records. - * sh_link/sh_info for .symtab and .rela.* are filled in once we know - * each section's elf index. */ - u32 idx_symtab = 0, idx_strtab = 0, idx_shstrtab = 0; - - /* ---- pass 2.5: synthesize SHT_GROUP sections from ObjGroups ---- - * Append one SHT_GROUP section per ObjGroup. The body is a 4-byte LE - * flags word followed by the elf section index of each member. - * Placed before relas so the file layout has data sections, then - * groups, then relas/symtab/strtab — matching clang's ordering and - * keeping data-section offsets independent of group presence. */ - u32* group_elf_idx = - nobjgrp > 1 ? arena_array(c->scratch, u32, nobjgrp) : NULL; - if (group_elf_idx) memset(group_elf_idx, 0, sizeof(u32) * nobjgrp); - for (u32 gi = 1; gi < nobjgrp; ++gi) { - const ObjGroup* g = obj_group_get(ob, gi); - if (!g || g->removed) continue; - - u32 body_size = 4u + 4u * g->nsections; - u8* body = (u8*)arena_alloc(c->scratch, body_size, _Alignof(u32)); - u32 gflags = g->flags ? g->flags : 1u; /* GRP_COMDAT default */ - body[0] = (u8)(gflags); - body[1] = (u8)(gflags >> 8); - body[2] = (u8)(gflags >> 16); - body[3] = (u8)(gflags >> 24); - for (u32 j = 0; j < g->nsections; ++j) { - ObjSecId sid = g->sections[j]; - u32 eidx = (sid && sid < nobjsec) ? 
obj_to_elf[sid] : 0; - u8* slot = body + 4 + j * 4; - slot[0] = (u8)(eidx); - slot[1] = (u8)(eidx >> 8); - slot[2] = (u8)(eidx >> 16); - slot[3] = (u8)(eidx >> 24); - } - - u32 nlen; - const char* gname = sym_to_str(c, g->name, &nlen); - if (nlen == 0) { - gname = ".group"; - nlen = 6; - } - - ElfSec* es = &secs[nsecs]; - memset(es, 0, sizeof *es); - es->name = gname; - es->name_len = nlen; - es->sh_type = SHT_GROUP; - es->sh_flags = 0; - es->sh_addralign = 4; - es->sh_entsize = 4; - es->sh_info = (g->signature && g->signature < nobjsym + 2) - ? sym_to_elf[g->signature] - : 0; - /* sh_link patched below once idx_symtab is known. */ - es->raw_bytes = body; - es->sh_size = body_size; - group_elf_idx[gi] = nsecs; - nsecs++; - } - - /* ---- pass 3: build .rela.<name> contents ------------------------ */ - - /* Allocate one .rela section per obj section that has any relocs. */ - u32 total_relocs = obj_reloc_total(ob); - - typedef struct RelaPlan { - u32 obj_section; /* obj section the rela applies to */ - u8* bytes; /* arena-allocated rela bytes */ - u32 size; /* bytes count = nrelocs * 24 */ - } RelaPlan; - - RelaPlan* rela_plans = arena_zarray(c->scratch, RelaPlan, nobjsec); - u32 nrela_plans = 0; - - for (u32 si = 1; si < nobjsec; ++si) { - const Section* host = obj_section_get(ob, si); - if (!host || host->removed) continue; - u32 nr = obj_reloc_count(ob, si); - if (!nr) continue; - u8* buf = (u8*)arena_alloc(c->scratch, (size_t)ELF64_RELA_SIZE * nr, - _Alignof(u64)); - u32 j = 0; - for (u32 i = 0; i < total_relocs; ++i) { - const Reloc* r = obj_reloc_at(ob, i); - if (r->removed) continue; - if (r->section_id != si) continue; - u32 etype = reloc_to(r->kind); - if (etype == ELF_R_AARCH64_NONE /* == ELF_R_X86_64_NONE == 0 */ && - r->kind != R_NONE) { - compiler_panic(c, no_loc(), - "emit_elf: unsupported relocation kind %u for arch %u", - (u32)r->kind, (u32)c->target.arch); - } - u32 sym_elf_idx; - if (r->sym == OBJ_SYM_NONE) { - /* Reloc against a section: use the 
synthesized - * STT_SECTION symbol if the obj reloc carries a - * section_id-equivalent; otherwise 0. */ - sym_elf_idx = 0; - } else { - sym_elf_idx = sym_to_elf[r->sym]; - } - u8* slot = &buf[j * ELF64_RELA_SIZE]; - for (int b = 0; b < 8; ++b) slot[b] = (u8)((u64)r->offset >> (b * 8)); - u64 info = ELF64_R_INFO(sym_elf_idx, etype); - for (int b = 0; b < 8; ++b) slot[8 + b] = (u8)(info >> (b * 8)); - for (int b = 0; b < 8; ++b) - slot[16 + b] = (u8)((u64)r->addend >> (b * 8)); - ++j; - } - rela_plans[nrela_plans].obj_section = si; - rela_plans[nrela_plans].bytes = buf; - rela_plans[nrela_plans].size = nr * ELF64_RELA_SIZE; - nrela_plans++; - } - - /* Append ElfSec entries for each .rela.<name>. Names are ".rela" + - * the obj section name; allocate in scratch. */ - u32* rela_elf_idx = arena_array(c->scratch, u32, nrela_plans + 1); - for (u32 ri = 0; ri < nrela_plans; ++ri) { - u32 si = rela_plans[ri].obj_section; - const Section* s = obj_section_get(ob, si); - u32 base_len; - const char* base = sym_to_str(c, s->name, &base_len); - u32 nlen = 5 + base_len; /* ".rela" + base */ - char* nm = (char*)arena_alloc(c->scratch, nlen + 1, 1); - memcpy(nm, ".rela", 5); - memcpy(nm + 5, base, base_len); - nm[nlen] = 0; - - ElfSec* es = &secs[nsecs]; - memset(es, 0, sizeof *es); - es->name = nm; - es->name_len = nlen; - es->sh_type = SHT_RELA; - es->sh_flags = SHF_INFO_LINK; - es->sh_addralign = 8; - es->sh_entsize = ELF64_RELA_SIZE; - es->sh_info = obj_to_elf[si]; /* section the relas apply to */ - /* sh_link filled below once we know symtab's elf index. */ - es->raw_bytes = rela_plans[ri].bytes; - es->sh_size = rela_plans[ri].size; - rela_elf_idx[ri] = nsecs; - nsecs++; - } - - /* Append .symtab. 
*/ - { - ElfSec* es = &secs[nsecs]; - memset(es, 0, sizeof *es); - es->name = ".symtab"; - es->name_len = 7; - es->sh_type = SHT_SYMTAB; - es->sh_flags = 0; - es->sh_addralign = 8; - es->sh_entsize = ELF64_SYM_SIZE; - es->raw_bytes = symtab; - es->sh_size = (u64)nsyms * ELF64_SYM_SIZE; - es->sh_info = nlocals; /* first non-local symbol */ - idx_symtab = nsecs; - nsecs++; - } - - /* Patch sh_link on each .rela section now that we have idx_symtab. */ - for (u32 ri = 0; ri < nrela_plans; ++ri) { - secs[rela_elf_idx[ri]].sh_link = idx_symtab; - } - /* SHT_GROUP also points its sh_link at .symtab (the symtab the - * signature symbol's index in sh_info refers to). */ - for (u32 gi = 1; gi < nobjgrp; ++gi) { - if (group_elf_idx && group_elf_idx[gi]) { - secs[group_elf_idx[gi]].sh_link = idx_symtab; - } - } - - /* ---- pass 4: append section names to the same strtab and emit it. - * - * clang reuses .strtab for both symbol names and section names — - * e_shstrndx and .symtab.sh_link both point at it. Match that - * convention: continue appending into `strtab` (which already - * contains the symbol names), then emit one STRTAB section. */ - - /* secs[0] (SHN_UNDEF) carries name "" → offset 0. */ - secs[0].sh_name = 0; - for (u32 i = 1; i < nsecs; ++i) { - secs[i].sh_name = strtab_add(&strtab, secs[i].name, secs[i].name_len); - } - - /* Append the .strtab section record itself; its own name lands in - * the same buffer (so the strtab is self-describing). 
*/ - { - const char* nm = ".strtab"; - u32 nlen = 7; - u32 nameoff = strtab_add(&strtab, nm, nlen); - u32 sz = buf_pos(&strtab); - u8* flat = (u8*)arena_alloc(c->scratch, sz, 1); - buf_flatten(&strtab, flat); - buf_fini(&strtab); - - ElfSec* es = &secs[nsecs]; - memset(es, 0, sizeof *es); - es->name = nm; - es->name_len = nlen; - es->sh_name = nameoff; - es->sh_type = SHT_STRTAB; - es->sh_addralign = 1; - es->raw_bytes = flat; - es->sh_size = sz; - idx_strtab = nsecs; - idx_shstrtab = nsecs; /* same section serves both roles */ - nsecs++; - } - secs[idx_symtab].sh_link = idx_strtab; - - /* ---- pass 5: assign file offsets -------------------------------- */ - - u64 cur = ELF64_EHDR_SIZE; - for (u32 i = 1; i < nsecs; ++i) { - ElfSec* es = &secs[i]; - if (es->is_nobits) { - /* sh_offset for NOBITS is conventionally where the next - * non-NOBITS section begins; we set it to cur without - * advancing. */ - es->sh_offset = cur; - continue; - } - u64 a = es->sh_addralign ? es->sh_addralign : 1; - cur = ALIGN_UP(cur, a); - es->sh_offset = cur; - cur += es->sh_size; - } - cur = ALIGN_UP(cur, (u64)8); - u64 e_shoff = cur; - - /* ---- pass 6: write Ehdr ----------------------------------------- */ - - u8 ident[EI_NIDENT] = {0}; - ident[EI_MAG0] = ELFMAG0; - ident[EI_MAG1] = ELFMAG1; - ident[EI_MAG2] = ELFMAG2; - ident[EI_MAG3] = ELFMAG3; - ident[EI_CLASS] = ELFCLASS64; - ident[EI_DATA] = ELFDATA2LSB; - ident[EI_VERSION] = EV_CURRENT; - /* SysV is the canonical OSABI for relocatable AArch64 .o; clang and - * GNU ld both emit it for Linux targets. Linking does not key off - * EI_OSABI for plain AArch64 ELF — it's e_machine that matters. - * - * Exception: GNU extensions (STT_GNU_IFUNC, SHF_GNU_RETAIN, ...) - * require EI_OSABI=ELFOSABI_GNU. Clang sets it for any TU using a - * GNU-flavored marker; we mirror that so roundtrip is byte-stable. 
*/ - ident[EI_OSABI] = ELFOSABI_NONE; - { - ObjSymIter* it = obj_symiter_new(ob); - ObjSymEntry e; - u32 nsec = obj_section_count(ob), si; - while (obj_symiter_next(it, &e)) { - if (e.sym->removed) continue; - if (e.sym->kind == SK_IFUNC) { - ident[EI_OSABI] = ELFOSABI_GNU; - break; - } - } - obj_symiter_free(it); - if (ident[EI_OSABI] != ELFOSABI_GNU) { - for (si = 1; si < nsec; ++si) { - const Section* sec = obj_section_get(ob, si); - if (sec && !sec->removed && (sec->flags & SF_RETAIN)) { - ident[EI_OSABI] = ELFOSABI_GNU; - break; - } - } - } - } - /* e_flags: prefer the value preserved from a prior read (round-trip); - * else synthesize a sensible per-arch default. RV64 cfree targets the - * Linux psABI's lp64d soft-relax convention (RVC + double-float ABI). */ - u32 e_flags; - if (!obj_get_elf_e_flags(ob, &e_flags)) e_flags = elf->e_flags; - - cfree_writer_seek(w, 0); - cfree_writer_write(w, ident, EI_NIDENT); - elf_wr_u16(w, ET_REL); - elf_wr_u16(w, (u16)e_machine); - elf_wr_u32(w, EV_CURRENT); - elf_wr_u64(w, 0); /* e_entry */ - elf_wr_u64(w, 0); /* e_phoff */ - elf_wr_u64(w, e_shoff); /* e_shoff */ - elf_wr_u32(w, e_flags); /* e_flags */ - elf_wr_u16(w, ELF64_EHDR_SIZE); /* e_ehsize */ - elf_wr_u16(w, 0); /* e_phentsize */ - elf_wr_u16(w, 0); /* e_phnum */ - elf_wr_u16(w, ELF64_SHDR_SIZE); /* e_shentsize */ - elf_wr_u16(w, (u16)nsecs); /* e_shnum */ - elf_wr_u16(w, (u16)idx_shstrtab); /* e_shstrndx */ - - /* ---- pass 7: write each section's bytes ------------------------- */ - - for (u32 i = 1; i < nsecs; ++i) { - ElfSec* es = &secs[i]; - if (es->is_nobits || es->sh_size == 0) continue; - cfree_writer_seek(w, es->sh_offset); - if (es->obj_bytes) { - u32 sz = es->obj_bytes->total; - u8* tmp = (u8*)h->alloc(h, sz ? sz : 1, 1); - if (sz) buf_flatten(es->obj_bytes, tmp); - cfree_writer_write(w, tmp, sz); - h->free(h, tmp, sz ? 
sz : 1); - } else if (es->raw_bytes) { - cfree_writer_write(w, es->raw_bytes, (size_t)es->sh_size); - } - } - - /* ---- pass 8: write section header table ------------------------- */ - - cfree_writer_seek(w, e_shoff); - for (u32 i = 0; i < nsecs; ++i) { - const ElfSec* es = &secs[i]; - elf_wr_u32(w, es->sh_name); - elf_wr_u32(w, es->sh_type); - elf_wr_u64(w, es->sh_flags); - elf_wr_u64(w, es->sh_addr); - elf_wr_u64(w, es->sh_offset); - elf_wr_u64(w, es->sh_size); - elf_wr_u32(w, es->sh_link); - elf_wr_u32(w, es->sh_info); - elf_wr_u64(w, es->sh_addralign); - elf_wr_u64(w, es->sh_entsize); - } -} diff --git a/src/obj/elf_read.c b/src/obj/elf_read.c @@ -1,684 +0,0 @@ -/* ELF ET_REL reader. Parses a 64-bit little-endian relocatable object - * back into a fresh ObjBuilder. The post-finalize ObjBuilder shape is - * the canonical superset doc/DESIGN.md §5.5 promises: read_elf of an - * emit_elf output produces an ObjBuilder equivalent to the writer's - * input, modulo (a) section ordering and (b) STT_SECTION symbols - * synthesized by the writer. - * - * Scope: AArch64 little-endian. Other archs / endianness produce a - * compiler_panic with a diagnostic. 
*/ - -#include <string.h> - -#include "arch/arch.h" -#include "core/heap.h" -#include "core/pool.h" -#include "core/slice.h" -#include "obj/elf.h" - -static SrcLoc no_loc(void) { - SrcLoc l = {0, 0, 0}; - return l; -} - -/* ---- shdr scratch struct ---- */ - -typedef struct ShdrRec { - u32 sh_name; - u32 sh_type; - u64 sh_flags; - u64 sh_addr; - u64 sh_offset; - u64 sh_size; - u32 sh_link; - u32 sh_info; - u64 sh_addralign; - u64 sh_entsize; -} ShdrRec; - -static void parse_shdr(const u8* p, ShdrRec* out) { - out->sh_name = elf_rd_u32(p + 0); - out->sh_type = elf_rd_u32(p + 4); - out->sh_flags = elf_rd_u64(p + 8); - out->sh_addr = elf_rd_u64(p + 16); - out->sh_offset = elf_rd_u64(p + 24); - out->sh_size = elf_rd_u64(p + 32); - out->sh_link = elf_rd_u32(p + 40); - out->sh_info = elf_rd_u32(p + 44); - out->sh_addralign = elf_rd_u64(p + 48); - out->sh_entsize = elf_rd_u64(p + 56); -} - -/* ---- mappers ---- */ - -/* The bits this function maps to SecFlag — anything outside this mask is - * treated as opaque and stashed in Section.ext_flags by the caller so the - * emitter can write it back unchanged. Examples of bits left over: - * SHF_EXCLUDE (0x80000000) on .llvm_addrsig, SHF_COMPRESSED (0x800) on - * compressed .debug_*, SHF_INFO_LINK (0x40) on .rela.* sections. */ -#define ELF_KNOWN_FLAGS_MASK \ - ((u64)(SHF_ALLOC | SHF_EXECINSTR | SHF_WRITE | SHF_TLS | SHF_MERGE | \ - SHF_STRINGS | SHF_GROUP | SHF_LINK_ORDER | SHF_GNU_RETAIN)) - -static u16 elf_flags_to_obj(u64 f) { - u16 r = 0; - if (f & SHF_ALLOC) r |= SF_ALLOC; - if (f & SHF_EXECINSTR) r |= SF_EXEC; - if (f & SHF_WRITE) r |= SF_WRITE; - if (f & SHF_TLS) r |= SF_TLS; - if (f & SHF_MERGE) r |= SF_MERGE; - if (f & SHF_STRINGS) r |= SF_STRINGS; - if (f & SHF_GROUP) r |= SF_GROUP; - if (f & SHF_LINK_ORDER) r |= SF_LINK_ORDER; - if (f & SHF_GNU_RETAIN) r |= SF_RETAIN; - return r; -} - -/* Map ELF sh_type -> SecSem. 
Sets *known to 1 if the value is one of - * the canonical types the cfree model knows about; 0 means the caller - * fell through to the SSEM_PROGBITS fallback and should preserve the - * raw sh_type via Section.ext_type so emit_elf can write it back. */ -static u16 elf_type_to_sem(u32 t, int* known) { - *known = 1; - switch (t) { - case SHT_PROGBITS: - return SSEM_PROGBITS; - case SHT_NOBITS: - return SSEM_NOBITS; - case SHT_SYMTAB: - return SSEM_SYMTAB; - case SHT_STRTAB: - return SSEM_STRTAB; - case SHT_RELA: - return SSEM_RELA; - case SHT_REL: - return SSEM_REL; - case SHT_NOTE: - return SSEM_NOTE; - case SHT_INIT_ARRAY: - return SSEM_INIT_ARRAY; - case SHT_FINI_ARRAY: - return SSEM_FINI_ARRAY; - case SHT_PREINIT_ARRAY: - return SSEM_PREINIT_ARRAY; - case SHT_GROUP: - return SSEM_GROUP; - default: - *known = 0; - return SSEM_PROGBITS; - } -} - -static u16 elf_kind_from_name(const char* name, u32 nlen, u64 sh_flags, - u32 sh_type) { - if (sh_type == SHT_NOBITS) return SEC_BSS; - if (nlen >= 5 && memcmp(name, ".text", 5) == 0) return SEC_TEXT; - if (nlen >= 7 && memcmp(name, ".rodata", 7) == 0) return SEC_RODATA; - if (nlen >= 5 && memcmp(name, ".data", 5) == 0) return SEC_DATA; - if (nlen >= 4 && memcmp(name, ".bss", 4) == 0) return SEC_BSS; - if (nlen >= 7 && memcmp(name, ".debug_", 7) == 0) return SEC_DEBUG; - /* Fallback: classify by flags. */ - if (sh_flags & SHF_EXECINSTR) return SEC_TEXT; - if (sh_flags & SHF_WRITE) return SEC_DATA; - if (sh_flags & SHF_ALLOC) return SEC_RODATA; - return SEC_OTHER; -} - -static u16 elf_bind_to_obj(u32 b) { - switch (b) { - case STB_GLOBAL: - return SB_GLOBAL; - case STB_WEAK: - return SB_WEAK; - default: - return SB_LOCAL; - } -} - -static u16 elf_type_to_kind(u32 t, u16 shndx) { - if (shndx == SHN_UNDEF) return SK_UNDEF; - if (shndx == SHN_COMMON) return SK_COMMON; - /* SHN_ABS is the convention for STT_FILE and a few other defined - * symbols whose value is not an address. 
Don't smother the type - * with SK_ABS when the type field carries real information — only - * fall through to SK_ABS for STT_NOTYPE-at-SHN_ABS. */ - if (shndx == SHN_ABS && t == STT_NOTYPE) return SK_ABS; - switch (t) { - case STT_FUNC: - return SK_FUNC; - case STT_OBJECT: - return SK_OBJ; - case STT_SECTION: - return SK_SECTION; - case STT_FILE: - return SK_FILE; - case STT_TLS: - return SK_TLS; - case STT_COMMON: - return SK_COMMON; - case STT_GNU_IFUNC: - return SK_IFUNC; - default: - /* STT_NOTYPE on a defined symbol (e.g. AArch64 mapping symbols - * `$x` / `$d`, or assembly labels) round-trips as SK_NOTYPE. - * The linker keeps definedness keyed on SK_UNDEF; SK_NOTYPE is - * "defined but typeless". */ - return SK_NOTYPE; - } -} - -static u8 elf_other_to_vis(u32 other) { - switch (other & 3) { - case STV_HIDDEN: - return SV_HIDDEN; - case STV_PROTECTED: - return SV_PROTECTED; - case STV_INTERNAL: - return SV_INTERNAL; - default: - return SV_DEFAULT; - } -} - -/* Bounds-checked C-string slice from a strtab section. Returns "" on - * out-of-range so callers don't have to special-case it. `len_out` is - * set to the result's byte length. 
*/ -static const char* strtab_lookup(const u8* tab, u64 tab_size, u32 off, - u32* len_out) { - if (off >= tab_size) { - *len_out = 0; - return ""; - } - const char* s = (const char*)(tab + off); - u32 max = (u32)(tab_size - off); - u32 n = 0; - while (n < max && s[n] != '\0') ++n; - *len_out = n; - return s; -} - -ObjBuilder* read_elf(Compiler* c, const char* name, const u8* data, - size_t len) { - (void)name; - - if (len < ELF64_EHDR_SIZE) - compiler_panic(c, no_loc(), "read_elf: input shorter than ELF header"); - - if (data[EI_MAG0] != ELFMAG0 || data[EI_MAG1] != ELFMAG1 || - data[EI_MAG2] != ELFMAG2 || data[EI_MAG3] != ELFMAG3) - compiler_panic(c, no_loc(), "read_elf: bad ELF magic"); - - if (data[EI_CLASS] != ELFCLASS64) - compiler_panic(c, no_loc(), "read_elf: not ELFCLASS64 (got %u)", - data[EI_CLASS]); - if (data[EI_DATA] != ELFDATA2LSB) - compiler_panic(c, no_loc(), "read_elf: not ELFDATA2LSB (got %u)", - data[EI_DATA]); - - u16 e_type = elf_rd_u16(data + 16); - if (e_type != ET_REL) - compiler_panic( - c, no_loc(), - "read_elf: only ET_REL inputs are accepted by read_elf " - "(got e_type=%u); use read_elf_dso for ET_DYN shared objects", - (u32)e_type); - - u16 e_machine = elf_rd_u16(data + 18); - const ArchImpl* arch = arch_lookup_elf_machine(e_machine); - u32 (*reloc_from)(u32); - if (!arch || !arch->elf || !arch->elf->reloc_from) { - compiler_panic(c, no_loc(), "read_elf: unsupported e_machine 0x%x", - (u32)e_machine); - } - reloc_from = arch->elf->reloc_from; - - u64 e_shoff = elf_rd_u64(data + 40); - u32 e_flags = elf_rd_u32(data + 48); - u16 e_shentsize = elf_rd_u16(data + 58); - u16 e_shnum = elf_rd_u16(data + 60); - u16 e_shstrndx = elf_rd_u16(data + 62); - - if (e_shentsize != ELF64_SHDR_SIZE) - compiler_panic(c, no_loc(), "read_elf: unexpected e_shentsize %u", - (u32)e_shentsize); - if (e_shoff + (u64)e_shnum * ELF64_SHDR_SIZE > len) - compiler_panic(c, no_loc(), "read_elf: section header table out of range"); - if (e_shstrndx >= e_shnum) - 
compiler_panic(c, no_loc(), "read_elf: e_shstrndx %u >= e_shnum %u", - (u32)e_shstrndx, (u32)e_shnum); - - /* Parse all shdrs into scratch. */ - ShdrRec* shdrs = arena_array(c->scratch, ShdrRec, e_shnum); - for (u32 i = 0; i < e_shnum; ++i) - parse_shdr(data + e_shoff + (u64)i * ELF64_SHDR_SIZE, &shdrs[i]); - - const ShdrRec* shstr_sh = &shdrs[e_shstrndx]; - if (shstr_sh->sh_offset + shstr_sh->sh_size > len) - compiler_panic(c, no_loc(), "read_elf: .shstrtab out of range"); - const u8* shstrtab = data + shstr_sh->sh_offset; - u64 shstrtab_sz = shstr_sh->sh_size; - - /* Build the ObjBuilder. */ - ObjBuilder* ob = obj_new(c); - if (!ob) compiler_panic(c, no_loc(), "read_elf: obj_new failed"); - obj_set_elf_e_flags(ob, e_flags); - - /* elf_to_obj[shndx] -> ObjSecId, OBJ_SEC_NONE for skipped sections. */ - u32* elf_to_obj = arena_zarray(c->scratch, u32, e_shnum); - - /* Pass 1: create obj sections for every non-NULL shdr that carries - * load-bearing model state. SYMTAB / STRTAB / RELA / REL are - * consumed below for symbols and relocations and do NOT round-trip - * as obj sections — emit_elf re-synthesizes them from the - * ObjBuilder's symbols / strtab / relocs. The shstrtab is a STRTAB - * too, so it falls out the same way. */ - for (u32 i = 1; i < e_shnum; ++i) { - const ShdrRec* sh = &shdrs[i]; - if (sh->sh_type == SHT_NULL) continue; - if (sh->sh_type == SHT_SYMTAB) continue; - if (sh->sh_type == SHT_STRTAB) continue; - if (sh->sh_type == SHT_RELA) continue; - if (sh->sh_type == SHT_REL) continue; - /* SHT_GROUP is consumed below into an ObjGroup record (signature - * symbol + member ObjSecIds). emit_elf re-synthesizes the group - * section bytes from the ObjGroup, using current section indices - * — so the original section's raw body would be stale anyway. 
*/ - if (sh->sh_type == SHT_GROUP) continue; - - u32 nlen; - const char* nm = strtab_lookup(shstrtab, shstrtab_sz, sh->sh_name, &nlen); - Sym sym = pool_intern_slice(c->global, (Slice){ .s = nm, .len = nlen }); - - u16 sec_kind = elf_kind_from_name(nm, nlen, sh->sh_flags, sh->sh_type); - int type_known; - u16 sec_sem = elf_type_to_sem(sh->sh_type, &type_known); - u16 flags = elf_flags_to_obj(sh->sh_flags); - u32 align = sh->sh_addralign ? (u32)sh->sh_addralign : 1; - - ObjSecId id = - obj_section_ex(ob, sym, (SecKind)sec_kind, (SecSem)sec_sem, flags, - align, (u32)sh->sh_entsize, sh->sh_link, sh->sh_info); - if (id == OBJ_SEC_NONE) - compiler_panic(c, no_loc(), "read_elf: obj_section_ex failed for '%.*s'", - SLICE_ARG(((Slice){.s = nm, .len = nlen}))); - elf_to_obj[i] = id; - - /* Preserve format-specific bits the canonical SecSem/SecFlag - * mapping can't represent so emit_elf can write them back - * verbatim. ext_type only set when the sh_type fell through - * to the "unknown" path. */ - u32 leftover = (u32)(sh->sh_flags & ~ELF_KNOWN_FLAGS_MASK); - if (!type_known || leftover) { - obj_section_set_ext(ob, id, OBJ_EXT_ELF, type_known ? 0 : sh->sh_type, - leftover); - } - - /* Body bytes. */ - if (sh->sh_type == SHT_NOBITS) { - obj_reserve_bss(ob, id, (u32)sh->sh_size, align); - } else if (sh->sh_size) { - if (sh->sh_offset + sh->sh_size > len) - compiler_panic(c, no_loc(), - "read_elf: section '%.*s' bytes out of range", - SLICE_ARG(((Slice){.s = nm, .len = nlen}))); - /* For SYMTAB/STRTAB/RELA we still copy the raw bytes — the - * post-finalize shape contract says these sections are - * present; emit_elf will regenerate them on re-emit, so the - * preserved bytes are informational rather than load-bearing. - */ - obj_write(ob, id, data + sh->sh_offset, (size_t)sh->sh_size); - } - } - - /* Pass 2: parse the .symtab into ObjSyms, building an - * elf_sym_idx -> ObjSymId table. There may be zero or one SYMTAB in - * an ET_REL; pick the first. 
*/ - u32 symtab_shndx = 0; - for (u32 i = 1; i < e_shnum; ++i) { - if (shdrs[i].sh_type == SHT_SYMTAB) { - symtab_shndx = i; - break; - } - } - - u32 nsyms = 0; - u32* sym_elf_to_obj = NULL; - - if (symtab_shndx) { - const ShdrRec* sh = &shdrs[symtab_shndx]; - if (sh->sh_entsize != ELF64_SYM_SIZE) - compiler_panic(c, no_loc(), "read_elf: .symtab entsize %llu != %u", - (unsigned long long)sh->sh_entsize, (u32)ELF64_SYM_SIZE); - if (sh->sh_size % ELF64_SYM_SIZE) - compiler_panic(c, no_loc(), - "read_elf: .symtab size %llu not a multiple of %u", - (unsigned long long)sh->sh_size, (u32)ELF64_SYM_SIZE); - if (sh->sh_link >= e_shnum) - compiler_panic(c, no_loc(), "read_elf: .symtab sh_link %u out of range", - sh->sh_link); - const ShdrRec* str_sh = &shdrs[sh->sh_link]; - if (str_sh->sh_offset + str_sh->sh_size > len) - compiler_panic(c, no_loc(), "read_elf: .strtab out of range"); - const u8* strtab = data + str_sh->sh_offset; - u64 strtab_sz = str_sh->sh_size; - - nsyms = (u32)(sh->sh_size / ELF64_SYM_SIZE); - sym_elf_to_obj = arena_zarray(c->scratch, u32, nsyms ? nsyms : 1); - - const u8* base = data + sh->sh_offset; - for (u32 i = 1; i < nsyms; ++i) { /* skip index 0 */ - const u8* p = base + (u64)i * ELF64_SYM_SIZE; - u32 st_name = elf_rd_u32(p + 0); - u8 st_info = p[4]; - u8 st_other = p[5]; - u16 st_shndx = elf_rd_u16(p + 6); - u64 st_value = elf_rd_u64(p + 8); - u64 st_size = elf_rd_u64(p + 16); - - u32 nlen; - const char* nm = strtab_lookup(strtab, strtab_sz, st_name, &nlen); - Sym sn = nlen ? 
pool_intern_slice(c->global, (Slice){ .s = nm, .len = nlen }) : 0; - - u32 e_bind = ELF64_ST_BIND(st_info); - u32 e_type = ELF64_ST_TYPE(st_info); - u16 bind = elf_bind_to_obj(e_bind); - u16 kind = elf_type_to_kind(e_type, st_shndx); - u8 vis = elf_other_to_vis(st_other); - - ObjSecId sec_id; - u64 value; - u64 cmnalign = 0; - if (st_shndx == SHN_UNDEF) { - sec_id = OBJ_SEC_NONE; - value = st_value; - } else if (st_shndx == SHN_ABS || st_shndx == SHN_COMMON) { - sec_id = OBJ_SEC_NONE; - value = st_value; - if (st_shndx == SHN_COMMON) cmnalign = st_value; - } else if (st_shndx < e_shnum) { - sec_id = elf_to_obj[st_shndx]; - value = st_value; - } else { - compiler_panic(c, no_loc(), "read_elf: symbol shndx %u out of range", - (u32)st_shndx); - sec_id = OBJ_SEC_NONE; - value = 0; /* unreachable */ - } - - ObjSymId id = - obj_symbol_ex(ob, sn, (SymBind)bind, (SymVis)vis, (SymKind)kind, - sec_id, value, st_size, cmnalign); - obj_sym_mark_referenced(ob, id); - sym_elf_to_obj[i] = id; - } - } - - /* Pass 3: parse each SHT_RELA / SHT_REL into ObjBuilder relocations - * targeting the section the rela header's sh_info points at. */ - for (u32 i = 1; i < e_shnum; ++i) { - const ShdrRec* sh = &shdrs[i]; - int is_rela = (sh->sh_type == SHT_RELA); - int is_rel = (sh->sh_type == SHT_REL); - if (!is_rela && !is_rel) continue; - - u32 entsize = is_rela ? 
ELF64_RELA_SIZE : 16; - if (sh->sh_entsize != entsize) - compiler_panic(c, no_loc(), "read_elf: rela entsize %llu != %u", - (unsigned long long)sh->sh_entsize, entsize); - if (sh->sh_info == 0 || sh->sh_info >= e_shnum) - compiler_panic(c, no_loc(), "read_elf: rela sh_info %u out of range", - sh->sh_info); - ObjSecId target = elf_to_obj[sh->sh_info]; - if (target == OBJ_SEC_NONE) continue; - - u32 nrec = (u32)(sh->sh_size / entsize); - const u8* base = data + sh->sh_offset; - for (u32 j = 0; j < nrec; ++j) { - const u8* p = base + (u64)j * entsize; - u64 r_offset = elf_rd_u64(p + 0); - u64 r_info = elf_rd_u64(p + 8); - i64 r_addend = is_rela ? (i64)elf_rd_u64(p + 16) : 0; - u32 esym = ELF64_R_SYM(r_info); - u32 etype = ELF64_R_TYPE(r_info); - - u32 kind = reloc_from(etype); - if (kind == (u32)-1) - compiler_panic(c, no_loc(), - "read_elf: unsupported reloc type %u for e_machine 0x%x", - etype, (u32)e_machine); - - ObjSymId target_sym = OBJ_SYM_NONE; - if (esym && sym_elf_to_obj && esym < nsyms) - target_sym = sym_elf_to_obj[esym]; - - obj_reloc_ex(ob, target, (u32)r_offset, (RelocKind)kind, target_sym, - r_addend, is_rela ? 1 : 0, 0); - } - } - - /* Pass 4: SHT_GROUP. Each GROUP section's body is a sequence of - * 4-byte LE indices: [flags, shndx, shndx, ...]. The signature is - * the symbol named by sh_link/sh_info convention (sh_link=symtab, - * sh_info=symbol index in that symtab). 
*/ - for (u32 i = 1; i < e_shnum; ++i) { - const ShdrRec* sh = &shdrs[i]; - if (sh->sh_type != SHT_GROUP) continue; - - if (sh->sh_size < 4 || (sh->sh_size % 4)) continue; - const u8* p = data + sh->sh_offset; - u32 flags = elf_rd_u32(p); - u32 nm_len; - const char* gnm = - strtab_lookup(shstrtab, shstrtab_sz, sh->sh_name, &nm_len); - Sym gname = pool_intern_slice(c->global, (Slice){ .s = gnm, .len = nm_len }); - - ObjSymId signature = OBJ_SYM_NONE; - if (sym_elf_to_obj && sh->sh_info < nsyms) - signature = sym_elf_to_obj[sh->sh_info]; - - ObjGroupId gid = obj_group(ob, gname, signature, flags); - u32 n = (u32)(sh->sh_size / 4) - 1; - for (u32 j = 0; j < n; ++j) { - u32 shndx = elf_rd_u32(p + 4 + j * 4); - if (shndx < e_shnum && elf_to_obj[shndx] != OBJ_SEC_NONE) - obj_group_add_section(ob, gid, elf_to_obj[shndx]); - } - } - - obj_finalize(ob); - return ob; -} - -/* ---- ET_DYN (shared object) reader ---- - * - * Produces an ObjBuilder containing only the DSO's exported symbols - * (parsed from .dynsym, not .symtab). The DSO's sections, relocations, - * and groups are skipped — DSOs contribute no bytes to the output - * image. The DT_SONAME (if any) is interned and returned via - * `*soname_out` so the caller can record DT_NEEDED at link time. - * - * Symbol shape: each defined dynsym entry produces an ObjSym whose - * (bind, kind, vis) match the source. `section_id` is OBJ_SEC_NONE — - * the symbol's value is its DSO-internal vaddr, not meaningful to the - * consuming linker, so we record `value=0`. The linker layer - * (resolve_undefs) only consults the name and the defined-ness flag. - * - * Undefined dynsym entries (st_shndx==SHN_UNDEF) are imports the DSO - * itself has against other libraries; they're not relevant to a - * consumer that's linking against this DSO and are dropped. 
*/ - -static int parse_phdr(const u8* data, size_t len, u64 e_phoff, u16 e_phentsize, - u16 e_phnum, u32 want_type, u64* out_offset, - u64* out_filesz) { - u32 i; - if (e_phentsize != ELF64_PHDR_SIZE) return 0; - if (e_phoff + (u64)e_phnum * ELF64_PHDR_SIZE > len) return 0; - for (i = 0; i < e_phnum; ++i) { - const u8* p = data + e_phoff + (u64)i * ELF64_PHDR_SIZE; - u32 p_type = elf_rd_u32(p + 0); - if (p_type != want_type) continue; - *out_offset = elf_rd_u64(p + 8); - *out_filesz = elf_rd_u64(p + 32); - return 1; - } - return 0; -} - -ObjBuilder* read_elf_dso(Compiler* c, const char* name, const u8* data, - size_t len, Sym* soname_out) { - (void)name; - if (soname_out) *soname_out = 0; - - if (len < ELF64_EHDR_SIZE) - compiler_panic(c, no_loc(), "read_elf_dso: input shorter than ELF header"); - if (data[EI_MAG0] != ELFMAG0 || data[EI_MAG1] != ELFMAG1 || - data[EI_MAG2] != ELFMAG2 || data[EI_MAG3] != ELFMAG3) - compiler_panic(c, no_loc(), "read_elf_dso: bad ELF magic"); - if (data[EI_CLASS] != ELFCLASS64) - compiler_panic(c, no_loc(), "read_elf_dso: not ELFCLASS64"); - if (data[EI_DATA] != ELFDATA2LSB) - compiler_panic(c, no_loc(), "read_elf_dso: not ELFDATA2LSB"); - - u16 e_type = elf_rd_u16(data + 16); - if (e_type != ET_DYN) - compiler_panic(c, no_loc(), "read_elf_dso: expected ET_DYN, got e_type=%u", - (u32)e_type); - - u16 e_machine = elf_rd_u16(data + 18); - if (!arch_lookup_elf_machine(e_machine)) - compiler_panic(c, no_loc(), "read_elf_dso: unsupported e_machine 0x%x", - (u32)e_machine); - - u64 e_phoff = elf_rd_u64(data + 32); - u64 e_shoff = elf_rd_u64(data + 40); - u16 e_phentsize = elf_rd_u16(data + 54); - u16 e_phnum = elf_rd_u16(data + 56); - u16 e_shentsize = elf_rd_u16(data + 58); - u16 e_shnum = elf_rd_u16(data + 60); - u16 e_shstrndx = elf_rd_u16(data + 62); - - if (e_shentsize != ELF64_SHDR_SIZE) - compiler_panic(c, no_loc(), "read_elf_dso: unexpected e_shentsize %u", - (u32)e_shentsize); - if (e_shoff + (u64)e_shnum * ELF64_SHDR_SIZE > len) - 
compiler_panic(c, no_loc(), - "read_elf_dso: section header table out of range"); - if (e_shstrndx >= e_shnum) - compiler_panic(c, no_loc(), "read_elf_dso: e_shstrndx out of range"); - - ShdrRec* shdrs = arena_array(c->scratch, ShdrRec, e_shnum); - for (u32 i = 0; i < e_shnum; ++i) - parse_shdr(data + e_shoff + (u64)i * ELF64_SHDR_SIZE, &shdrs[i]); - - /* Locate .dynsym (preferred over .symtab — a stripped DSO carries - * only .dynsym) and its associated strtab via sh_link. */ - u32 dynsym_idx = 0, dynamic_idx = 0; - for (u32 i = 1; i < e_shnum; ++i) { - if (shdrs[i].sh_type == SHT_DYNSYM && !dynsym_idx) dynsym_idx = i; - if (shdrs[i].sh_type == SHT_DYNAMIC && !dynamic_idx) dynamic_idx = i; - } - - if (!dynsym_idx) - compiler_panic(c, no_loc(), "read_elf_dso: no SHT_DYNSYM in shared object"); - - /* Parse PT_DYNAMIC for DT_SONAME. The .dynamic section gives us the - * dynstr to resolve the SONAME's offset; if there's no .dynamic - * section we fall back to scanning the PT_DYNAMIC segment. */ - Sym soname = 0; - if (dynamic_idx) { - const ShdrRec* dsh = &shdrs[dynamic_idx]; - if (dsh->sh_link >= e_shnum) - compiler_panic(c, no_loc(), - "read_elf_dso: .dynamic sh_link %u out of range", - dsh->sh_link); - const ShdrRec* str_sh = &shdrs[dsh->sh_link]; - if (str_sh->sh_offset + str_sh->sh_size > len) - compiler_panic(c, no_loc(), "read_elf_dso: .dynamic strtab out of range"); - const u8* dynstr = data + str_sh->sh_offset; - u64 dynstr_sz = str_sh->sh_size; - - if (dsh->sh_offset + dsh->sh_size > len) - compiler_panic(c, no_loc(), "read_elf_dso: .dynamic body out of range"); - const u8* dynp = data + dsh->sh_offset; - u64 dynsz = dsh->sh_size; - /* DT entries are 16 bytes: (d_tag: u64, d_un: u64). 
*/ - for (u64 off = 0; off + 16 <= dynsz; off += 16) { - u64 tag = elf_rd_u64(dynp + off); - u64 val = elf_rd_u64(dynp + off + 8); - if (tag == DT_NULL) break; - if (tag == DT_SONAME) { - u32 nlen; - const char* nm = strtab_lookup(dynstr, dynstr_sz, (u32)val, &nlen); - if (nlen) soname = pool_intern_slice(c->global, (Slice){ .s = nm, .len = nlen }); - break; - } - } - } else if (e_phnum) { - /* Fallback: walk PT_DYNAMIC straight from program headers. We - * only need DT_SONAME, so skip if we can't find a strtab pointer - * inline (DT_STRTAB carries a vaddr, not a file offset — stripped - * DSOs without SHT_DYNAMIC are exceedingly rare in practice). */ - u64 dyn_off, dyn_sz; - (void)parse_phdr(data, len, e_phoff, e_phentsize, e_phnum, PT_DYNAMIC, - &dyn_off, &dyn_sz); - } - if (soname_out) *soname_out = soname; - - /* Now parse .dynsym. */ - const ShdrRec* sh = &shdrs[dynsym_idx]; - if (sh->sh_entsize != ELF64_SYM_SIZE) - compiler_panic(c, no_loc(), "read_elf_dso: .dynsym entsize %llu != %u", - (unsigned long long)sh->sh_entsize, (u32)ELF64_SYM_SIZE); - if (sh->sh_size % ELF64_SYM_SIZE) - compiler_panic(c, no_loc(), - "read_elf_dso: .dynsym size not multiple of entry size"); - if (sh->sh_link >= e_shnum) - compiler_panic(c, no_loc(), "read_elf_dso: .dynsym sh_link out of range"); - const ShdrRec* str_sh = &shdrs[sh->sh_link]; - if (str_sh->sh_offset + str_sh->sh_size > len) - compiler_panic(c, no_loc(), "read_elf_dso: .dynstr out of range"); - const u8* strtab = data + str_sh->sh_offset; - u64 strtab_sz = str_sh->sh_size; - - ObjBuilder* ob = obj_new(c); - if (!ob) compiler_panic(c, no_loc(), "read_elf_dso: obj_new failed"); - - u32 nsyms = (u32)(sh->sh_size / ELF64_SYM_SIZE); - const u8* base = data + sh->sh_offset; - for (u32 i = 1; i < nsyms; ++i) { /* skip index 0 */ - const u8* p = base + (u64)i * ELF64_SYM_SIZE; - u32 st_name = elf_rd_u32(p + 0); - u8 st_info = p[4]; - u8 st_other = p[5]; - u16 st_shndx = elf_rd_u16(p + 6); - - /* Skip the DSO's own undefined 
imports — they don't satisfy any - * undef in our consumer. Locals (STB_LOCAL) likewise aren't - * exported and would only confuse the resolver. */ - if (st_shndx == SHN_UNDEF) continue; - u32 e_bind = ELF64_ST_BIND(st_info); - if (e_bind == STB_LOCAL) continue; - - u32 nlen; - const char* nm = strtab_lookup(strtab, strtab_sz, st_name, &nlen); - if (!nlen) continue; - Sym sn = pool_intern_slice(c->global, (Slice){ .s = nm, .len = nlen }); - - u32 e_type_field = ELF64_ST_TYPE(st_info); - u16 bind = elf_bind_to_obj(e_bind); - u16 kind = elf_type_to_kind(e_type_field, st_shndx); - u8 vis = elf_other_to_vis(st_other); - - /* DSO exports land as defined symbols in OBJ_SEC_NONE with - * value=0. The consumer treats them as imports — see - * resolve_undefs in src/link/link_layout.c. */ - { - ObjSymId did = obj_symbol_ex(ob, sn, (SymBind)bind, (SymVis)vis, - (SymKind)kind, OBJ_SEC_NONE, 0, 0, 0); - obj_sym_mark_referenced(ob, did); - } - } - - obj_finalize(ob); - return ob; -} diff --git a/src/obj/elf_reloc_aarch64.c b/src/obj/elf_reloc_aarch64.c @@ -1,182 +0,0 @@ -/* RelocKind <-> AArch64 ELF reloc-type mapping. - * - * Cfree's RelocKind enum is arch-agnostic at its top (R_ABS, R_REL, R_PC - * variants) and arch-specific in its lower entries. On AArch64, R_REL and - * R_PC collapse to ELF_R_AARCH64_PREL32 / ELF_R_AARCH64_PREL64 — both - * mean "PC-relative relative to the symbol" once the linker has resolved - * final addresses. - * - * Returning 0 (ELF_R_AARCH64_NONE) for an unsupported kind is the signal - * to the caller to either panic (emit) or panic (read with diagnostic). 
*/ - -#include "obj/elf.h" - -u32 elf_aarch64_reloc_to(u32 kind /* RelocKind */) { - switch (kind) { - case R_NONE: - return ELF_R_AARCH64_NONE; - case R_ABS64: - return ELF_R_AARCH64_ABS64; - case R_ABS32: - return ELF_R_AARCH64_ABS32; - case R_PC64: - return ELF_R_AARCH64_PREL64; - case R_PC32: - return ELF_R_AARCH64_PREL32; - case R_REL64: - return ELF_R_AARCH64_PREL64; - case R_REL32: - return ELF_R_AARCH64_PREL32; - case R_AARCH64_JUMP26: - return ELF_R_AARCH64_JUMP26; - case R_AARCH64_CALL26: - return ELF_R_AARCH64_CALL26; - case R_AARCH64_CONDBR19: - return ELF_R_AARCH64_CONDBR19; - case R_AARCH64_TSTBR14: - return ELF_R_AARCH64_TSTBR14; - case R_AARCH64_LD_PREL_LO19: - return ELF_R_AARCH64_LD_PREL_LO19; - case R_AARCH64_ADR_PREL_LO21: - return ELF_R_AARCH64_ADR_PREL_LO21; - case R_AARCH64_ADR_PREL_PG_HI21: - return ELF_R_AARCH64_ADR_PREL_PG_HI21; - case R_AARCH64_ADR_PREL_PG_HI21_NC: - return ELF_R_AARCH64_ADR_PREL_PG_HI21_NC; - case R_AARCH64_ADD_ABS_LO12_NC: - return ELF_R_AARCH64_ADD_ABS_LO12_NC; - case R_AARCH64_ABS16: - return ELF_R_AARCH64_ABS16; - case R_AARCH64_PREL16: - return ELF_R_AARCH64_PREL16; - case R_AARCH64_LDST8_ABS_LO12_NC: - return ELF_R_AARCH64_LDST8_ABS_LO12_NC; - case R_AARCH64_LDST16_ABS_LO12_NC: - return ELF_R_AARCH64_LDST16_ABS_LO12_NC; - case R_AARCH64_LDST32_ABS_LO12_NC: - return ELF_R_AARCH64_LDST32_ABS_LO12_NC; - case R_AARCH64_LDST64_ABS_LO12_NC: - return ELF_R_AARCH64_LDST64_ABS_LO12_NC; - case R_AARCH64_LDST128_ABS_LO12_NC: - return ELF_R_AARCH64_LDST128_ABS_LO12_NC; - case R_AARCH64_ADR_GOT_PAGE: - return ELF_R_AARCH64_ADR_GOT_PAGE; - case R_AARCH64_LD64_GOT_LO12_NC: - return ELF_R_AARCH64_LD64_GOT_LO12_NC; - case R_AARCH64_TLSLE_ADD_TPREL_HI12: - return ELF_R_AARCH64_TLSLE_ADD_TPREL_HI12; - case R_AARCH64_TLSLE_ADD_TPREL_LO12: - return ELF_R_AARCH64_TLSLE_ADD_TPREL_LO12; - case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: - return ELF_R_AARCH64_TLSLE_ADD_TPREL_LO12_NC; - case R_AARCH64_TLSLE_LDST8_TPREL_LO12: - return 
ELF_R_AARCH64_TLSLE_LDST8_TPREL_LO12; - case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC: - return ELF_R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC; - case R_AARCH64_TLSLE_LDST16_TPREL_LO12: - return ELF_R_AARCH64_TLSLE_LDST16_TPREL_LO12; - case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC: - return ELF_R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC; - case R_AARCH64_TLSLE_LDST32_TPREL_LO12: - return ELF_R_AARCH64_TLSLE_LDST32_TPREL_LO12; - case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC: - return ELF_R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC; - case R_AARCH64_TLSLE_LDST64_TPREL_LO12: - return ELF_R_AARCH64_TLSLE_LDST64_TPREL_LO12; - case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC: - return ELF_R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC; - case R_AARCH64_GLOB_DAT: - return ELF_R_AARCH64_GLOB_DAT; - case R_AARCH64_JUMP_SLOT: - return ELF_R_AARCH64_JUMP_SLOT; - case R_AARCH64_RELATIVE: - return ELF_R_AARCH64_RELATIVE; - case R_AARCH64_COPY: - return ELF_R_AARCH64_COPY; - default: - return ELF_R_AARCH64_NONE; - } -} - -u32 elf_aarch64_reloc_from(u32 elf_type) { - switch (elf_type) { - case ELF_R_AARCH64_NONE: - return R_NONE; - case ELF_R_AARCH64_ABS64: - return R_ABS64; - case ELF_R_AARCH64_ABS32: - return R_ABS32; - case ELF_R_AARCH64_PREL64: - return R_PC64; - case ELF_R_AARCH64_PREL32: - return R_PC32; - case ELF_R_AARCH64_JUMP26: - return R_AARCH64_JUMP26; - case ELF_R_AARCH64_CALL26: - return R_AARCH64_CALL26; - case ELF_R_AARCH64_CONDBR19: - return R_AARCH64_CONDBR19; - case ELF_R_AARCH64_TSTBR14: - return R_AARCH64_TSTBR14; - case ELF_R_AARCH64_LD_PREL_LO19: - return R_AARCH64_LD_PREL_LO19; - case ELF_R_AARCH64_ADR_PREL_LO21: - return R_AARCH64_ADR_PREL_LO21; - case ELF_R_AARCH64_ADR_PREL_PG_HI21: - return R_AARCH64_ADR_PREL_PG_HI21; - case ELF_R_AARCH64_ADR_PREL_PG_HI21_NC: - return R_AARCH64_ADR_PREL_PG_HI21_NC; - case ELF_R_AARCH64_ADD_ABS_LO12_NC: - return R_AARCH64_ADD_ABS_LO12_NC; - case ELF_R_AARCH64_ABS16: - return R_AARCH64_ABS16; - case ELF_R_AARCH64_PREL16: - return R_AARCH64_PREL16; - case 
ELF_R_AARCH64_LDST8_ABS_LO12_NC: - return R_AARCH64_LDST8_ABS_LO12_NC; - case ELF_R_AARCH64_LDST16_ABS_LO12_NC: - return R_AARCH64_LDST16_ABS_LO12_NC; - case ELF_R_AARCH64_LDST32_ABS_LO12_NC: - return R_AARCH64_LDST32_ABS_LO12_NC; - case ELF_R_AARCH64_LDST64_ABS_LO12_NC: - return R_AARCH64_LDST64_ABS_LO12_NC; - case ELF_R_AARCH64_LDST128_ABS_LO12_NC: - return R_AARCH64_LDST128_ABS_LO12_NC; - case ELF_R_AARCH64_ADR_GOT_PAGE: - return R_AARCH64_ADR_GOT_PAGE; - case ELF_R_AARCH64_LD64_GOT_LO12_NC: - return R_AARCH64_LD64_GOT_LO12_NC; - case ELF_R_AARCH64_TLSLE_ADD_TPREL_HI12: - return R_AARCH64_TLSLE_ADD_TPREL_HI12; - case ELF_R_AARCH64_TLSLE_ADD_TPREL_LO12: - return R_AARCH64_TLSLE_ADD_TPREL_LO12; - case ELF_R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: - return R_AARCH64_TLSLE_ADD_TPREL_LO12_NC; - case ELF_R_AARCH64_TLSLE_LDST8_TPREL_LO12: - return R_AARCH64_TLSLE_LDST8_TPREL_LO12; - case ELF_R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC: - return R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC; - case ELF_R_AARCH64_TLSLE_LDST16_TPREL_LO12: - return R_AARCH64_TLSLE_LDST16_TPREL_LO12; - case ELF_R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC: - return R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC; - case ELF_R_AARCH64_TLSLE_LDST32_TPREL_LO12: - return R_AARCH64_TLSLE_LDST32_TPREL_LO12; - case ELF_R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC: - return R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC; - case ELF_R_AARCH64_TLSLE_LDST64_TPREL_LO12: - return R_AARCH64_TLSLE_LDST64_TPREL_LO12; - case ELF_R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC: - return R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC; - case ELF_R_AARCH64_GLOB_DAT: - return R_AARCH64_GLOB_DAT; - case ELF_R_AARCH64_JUMP_SLOT: - return R_AARCH64_JUMP_SLOT; - case ELF_R_AARCH64_RELATIVE: - return R_AARCH64_RELATIVE; - case ELF_R_AARCH64_COPY: - return R_AARCH64_COPY; - default: - return (u32)-1; /* sentinel */ - } -} diff --git a/src/obj/elf_reloc_riscv64.c b/src/obj/elf_reloc_riscv64.c @@ -1,182 +0,0 @@ -/* RelocKind <-> RISC-V ELF reloc-type mapping. 
- * - * Mirror of elf_reloc_x86_64.c for the RISC-V LP64 ABI. The arch- - * agnostic R_ABS / R_PC RelocKind entries fan out to the native - * RISC-V codes; the RISC-V-specific encodings (HI20/LO12, BRANCH, - * JAL, CALL, PCREL_*, TPREL_*, ADD/SUB/SET, RELAX, ALIGN, RVC_*) - * live in the lower band as R_RV_*. - * - * Returning ELF_R_RISCV_NONE for an unsupported kind is the signal - * to the caller to either panic (emit) or panic (read with diagnostic). */ - -#include "obj/elf.h" - -u32 elf_riscv64_reloc_to(u32 kind /* RelocKind */) { - switch (kind) { - case R_NONE: - return ELF_R_RISCV_NONE; - case R_ABS64: - return ELF_R_RISCV_64; - case R_ABS32: - return ELF_R_RISCV_32; - case R_PC32: - return ELF_R_RISCV_32_PCREL; - case R_RV_HI20: - return ELF_R_RISCV_HI20; - case R_RV_LO12_I: - return ELF_R_RISCV_LO12_I; - case R_RV_LO12_S: - return ELF_R_RISCV_LO12_S; - case R_RV_BRANCH: - return ELF_R_RISCV_BRANCH; - case R_RV_JAL: - return ELF_R_RISCV_JAL; - case R_RV_CALL: - return ELF_R_RISCV_CALL; - case R_PLT32: - return ELF_R_RISCV_CALL_PLT; - case R_RV_PCREL_HI20: - return ELF_R_RISCV_PCREL_HI20; - case R_RV_PCREL_LO12_I: - return ELF_R_RISCV_PCREL_LO12_I; - case R_RV_PCREL_LO12_S: - return ELF_R_RISCV_PCREL_LO12_S; - case R_RV_GOT_HI20: - return ELF_R_RISCV_GOT_HI20; - case R_RV_TLS_GOT_HI20: - return ELF_R_RISCV_TLS_GOT_HI20; - case R_RV_TPREL_HI20: - return ELF_R_RISCV_TPREL_HI20; - case R_RV_TPREL_LO12_I: - return ELF_R_RISCV_TPREL_LO12_I; - case R_RV_TPREL_LO12_S: - return ELF_R_RISCV_TPREL_LO12_S; - case R_RV_TPREL_ADD: - return ELF_R_RISCV_TPREL_ADD; - case R_RV_ADD8: - return ELF_R_RISCV_ADD8; - case R_RV_ADD16: - return ELF_R_RISCV_ADD16; - case R_RV_ADD32: - return ELF_R_RISCV_ADD32; - case R_RV_ADD64: - return ELF_R_RISCV_ADD64; - case R_RV_SUB8: - return ELF_R_RISCV_SUB8; - case R_RV_SUB16: - return ELF_R_RISCV_SUB16; - case R_RV_SUB32: - return ELF_R_RISCV_SUB32; - case R_RV_SUB64: - return ELF_R_RISCV_SUB64; - case R_RV_ALIGN: - return 
ELF_R_RISCV_ALIGN; - case R_RV_RVC_BRANCH: - return ELF_R_RISCV_RVC_BRANCH; - case R_RV_RVC_JUMP: - return ELF_R_RISCV_RVC_JUMP; - case R_RV_RELAX: - return ELF_R_RISCV_RELAX; - case R_RV_SUB6: - return ELF_R_RISCV_SUB6; - case R_RV_SET6: - return ELF_R_RISCV_SET6; - case R_RV_SET8: - return ELF_R_RISCV_SET8; - case R_RV_SET16: - return ELF_R_RISCV_SET16; - case R_RV_SET32: - return ELF_R_RISCV_SET32; - case R_RV_SET_ULEB128: - return ELF_R_RISCV_SET_ULEB128; - case R_RV_SUB_ULEB128: - return ELF_R_RISCV_SUB_ULEB128; - default: - return ELF_R_RISCV_NONE; - } -} - -u32 elf_riscv64_reloc_from(u32 elf_type) { - switch (elf_type) { - case ELF_R_RISCV_NONE: - return R_NONE; - case ELF_R_RISCV_64: - return R_ABS64; - case ELF_R_RISCV_32: - return R_ABS32; - case ELF_R_RISCV_32_PCREL: - return R_PC32; - case ELF_R_RISCV_HI20: - return R_RV_HI20; - case ELF_R_RISCV_LO12_I: - return R_RV_LO12_I; - case ELF_R_RISCV_LO12_S: - return R_RV_LO12_S; - case ELF_R_RISCV_BRANCH: - return R_RV_BRANCH; - case ELF_R_RISCV_JAL: - return R_RV_JAL; - case ELF_R_RISCV_CALL: - return R_RV_CALL; - case ELF_R_RISCV_CALL_PLT: - return R_PLT32; - case ELF_R_RISCV_PCREL_HI20: - return R_RV_PCREL_HI20; - case ELF_R_RISCV_PCREL_LO12_I: - return R_RV_PCREL_LO12_I; - case ELF_R_RISCV_PCREL_LO12_S: - return R_RV_PCREL_LO12_S; - case ELF_R_RISCV_GOT_HI20: - return R_RV_GOT_HI20; - case ELF_R_RISCV_TLS_GOT_HI20: - return R_RV_TLS_GOT_HI20; - case ELF_R_RISCV_TPREL_HI20: - return R_RV_TPREL_HI20; - case ELF_R_RISCV_TPREL_LO12_I: - return R_RV_TPREL_LO12_I; - case ELF_R_RISCV_TPREL_LO12_S: - return R_RV_TPREL_LO12_S; - case ELF_R_RISCV_TPREL_ADD: - return R_RV_TPREL_ADD; - case ELF_R_RISCV_ADD8: - return R_RV_ADD8; - case ELF_R_RISCV_ADD16: - return R_RV_ADD16; - case ELF_R_RISCV_ADD32: - return R_RV_ADD32; - case ELF_R_RISCV_ADD64: - return R_RV_ADD64; - case ELF_R_RISCV_SUB8: - return R_RV_SUB8; - case ELF_R_RISCV_SUB16: - return R_RV_SUB16; - case ELF_R_RISCV_SUB32: - return R_RV_SUB32; - case 
ELF_R_RISCV_SUB64: - return R_RV_SUB64; - case ELF_R_RISCV_ALIGN: - return R_RV_ALIGN; - case ELF_R_RISCV_RVC_BRANCH: - return R_RV_RVC_BRANCH; - case ELF_R_RISCV_RVC_JUMP: - return R_RV_RVC_JUMP; - case ELF_R_RISCV_RELAX: - return R_RV_RELAX; - case ELF_R_RISCV_SUB6: - return R_RV_SUB6; - case ELF_R_RISCV_SET6: - return R_RV_SET6; - case ELF_R_RISCV_SET8: - return R_RV_SET8; - case ELF_R_RISCV_SET16: - return R_RV_SET16; - case ELF_R_RISCV_SET32: - return R_RV_SET32; - case ELF_R_RISCV_SET_ULEB128: - return R_RV_SET_ULEB128; - case ELF_R_RISCV_SUB_ULEB128: - return R_RV_SUB_ULEB128; - default: - return (u32)-1; /* sentinel */ - } -} diff --git a/src/obj/elf_reloc_x86_64.c b/src/obj/elf_reloc_x86_64.c @@ -1,134 +0,0 @@ -/* RelocKind <-> x86_64 ELF reloc-type mapping. - * - * Mirror of elf_reloc_aarch64.c for the x86_64 SysV ABI. The arch- - * agnostic R_ABS / R_PC / R_REL RelocKind entries fan out to the - * native x86_64 codes; the x86_64-only encodings (R_X64_PC8, PLT32, - * GOTPCREL, dynamic-only entries) live in the lower band. - * - * Returning ELF_R_X86_64_NONE for an unsupported kind is the signal - * to the caller to either panic (emit) or panic (read with diagnostic). 
*/ - -#include "obj/elf.h" - -u32 elf_x86_64_reloc_to(u32 kind /* RelocKind */) { - switch (kind) { - case R_NONE: - return ELF_R_X86_64_NONE; - case R_ABS64: - return ELF_R_X86_64_64; - case R_ABS32: - return ELF_R_X86_64_32; - case R_X64_32S: - return ELF_R_X86_64_32S; - case R_PC32: - return ELF_R_X86_64_PC32; - case R_PC64: - return ELF_R_X86_64_PC64; - case R_REL32: - return ELF_R_X86_64_PC32; - case R_REL64: - return ELF_R_X86_64_PC64; - case R_X64_PC8: - return ELF_R_X86_64_PC8; - case R_PLT32: - case R_X64_PLT32: - return ELF_R_X86_64_PLT32; - case R_GOT32: - return ELF_R_X86_64_GOT32; - case R_X64_GOTPCREL: - return ELF_R_X86_64_GOTPCREL; - case R_X64_GOTPCRELX: - return ELF_R_X86_64_GOTPCRELX; - case R_X64_REX_GOTPCRELX: - return ELF_R_X86_64_REX_GOTPCRELX; - case R_X64_GOTPC32: - return ELF_R_X86_64_GOTPC32; - case R_X64_GOTOFF64: - return ELF_R_X86_64_GOTOFF64; - case R_X64_TPOFF32: - return ELF_R_X86_64_TPOFF32; - case R_X64_TPOFF64: - return ELF_R_X86_64_TPOFF64; - case R_X64_DTPOFF32: - return ELF_R_X86_64_DTPOFF32; - case R_X64_DTPMOD64: - return ELF_R_X86_64_DTPMOD64; - case R_X64_DTPOFF64: - return ELF_R_X86_64_DTPOFF64; - case R_X64_TLSGD: - return ELF_R_X86_64_TLSGD; - case R_X64_TLSLD: - return ELF_R_X86_64_TLSLD; - case R_X64_GOTTPOFF: - return ELF_R_X86_64_GOTTPOFF; - case R_X64_GLOB_DAT: - return ELF_R_X86_64_GLOB_DAT; - case R_X64_JUMP_SLOT: - return ELF_R_X86_64_JUMP_SLOT; - case R_X64_RELATIVE: - return ELF_R_X86_64_RELATIVE; - case R_X64_COPY: - return ELF_R_X86_64_COPY; - default: - return ELF_R_X86_64_NONE; - } -} - -u32 elf_x86_64_reloc_from(u32 elf_type) { - switch (elf_type) { - case ELF_R_X86_64_NONE: - return R_NONE; - case ELF_R_X86_64_64: - return R_ABS64; - case ELF_R_X86_64_32: - return R_ABS32; - case ELF_R_X86_64_32S: - return R_X64_32S; - case ELF_R_X86_64_PC32: - return R_PC32; - case ELF_R_X86_64_PC64: - return R_PC64; - case ELF_R_X86_64_PC8: - return R_X64_PC8; - case ELF_R_X86_64_PLT32: - return R_X64_PLT32; - case 
ELF_R_X86_64_GOT32: - return R_GOT32; - case ELF_R_X86_64_GOTPCREL: - return R_X64_GOTPCREL; - case ELF_R_X86_64_GOTPCRELX: - return R_X64_GOTPCRELX; - case ELF_R_X86_64_REX_GOTPCRELX: - return R_X64_REX_GOTPCRELX; - case ELF_R_X86_64_GOTPC32: - return R_X64_GOTPC32; - case ELF_R_X86_64_GOTOFF64: - return R_X64_GOTOFF64; - case ELF_R_X86_64_TPOFF32: - return R_X64_TPOFF32; - case ELF_R_X86_64_TPOFF64: - return R_X64_TPOFF64; - case ELF_R_X86_64_DTPOFF32: - return R_X64_DTPOFF32; - case ELF_R_X86_64_DTPMOD64: - return R_X64_DTPMOD64; - case ELF_R_X86_64_DTPOFF64: - return R_X64_DTPOFF64; - case ELF_R_X86_64_TLSGD: - return R_X64_TLSGD; - case ELF_R_X86_64_TLSLD: - return R_X64_TLSLD; - case ELF_R_X86_64_GOTTPOFF: - return R_X64_GOTTPOFF; - case ELF_R_X86_64_GLOB_DAT: - return R_X64_GLOB_DAT; - case ELF_R_X86_64_JUMP_SLOT: - return R_X64_JUMP_SLOT; - case ELF_R_X86_64_RELATIVE: - return R_X64_RELATIVE; - case ELF_R_X86_64_COPY: - return R_X64_COPY; - default: - return (u32)-1; /* sentinel */ - } -} diff --git a/src/obj/format.h b/src/obj/format.h @@ -1,13 +1,13 @@ #ifndef CFREE_OBJ_FORMAT_H #define CFREE_OBJ_FORMAT_H -#include <stddef.h> - #include <cfree/object.h> +#include <stddef.h> #include "core/core.h" typedef struct LinkImage LinkImage; +typedef struct Linker Linker; typedef ObjBuilder* (*ObjFormatReadFn)(Compiler*, const char* name, const u8* data, size_t len); @@ -16,6 +16,45 @@ typedef ObjBuilder* (*ObjFormatReadDsoFn)(Compiler*, const char* name, Sym* soname_out); typedef void (*ObjFormatEmitFn)(Compiler*, ObjBuilder*, Writer*); typedef void (*ObjFormatLinkEmitFn)(LinkImage*, Writer*); +typedef void (*ObjFormatLayoutDynFn)(Linker*, LinkImage*); +typedef void (*ObjFormatFreeDynFn)(LinkImage*); +typedef void (*ObjFormatMachoStubFn)(u8* dst, u64 stub_vaddr, + u64 got_slot_vaddr); +typedef void (*ObjFormatCoffStubFn)(u8* dst, u64 stub_vaddr, + u64 iat_slot_vaddr); + +typedef struct ObjElfArchOps { + CfreeArchKind arch; + u32 e_machine; + u32 e_flags; + const 
char* default_musl_interp; + u32 r_relative; + u32 r_glob_dat; + u32 r_jump_slot; + u32 (*reloc_to)(u32 kind); + u32 (*reloc_from)(u32 wire_type); +} ObjElfArchOps; + +typedef struct ObjMachoArchOps { + CfreeArchKind arch; + u32 cputype; + u32 cpusubtype; + u32 stub_size; + ObjFormatMachoStubFn emit_stub; + u32 (*reloc_to)(u32 kind); + u32 (*reloc_pcrel)(u32 kind); + u32 (*reloc_length)(u32 kind); + u32 (*reloc_from)(u32 wire_type); +} ObjMachoArchOps; + +typedef struct ObjCoffArchOps { + CfreeArchKind arch; + u16 machine; + u32 stub_size; + ObjFormatCoffStubFn emit_iat_stub; + u32 (*reloc_to)(u32 kind); + u32 (*reloc_from)(u32 wire_type); +} ObjCoffArchOps; typedef enum ObjFormatArchiveAction { OBJ_FORMAT_ARCHIVE_KEEP = 0, @@ -49,6 +88,15 @@ typedef struct ObjFormatImpl { ObjFormatReadFn read; ObjFormatReadDsoFn read_dso; ObjFormatLinkEmitFn link_emit; + ObjFormatLayoutDynFn layout_dyn; + ObjFormatFreeDynFn free_dyn; + + const ObjElfArchOps* (*elf_arch)(CfreeArchKind); + const ObjElfArchOps* (*elf_machine)(u32 e_machine); + const ObjMachoArchOps* (*macho_arch)(CfreeArchKind); + const ObjMachoArchOps* (*macho_cputype)(u32 cputype); + const ObjCoffArchOps* (*coff_arch)(CfreeArchKind); + const ObjCoffArchOps* (*coff_machine)(u16 machine); /* Optional format-specific linker ingestion policy. */ int (*classify_obj_input)(Compiler*, ObjBuilder*, Sym* soname_out); diff --git a/src/obj/macho/emit.c b/src/obj/macho/emit.c @@ -0,0 +1,797 @@ +/* Mach-O MH_OBJECT writer. Walks a finalized ObjBuilder and emits a + * 64-bit little-endian relocatable object via the supplied Writer. + * + * Layout strategy (MH_OBJECT — everything in one anonymous segment): + * 1. plan Mach-O sections (one per non-symtab/strtab/rela ObjSection), + * mapping cfree section names to (segname, sectname) pairs; + * 2. partition ObjSyms into local / extdef / undef and assign final + * indices for LC_DYSYMTAB; + * 3. 
build per-section relocation tables via the per-arch translator + * (only aarch64 is wired today); + * 4. assign file offsets sequentially: header, load commands, section + * bytes, relocation tables, symbol table, string table; + * 5. write header → load commands → section bytes → relocs → symtab + * → strtab. + * + * 64-bit little-endian only. Big-endian / 32-bit panics at entry. + * + * Round-trip invariant: read_macho of + * this output must produce an ObjBuilder shape-equivalent to the input, + * modulo (a) Mach-O's mandatory (segname, sectname) pairing and (b) + * any synthesized N_SECT symbols. The (segname,sectname) form chosen + * here is the canonical post-roundtrip shape — read_macho stores the + * comma-joined "__SEG,__sect" form in Section.name so a re-emit + * produces the same bytes. */ + +#include <string.h> + +#include "core/arena.h" +#include "core/buf.h" +#include "core/bytes.h" +#include "core/heap.h" +#include "core/pool.h" +#include "core/slice.h" +#include "core/util.h" +#include "obj/format.h" +#include "obj/macho/macho.h" + +static SrcLoc no_loc(void) { + SrcLoc l = {0, 0, 0}; + return l; +} + +/* ---- LE writer helpers (Writer-based) ---- */ + +static void wr_u32(Writer* w, u32 v) { + u8 b[4]; + wr_u32_le(b, v); + cfree_writer_write(w, b, 4); +} + +static void wr_u64(Writer* w, u64 v) { + u8 b[8]; + wr_u64_le(b, v); + cfree_writer_write(w, b, 8); +} + +static void wr_name16(Writer* w, const char* s, u32 len) { + /* Mach-O section/segment names are 16-byte zero-padded fields. Names + * longer than 16 are truncated; the on-disk format leaves no room for + * a longer encoding. */ + u8 buf[16]; + u32 n = len > 16 ? 16 : len; + memcpy(buf, s, n); + if (n < 16) memset(buf + n, 0, 16 - n); + cfree_writer_write(w, buf, 16); +} + +/* ---- (segname,sectname) derivation ---- */ + +/* Split a cfree section name into Mach-O (segname, sectname) pair. 
+ * If `name` contains a comma, it is treated as already in + * "__SEG,__sect" form and split at the first comma. Otherwise we + * derive the pair from SecKind, ignoring `name` (the input was an + * ELF-shaped name like ".text" or ".rodata"). */ +typedef struct MSegSect { + char segname[16]; + char sectname[16]; + u32 seg_len; + u32 sect_len; +} MSegSect; + +static void copy_fixed16(char* dst, u32* len_out, const char* src, + u32 src_len) { + u32 n = src_len > 16 ? 16 : src_len; + memcpy(dst, src, n); + if (n < 16) memset(dst + n, 0, 16 - n); + *len_out = n; +} + +static void name_to_seg_sect(const char* name, u32 nlen, u16 sec_kind, + MSegSect* out) { + /* Comma-separated form: take prefix as segname, suffix as sectname. */ + for (u32 i = 0; i < nlen; ++i) { + if (name[i] == ',') { + copy_fixed16(out->segname, &out->seg_len, name, i); + copy_fixed16(out->sectname, &out->sect_len, name + i + 1, nlen - i - 1); + return; + } + } + + /* Not comma-separated. Derive from SecKind; ignore `name`. */ + const char* seg; + const char* sect; + switch (sec_kind) { + case SEC_TEXT: + seg = "__TEXT"; + sect = "__text"; + break; + case SEC_RODATA: + seg = "__TEXT"; + sect = "__const"; + break; + case SEC_DATA: + seg = "__DATA"; + sect = "__data"; + break; + case SEC_BSS: + seg = "__DATA"; + sect = "__bss"; + break; + case SEC_DEBUG: + seg = "__DWARF"; + /* Strip a leading `.` from the input name (".debug_info" → + * "__debug_info") so the dwarf section names round-trip. */ + sect = (nlen && name[0] == '.') ? name + 1 : name; + copy_fixed16(out->segname, &out->seg_len, seg, + (u32)slice_from_cstr(seg).len); + copy_fixed16(out->sectname, &out->sect_len, sect, + (u32)((nlen && name[0] == '.') ? 
nlen - 1 : nlen)); + return; + default: + seg = "__DATA"; + sect = "__data"; + break; + } + copy_fixed16(out->segname, &out->seg_len, seg, (u32)slice_from_cstr(seg).len); + copy_fixed16(out->sectname, &out->sect_len, sect, + (u32)slice_from_cstr(sect).len); +} + +/* ---- per-section plan ---- */ + +typedef struct MSec { + MSegSect ns; + u64 addr; /* assigned vmaddr within the segment */ + u64 size; /* bytes (or bss size) */ + u32 fileoff; /* 0 for zerofill */ + u32 align; /* power-of-two; stored as log2 in section_64.align */ + u32 reloff; /* 0 if no relocs */ + u32 nreloc; + u32 flags; /* S_TYPE | S_ATTR_* */ + u32 entsize; + u32 obj_sec; /* originating ObjSecId */ + int is_zerofill; + const Buf* obj_bytes; /* NULL when zerofill */ + u8* relocs; /* arena-allocated; nreloc * 8 bytes */ +} MSec; + +static u32 log2_align(u32 a) { + u32 r = 0; + while ((1u << r) < a) ++r; + return r; +} + +static u32 section_flags_for(u16 sec_kind, u16 sec_flags, const char* sectname, + u32 sect_len) { + u32 f = 0; + if (sec_kind == SEC_TEXT || (sec_flags & SF_EXEC)) { + f |= S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS; + } + if (sec_flags & SF_TLS) { + /* Mach-O distinguishes three TLV section types by sectname: + * __thread_data → S_THREAD_LOCAL_REGULAR (initial data) + * __thread_bss → S_THREAD_LOCAL_ZEROFILL (zero-init data) + * __thread_vars → S_THREAD_LOCAL_VARIABLES (descriptor records) + * dyld dispatches its TLV-bootstrap pass off the S_TYPE; the + * S_ATTR_* bits don't carry TLV semantics so we just emit the type. */ + if (sect_len >= 13 && memcmp(sectname, "__thread_vars", 13) == 0) + return S_THREAD_LOCAL_VARIABLES; + if (sec_kind == SEC_BSS) return S_THREAD_LOCAL_ZEROFILL; + return S_THREAD_LOCAL_REGULAR; + } + if (sec_kind == SEC_BSS || + (sect_len >= 5 && memcmp(sectname, "__bss", 5) == 0)) { + f |= S_ZEROFILL; + } + if (sec_flags & SF_STRINGS) { + f = (f & ~SECTION_TYPE) | S_CSTRING_LITERALS; + } + /* Default S_REGULAR (0) for all others. 
*/ + return f; +} + +/* ---- symbol partition ---- */ + +typedef struct MSym { + ObjSymId obj_id; + u32 strx; /* offset in string table */ + u8 n_type; + u8 n_sect; + u16 n_desc; + u64 n_value; +} MSym; + +static int sym_is_undef(const ObjSym* s) { + return s->section_id == OBJ_SEC_NONE && s->kind != SK_ABS && + s->kind != SK_COMMON; +} + +static int sym_is_extdef(const ObjSym* s) { + if (sym_is_undef(s)) return 0; + return s->bind == SB_GLOBAL || s->bind == SB_WEAK; +} + +/* ---- string table ---- + * + * Mach-O strtab: leading zero byte at offset 0 represents the empty + * string. Entries are NUL-terminated; we don't dedupe (small symbol + * counts in v1; matches the simplest llvm output). The "_" prefix on + * C symbols is added inline in the writer below. */ + +void emit_macho(Compiler* c, ObjBuilder* ob, Writer* w) { + Heap* h = (Heap*)c->ctx->heap; + + /* Tombstone sweep first — strip/objcopy mutations and the historical + * UNDEF prune are both expressed via Section.removed / ObjSym.removed + * post-sweep. See obj_sweep_dead. */ + obj_sweep_dead(ob); + + /* ---- target validation ---------------------------------------- */ + const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_MACHO); + const ObjMachoArchOps* macho = + fmt && fmt->macho_arch ? 
fmt->macho_arch(c->target.arch) : NULL; + u32 cputype, cpusubtype; + u32 (*reloc_to)(u32); + u32 (*reloc_pcrel)(u32); + u32 (*reloc_length)(u32); + if (!macho || !macho->reloc_to || !macho->reloc_pcrel || + !macho->reloc_length) { + compiler_panic(c, no_loc(), "emit_macho: unsupported target arch %u", + (u32)c->target.arch); + } + cputype = macho->cputype; + cpusubtype = macho->cpusubtype; + reloc_to = macho->reloc_to; + reloc_pcrel = macho->reloc_pcrel; + reloc_length = macho->reloc_length; + if (c->target.big_endian) { + compiler_panic(c, no_loc(), "emit_macho: big-endian not supported"); + } + if (c->target.ptr_size != 8) { + compiler_panic(c, no_loc(), "emit_macho: ptr_size %u (expected 8)", + (u32)c->target.ptr_size); + } + + /* ---- pass 1: plan Mach-O sections ----------------------------- */ + u32 nobjsec = obj_section_count(ob); + MSec* secs = arena_zarray(c->scratch, MSec, nobjsec ? nobjsec : 1); + u32* obj_to_msec = arena_zarray(c->scratch, u32, nobjsec ? nobjsec : 1); + u32 nsecs = 0; + + for (u32 i = 1; i < nobjsec; ++i) { + const Section* s = obj_section_get(ob, i); + if (s->removed) continue; /* see obj_sweep_dead */ + /* Skip ELF-style synthetic sections that read_elf would have + * filtered: SYMTAB / STRTAB / RELA / GROUP have no Mach-O + * representation as data sections. */ + if (s->sem == SSEM_SYMTAB || s->sem == SSEM_STRTAB || s->sem == SSEM_RELA || + s->sem == SSEM_REL || s->sem == SSEM_GROUP) { + continue; + } + Slice nm_s = pool_slice(c->global, s->name); + const char* nm = nm_s.s; + size_t nlen = nm_s.len; + MSec* m = &secs[nsecs]; + name_to_seg_sect(nm ? nm : "", (u32)nlen, s->kind, &m->ns); + m->obj_sec = i; + m->align = s->align ? s->align : 1; + m->entsize = s->entsize; + /* Mach-O reader stashes the raw section.flags (S_TYPE | S_ATTR_*) + * in Section.ext_type when reading a Mach-O input. Use it + * verbatim so attribute bits like S_ATTR_NO_DEAD_STRIP / + * S_ATTR_LIVE_SUPPORT round-trip. 
Fall back to the kind-derived + * default for sections originating from non-Mach-O readers (e.g. + * cfree codegen). */ + if (s->ext_kind == OBJ_EXT_MACHO && s->ext_type) { + m->flags = s->ext_type; + } else { + m->flags = + section_flags_for(s->kind, s->flags, m->ns.sectname, m->ns.sect_len); + } + if (s->sem == SSEM_NOBITS || s->kind == SEC_BSS) { + m->is_zerofill = 1; + m->size = s->bss_size; + m->obj_bytes = NULL; + /* Preserve S_THREAD_LOCAL_ZEROFILL when SF_TLS routed us there; + * a regular BSS section gets the plain S_ZEROFILL type. */ + u32 stype = m->flags & SECTION_TYPE; + if (stype != S_THREAD_LOCAL_ZEROFILL) + m->flags = (m->flags & ~SECTION_TYPE) | S_ZEROFILL; + } else { + m->is_zerofill = 0; + m->size = s->bytes.total; + m->obj_bytes = &s->bytes; + } + obj_to_msec[i] = nsecs + 1; /* 1-based: matches Mach-O n_sect. */ + nsecs++; + } + + /* ---- pass 2: assign vmaddrs (segment-relative) and per-section + * flat-layout addresses. MH_OBJECT keeps everything in + * one segment with vmaddr=0; section addr fields are + * relative offsets within the segment. + * + * Two-pass to match the conventional Mach-O `MH_OBJECT` layout: + * non-zerofill sections come first in vmaddr order, then zerofill + * sections at the tail. Apple `as` and clang `-c` both lay out + * this way, and roundtripping must reproduce it so symbol n_values + * (which are segment-relative addresses) compare equal. 
*/ + u64 cur_addr = 0; + for (u32 i = 0; i < nsecs; ++i) { + MSec* m = &secs[i]; + if (m->is_zerofill) continue; + cur_addr = ALIGN_UP(cur_addr, (u64)m->align); + m->addr = cur_addr; + cur_addr += m->size; + } + for (u32 i = 0; i < nsecs; ++i) { + MSec* m = &secs[i]; + if (!m->is_zerofill) continue; + cur_addr = ALIGN_UP(cur_addr, (u64)m->align); + m->addr = cur_addr; + cur_addr += m->size; + } + u64 segment_vmsize = cur_addr; + + /* ---- pass 3: partition symbols (locals, extdefs, undefs) ------ */ + u32 nobjsym = 0; + { + ObjSymIter* it = obj_symiter_new(ob); + ObjSymEntry e; + while (obj_symiter_next(it, &e)) ++nobjsym; + obj_symiter_free(it); + } + + MSym* msyms = arena_zarray(c->scratch, MSym, nobjsym + 1); + u32 nmsyms = 0; + u32* sym_obj_to_macho = + arena_zarray(c->scratch, u32, nobjsym + 2); /* obj_id -> mach idx */ + + Buf strtab; + buf_init(&strtab, h); + /* Mach-O strtab convention: the first byte is " " (space) or NUL — + * llvm/Apple emit a single NUL. We start with NUL for offset 0. */ + { + u8 z = 0; + buf_write(&strtab, &z, 1); + } + + /* Emit in three passes so n_type/sect ordering matches LC_DYSYMTAB + * (locals, then extdefs, then undefs). */ + for (int pass = 0; pass < 3; ++pass) { + ObjSymIter* it = obj_symiter_new(ob); + ObjSymEntry e; + while (obj_symiter_next(it, &e)) { + const ObjSym* s = e.sym; + if (s->removed) continue; /* spurious-UNDEF prune + explicit removal */ + int undef = sym_is_undef(s); + int extdef = sym_is_extdef(s); + int local = !undef && !extdef; + int want = + (pass == 0 && local) || (pass == 1 && extdef) || (pass == 2 && undef); + if (!want) continue; + MSym* ms = &msyms[nmsyms]; + ms->obj_id = e.id; + + Slice nm_s = pool_slice(c->global, s->name); + const char* nm = nm_s.s; + size_t nlen = nm_s.len; + /* Mach-O symbol names are stored on disk verbatim — including + * the leading `_` Apple toolchains use for C-source-level + * symbols ("_main" for `int main()`). 
cfree treats the prefix + * as part of the on-disk name, not a transform applied at emit. + * Name-canonicalization for API callers (cfree_jit_lookup, + * link_set_entry) lives one layer up at the linker boundary + * (link.c), so emit/read stay byte-for-byte stable. */ + if (nlen && nm) { + u32 off = buf_pos(&strtab); + buf_write(&strtab, nm, nlen); + u8 z = 0; + buf_write(&strtab, &z, 1); + ms->strx = off; + } else { + ms->strx = 0; + } + + u8 type = 0; + if (extdef) type |= N_EXT; + if (s->vis == SV_HIDDEN || s->vis == SV_INTERNAL) { + /* Mach-O encodes hidden externals as N_PEXT|N_EXT. */ + type |= N_PEXT; + } + u8 n_sect = NO_SECT; + u16 n_desc = 0; + u64 value = s->value; + + if (undef) { + type |= N_UNDF; + /* Undefined symbols with non-LOCAL bind are external references + * (the common case — every `extern int x;`). Setting N_EXT + * matches what clang emits and what Apple `ld` expects. */ + if (s->bind == SB_GLOBAL || s->bind == SB_WEAK) type |= N_EXT; + if (s->bind == SB_WEAK) n_desc |= N_WEAK_REF; + value = 0; + } else if (s->kind == SK_ABS) { + type |= N_ABS; + } else if (s->kind == SK_COMMON) { + /* Mach-O common symbols are N_UNDF|N_EXT with n_value=size and + * n_desc carrying log2(align) in the GET_COMM_ALIGN bits. */ + type = N_UNDF | N_EXT; + value = s->size; + u32 a = s->common_align ? (u32)s->common_align : 1; + n_desc = (u16)(log2_align(a) << 8); /* GET_COMM_ALIGN field */ + } else { + type |= N_SECT; + u32 ms_idx = (s->section_id < nobjsec) ? obj_to_msec[s->section_id] : 0; + n_sect = (u8)ms_idx; + if (n_sect && n_sect <= nsecs) { + value = secs[n_sect - 1].addr + s->value; + } + if (s->bind == SB_WEAK) n_desc |= N_WEAK_DEF; + } + + /* OR in any pass-through n_desc bits the reader stashed in + * sym->flags (N_NO_DEAD_STRIP, etc.). The bits we already + * compute (N_WEAK_DEF / N_WEAK_REF and the common-alignment + * field) are already excluded by read_macho before stashing, + * so a plain OR can't double-count. 
*/ + n_desc |= s->flags; + + ms->n_type = type; + ms->n_sect = n_sect; + ms->n_desc = n_desc; + ms->n_value = value; + + sym_obj_to_macho[e.id] = nmsyms + 1; /* 1-based index, 0 = none. */ + nmsyms++; + } + obj_symiter_free(it); + } + + u32 nlocals = 0, nextdefs = 0, nundefs = 0; + for (u32 i = 0; i < nmsyms; ++i) { + u8 t = msyms[i].n_type; + u8 ext = (t & N_EXT) != 0; + u8 typ = (u8)(t & N_TYPE); + if (typ == N_UNDF && ext) { + /* Could be undef or common — common has nonzero n_value. */ + if (msyms[i].n_value != 0) + ++nextdefs; /* common is conventionally extdef-shaped */ + else + ++nundefs; + } else if (ext) { + ++nextdefs; + } else { + ++nlocals; + } + } + /* Re-derive without the common fudge by counting partition pass: we + * already wrote them in (locals,extdefs,undefs) order, so the prefix + * counts are just the per-pass counts. Mirror the spurious-UNDEF + * prune from the emit loop above so the LC_DYSYMTAB index counts + * line up with the symbols we actually wrote. */ + nlocals = 0; + nextdefs = 0; + nundefs = 0; + { + ObjSymIter* it = obj_symiter_new(ob); + ObjSymEntry e; + while (obj_symiter_next(it, &e)) { + const ObjSym* s = e.sym; + if (s->removed) continue; + int undef = sym_is_undef(s); + if (undef) + ++nundefs; + else if (sym_is_extdef(s)) + ++nextdefs; + else + ++nlocals; + } + obj_symiter_free(it); + } + + /* ---- pass 4: build per-section relocation tables -------------- */ + u32 total_relocs = obj_reloc_total(ob); + for (u32 i = 0; i < nsecs; ++i) { + MSec* m = &secs[i]; + u32 nr = obj_reloc_count(ob, m->obj_sec); + if (!nr) continue; + /* Worst case: each reloc may be preceded by an ARM64_RELOC_ADDEND + * pair entry. We size the buffer for that upper bound. 
*/ + u8* buf = (u8*)arena_alloc(c->scratch, (size_t)MACHO_RELOC_SIZE * nr * 2, + _Alignof(u32)); + u32 j = 0; + for (u32 ri = 0; ri < total_relocs; ++ri) { + const Reloc* r = obj_reloc_at(ob, ri); + if (r->removed) continue; + if (r->section_id != m->obj_sec) continue; + if ((r->kind == R_RV_ADD8 || r->kind == R_RV_ADD16 || + r->kind == R_RV_ADD32 || r->kind == R_RV_ADD64) && + ri + 1u < total_relocs) { + const Reloc* sub = obj_reloc_at(ob, ri + 1u); + int paired = sub && sub->section_id == r->section_id && + sub->offset == r->offset && + ((r->kind == R_RV_ADD8 && sub->kind == R_RV_SUB8) || + (r->kind == R_RV_ADD16 && sub->kind == R_RV_SUB16) || + (r->kind == R_RV_ADD32 && sub->kind == R_RV_SUB32) || + (r->kind == R_RV_ADD64 && sub->kind == R_RV_SUB64)); + if (paired) { + u32 length = (r->kind == R_RV_ADD64) ? 3u + : (r->kind == R_RV_ADD32) ? 2u + : (r->kind == R_RV_ADD16) ? 1u + : 0u; + u32 add_idx; + u32 sub_idx; + u32 sub_type = c->target.arch == CFREE_ARCH_ARM_64 + ? ARM64_RELOC_SUBTRACTOR + : X86_64_RELOC_SUBTRACTOR; + u32 unsigned_type = c->target.arch == CFREE_ARCH_ARM_64 + ? 
ARM64_RELOC_UNSIGNED + : X86_64_RELOC_UNSIGNED; + if (r->sym == OBJ_SYM_NONE || sub->sym == OBJ_SYM_NONE) { + compiler_panic(c, no_loc(), + "emit_macho: symdiff reloc without symbol"); + } + add_idx = sym_obj_to_macho[r->sym]; + sub_idx = sym_obj_to_macho[sub->sym]; + if (add_idx == 0 || sub_idx == 0) { + compiler_panic(c, no_loc(), + "emit_macho: symdiff reloc target not in symtab"); + } + { + u8* slot = buf + (size_t)j * MACHO_RELOC_SIZE; + wr_u32_le(slot + 0, (u32)r->offset); + wr_u32_le(slot + 4, ((sub_idx - 1u) & 0x00ffffffu) | + (length << 25) | (1u << 27) | + ((sub_type & 0xfu) << 28)); + ++j; + } + { + u8* slot = buf + (size_t)j * MACHO_RELOC_SIZE; + wr_u32_le(slot + 0, (u32)r->offset); + wr_u32_le(slot + 4, ((add_idx - 1u) & 0x00ffffffu) | + (length << 25) | (1u << 27) | + ((unsigned_type & 0xfu) << 28)); + ++j; + } + ++ri; + continue; + } + } + u32 mtype = reloc_to(r->kind); + if (mtype == (u32)-1) { + compiler_panic(c, no_loc(), + "emit_macho: unsupported reloc kind %u for arch %u", + (u32)r->kind, (u32)c->target.arch); + } + u32 pcrel = reloc_pcrel(r->kind); + u32 length = reloc_length(r->kind); + + /* Resolve target — extern always 1 in our model (every Reloc has + * an ObjSymId). Skip relocs without a symbol — they would map to + * a section-relative reloc which the v1 cgtarget never emits. */ + if (r->sym == OBJ_SYM_NONE) { + compiler_panic(c, no_loc(), + "emit_macho: reloc without symbol not supported " + "(sec=%u offset=%u kind=%u)", + (u32)r->section_id, (u32)r->offset, (u32)r->kind); + } + u32 mach_sym_idx = sym_obj_to_macho[r->sym]; + if (mach_sym_idx == 0) { + compiler_panic(c, no_loc(), + "emit_macho: reloc target sym %u not in symtab", + (u32)r->sym); + } + u32 r_symbolnum = mach_sym_idx - 1; /* Mach-O uses 0-based. */ + + /* Non-zero addend: emit a leading ARM64_RELOC_ADDEND pair (only + * meaningful for non-UNSIGNED types — UNSIGNED carries the addend + * inline in the patched bytes). 
*/ + if (r->addend != 0 && mtype != ARM64_RELOC_UNSIGNED) { + u8* slot = buf + (size_t)j * MACHO_RELOC_SIZE; + wr_u32_le(slot + 0, (u32)r->offset); + u32 packed = ((u32)(i64)r->addend & 0x00ffffffu) | (0u << 24) | + (length << 25) | (1u << 27) /*extern*/ | + (ARM64_RELOC_ADDEND << 28); + wr_u32_le(slot + 4, packed); + ++j; + } + + u8* slot = buf + (size_t)j * MACHO_RELOC_SIZE; + wr_u32_le(slot + 0, (u32)r->offset); + u32 packed = (r_symbolnum & 0x00ffffffu) | ((pcrel & 1u) << 24) | + ((length & 3u) << 25) | (1u << 27) /*extern*/ | + ((mtype & 0xfu) << 28); + wr_u32_le(slot + 4, packed); + ++j; + } + m->relocs = buf; + m->nreloc = j; + } + + /* ---- pass 5: assign file offsets ------------------------------ */ + /* Layout after the load-command block: + * section bytes (in order, respecting align) + * relocation tables (per section, 4-aligned) + * symbol table (8-aligned) + * string table */ + u32 nload_cmds = + 4; /* LC_SEGMENT_64 + LC_BUILD_VERSION + LC_SYMTAB + LC_DYSYMTAB */ + u32 segcmd_size = MACHO_SEGCMD64_SIZE + nsecs * MACHO_SECT64_SIZE; + u32 build_version_size = + 24; /* fixed: cmd+cmdsize+platform+minos+sdk+ntools(0) */ + u32 sizeofcmds = segcmd_size + build_version_size + MACHO_SYMTAB_CMD_SIZE + + MACHO_DYSYMTAB_CMD_SIZE; + + u64 cur = MACHO_HDR64_SIZE + sizeofcmds; + u32 fileoff_first = (u32)cur; + for (u32 i = 0; i < nsecs; ++i) { + MSec* m = &secs[i]; + if (m->is_zerofill) { + m->fileoff = 0; + continue; + } + cur = ALIGN_UP(cur, (u64)m->align); + m->fileoff = (u32)cur; + cur += m->size; + } + + /* Reloc tables. 
*/ + for (u32 i = 0; i < nsecs; ++i) { + MSec* m = &secs[i]; + if (!m->nreloc) { + m->reloff = 0; + continue; + } + cur = ALIGN_UP(cur, (u64)4); + m->reloff = (u32)cur; + cur += (u64)m->nreloc * MACHO_RELOC_SIZE; + } + + cur = ALIGN_UP(cur, (u64)8); + u64 symoff = cur; + cur += (u64)nmsyms * MACHO_NLIST64_SIZE; + u64 stroff = cur; + u32 strtab_size = buf_pos(&strtab); + cur += strtab_size; + + /* ---- pass 6: write the file ------------------------------------ */ + cfree_writer_seek(w, 0); + + /* mach_header_64 */ + wr_u32(w, MH_MAGIC_64); + wr_u32(w, cputype); + wr_u32(w, cpusubtype); + wr_u32(w, MH_OBJECT); + wr_u32(w, nload_cmds); + wr_u32(w, sizeofcmds); + wr_u32(w, 0); /* flags — MH_OBJECT carries none in v1 */ + wr_u32(w, 0); /* reserved */ + + /* LC_SEGMENT_64 (anonymous, contains everything) */ + wr_u32(w, LC_SEGMENT_64); + wr_u32(w, segcmd_size); + wr_name16(w, "", 0); /* segname: empty for MH_OBJECT */ + wr_u64(w, 0); /* vmaddr */ + wr_u64(w, segment_vmsize); /* vmsize */ + wr_u64(w, fileoff_first); /* fileoff */ + /* filesize = bytes covered by non-zerofill sections (post-section + * file offset minus the start). */ + u64 filesize = 0; + for (u32 i = 0; i < nsecs; ++i) { + MSec* m = &secs[i]; + if (m->is_zerofill) continue; + u64 end = (u64)m->fileoff + m->size; + u64 begin = m->fileoff; + if (end > filesize + fileoff_first) filesize = end - fileoff_first; + (void)begin; + } + wr_u64(w, filesize); + /* maxprot/initprot — VM_PROT_READ|WRITE|EXECUTE = 7 for object segs. 
*/ + wr_u32(w, 7); + wr_u32(w, 7); + wr_u32(w, nsecs); + wr_u32(w, 0); /* flags */ + + /* sections inline within the segment command */ + for (u32 i = 0; i < nsecs; ++i) { + MSec* m = &secs[i]; + wr_name16(w, m->ns.sectname, m->ns.sect_len); + wr_name16(w, m->ns.segname, m->ns.seg_len); + wr_u64(w, m->addr); + wr_u64(w, m->size); + wr_u32(w, m->fileoff); + wr_u32(w, log2_align(m->align)); + wr_u32(w, m->reloff); + wr_u32(w, m->nreloc); + wr_u32(w, m->flags); + wr_u32(w, 0); /* reserved1 */ + wr_u32(w, m->entsize); /* reserved2 */ + wr_u32(w, 0); /* reserved3 */ + } + + /* LC_BUILD_VERSION — platform=PLATFORM_MACOS(1), minos/sdk=14.0.0, + * ntools=0. The exact min-version isn't load-bearing for MH_OBJECT, + * but Apple's `ld` warns when it's missing. */ + wr_u32(w, LC_BUILD_VERSION); + wr_u32(w, build_version_size); + wr_u32(w, 1); /* platform: PLATFORM_MACOS */ + wr_u32(w, (14u << 16) | 0); /* minos: 14.0.0 */ + wr_u32(w, (14u << 16) | 0); /* sdk: 14.0.0 */ + wr_u32(w, 0); /* ntools */ + + /* LC_SYMTAB */ + wr_u32(w, LC_SYMTAB); + wr_u32(w, MACHO_SYMTAB_CMD_SIZE); + wr_u32(w, (u32)symoff); + wr_u32(w, nmsyms); + wr_u32(w, (u32)stroff); + wr_u32(w, strtab_size); + + /* LC_DYSYMTAB */ + wr_u32(w, LC_DYSYMTAB); + wr_u32(w, MACHO_DYSYMTAB_CMD_SIZE); + wr_u32(w, 0); /* ilocalsym */ + wr_u32(w, nlocals); + wr_u32(w, nlocals); + wr_u32(w, nextdefs); + wr_u32(w, nlocals + nextdefs); + wr_u32(w, nundefs); + wr_u32(w, 0); + wr_u32(w, 0); /* tocoff, ntoc */ + wr_u32(w, 0); + wr_u32(w, 0); /* modtaboff, nmodtab */ + wr_u32(w, 0); + wr_u32(w, 0); /* extrefsymoff, nextrefsyms */ + wr_u32(w, 0); + wr_u32(w, 0); /* indirectsymoff, nindirectsyms */ + wr_u32(w, 0); + wr_u32(w, 0); /* extreloff, nextrel */ + wr_u32(w, 0); + wr_u32(w, 0); /* locreloff, nlocrel */ + + /* section bytes */ + for (u32 i = 0; i < nsecs; ++i) { + MSec* m = &secs[i]; + if (m->is_zerofill || !m->size) continue; + cfree_writer_seek(w, m->fileoff); + if (m->obj_bytes) { + u32 sz = m->obj_bytes->total; + u8* tmp 
= (u8*)h->alloc(h, sz ? sz : 1, 1); + if (sz) buf_flatten(m->obj_bytes, tmp); + cfree_writer_write(w, tmp, sz); + h->free(h, tmp, sz ? sz : 1); + } + } + + /* reloc tables */ + for (u32 i = 0; i < nsecs; ++i) { + MSec* m = &secs[i]; + if (!m->nreloc) continue; + cfree_writer_seek(w, m->reloff); + cfree_writer_write(w, m->relocs, (size_t)m->nreloc * MACHO_RELOC_SIZE); + } + + /* symtab */ + cfree_writer_seek(w, symoff); + for (u32 i = 0; i < nmsyms; ++i) { + const MSym* ms = &msyms[i]; + u8 entry[MACHO_NLIST64_SIZE]; + wr_u32_le(entry + 0, ms->strx); + entry[4] = ms->n_type; + entry[5] = ms->n_sect; + wr_u16_le(entry + 6, ms->n_desc); + wr_u64_le(entry + 8, ms->n_value); + cfree_writer_write(w, entry, MACHO_NLIST64_SIZE); + } + + /* strtab */ + { + u8* flat = (u8*)arena_alloc(c->scratch, strtab_size ? strtab_size : 1, 1); + if (strtab_size) buf_flatten(&strtab, flat); + cfree_writer_seek(w, stroff); + cfree_writer_write(w, flat, strtab_size); + } + buf_fini(&strtab); +} diff --git a/src/obj/macho/link.c b/src/obj/macho/link.c @@ -0,0 +1,2613 @@ +/* link_emit_macho — write a dyld-loadable arm64 MH_EXECUTE. + * + * Mach-O peer of link_emit_elf. Produces a position-independent + * MH_EXECUTE that links against libSystem.B.dylib (or any other + * dylib/.tbd input) via LC_LOAD_DYLIB + LC_DYLD_CHAINED_FIXUPS. The + * binary is ad-hoc codesigned at the tail so the kernel will exec it + * on macOS 11+. + * + * Layout (Apple's stock arm64 layout): + * + * __PAGEZERO vmaddr 0, vmsize 0x100000000, no file bytes + * __TEXT (R-X) + * mach_header_64 + * load commands + * [SF_EXEC sections — .text] + * [SF_ALLOC R-only sections — .rodata, init/fini_array, etc.] 
+ * __stubs (12B per import-func) + * __DATA_CONST (RW initially, dyld marks R-only after fixups) + * __got (8B per import — both data and func imports) + * __DATA (R-W) + * [SF_WRITE sections — .data, .bss] + * __LINKEDIT (R) + * dyld_chained_fixups blob + * dyld_exports_trie blob + * function starts (empty) + * data in code (empty) + * symtab + * indirect symbol table (one entry per __stubs and __got slot) + * strtab + * code signature + * + * Imports are routed: + * CALL26/JUMP26 against an imported function -> __stubs entry + * GOT_LOAD_PAGE21/PAGEOFF12 against any import -> __got slot + * ABS64 against an imported symbol -> chained-bind at site + * ABS64 against a defined internal symbol -> chained-rebase at site + * + * arm64-only. x86_64-macos arrives with x64 codegen. */ + +#include "link/link.h" + +#include <string.h> + +#include "core/bytes.h" +#include "core/heap.h" +#include "core/pool.h" +#include "core/sha256.h" +#include "core/slice.h" +#include "core/util.h" +#include "core/vec.h" +#include "link/link_arch.h" +#include "link/link_internal.h" +#include "obj/format.h" +#include "obj/macho/macho.h" + +static SrcLoc no_loc(void) { + SrcLoc l = {0, 0, 0}; + return l; +} + +/* ---- constants ---- */ +#define MZ_PAGEZERO 0x100000000ULL +#define MZ_PAGE 0x4000ULL +#define MZ_GOT_SIZE 8u +/* __DATA,__thread_ptrs slot size — one pointer per unique TLV referenced + * via TLVP_LOAD_PAGE21/PAGEOFF12. Each slot holds the address of the + * matching TLV descriptor in __DATA,__thread_vars. 
*/ +#define MZ_TLVP_SIZE 8u + +#define DYLD_CHAINED_PTR_64 2u +#define DYLD_CHAINED_IMPORT 1u + +#define VM_PROT_READ 0x1u +#define VM_PROT_WRITE 0x2u +#define VM_PROT_EXECUTE 0x4u + +#define CS_MAGIC_EMBEDDED_SIGNATURE 0xfade0cc0u +#define CS_MAGIC_CODEDIRECTORY 0xfade0c02u +#define CSSLOT_CODEDIRECTORY 0u +#define CS_HASHTYPE_SHA256 2u +#define CS_SHA256_LEN SHA256_DIGEST_LEN +#define CS_PAGE_SIZE_LOG2 12u +#define CS_EXECSEG_MAIN_BINARY 1u + +/* extra LC ids */ +#define LC_DYLD_INFO_ONLY (0x22u | 0x80000000u) +#define LC_FUNCTION_STARTS_C 0x26u +#define LC_DATA_IN_CODE_C 0x29u +#define LC_CODE_SIGNATURE_C 0x1du + +/* ---- byte buffer ---- */ + +/* Growable byte buffer: `data` holds `len` used bytes out of `cap` allocated; storage is grown and released through `heap`. */ typedef struct MByte { + Heap* heap; + u8* data; + u32 len; + u32 cap; +} MByte; + +/* Reset b to an empty buffer (no storage yet) that will allocate from h. */ static void mbuf_init(MByte* b, Heap* h) { + b->heap = h; + b->data = NULL; + b->len = 0; + b->cap = 0; +} +/* Release the storage, if any, and reset len/cap to 0; b->heap is left set. */ static void mbuf_fini(MByte* b) { + if (b->data) b->heap->free(b->heap, b->data, b->cap); + b->data = NULL; + b->cap = b->len = 0; +} +/* Grow the buffer so that cap >= need; no-op when it already is. NOTE(review): the VEC_GROW result is discarded here, while collect_imports in this file treats a nonzero VEC_GROW result as OOM. If growth can fail, the callers below would write past the old capacity — confirm VEC_GROW's failure contract. */ static void mbuf_reserve(MByte* b, u32 need) { + if (need <= b->cap) return; + (void)VEC_GROW(b->heap, b->data, b->cap, need); +} +/* Zero-fill from len up to ALIGN_UP(len, a) and return the resulting length (unchanged when len is already at that value). */ static u32 mbuf_align(MByte* b, u32 a) { + u32 n = (u32)ALIGN_UP((u64)b->len, (u64)a); + if (n > b->len) { + mbuf_reserve(b, n); + memset(b->data + b->len, 0, n - b->len); + b->len = n; + } + return b->len; +} +/* Copy n bytes from src to the end of the buffer; returns the offset at which they were written. n == 0 copies nothing and returns the current length. */ static u32 mbuf_append(MByte* b, const void* src, u32 n) { + u32 off = b->len; + mbuf_reserve(b, b->len + n); + if (n) memcpy(b->data + b->len, src, n); + b->len += n; + return off; +} +/* Append the 4 bytes produced by wr_u32_le(v); returns their offset. */ static u32 mbuf_u32(MByte* b, u32 v) { + u8 t[4]; + wr_u32_le(t, v); + return mbuf_append(b, t, 4); +} +/* Append the 2 bytes produced by wr_u16_le(v); returns their offset. */ static u32 mbuf_u16(MByte* b, u16 v) { + u8 t[2]; + wr_u16_le(t, v); + return mbuf_append(b, t, 2); +} +/* Append the 8 bytes produced by wr_u64_le(v); returns their offset. */ static u32 mbuf_u64(MByte* b, u64 v) { + u8 t[8]; + wr_u64_le(t, v); + return mbuf_append(b, t, 8); +} +/* Append the single byte v; returns its offset. */ static u32 mbuf_u8(MByte* b, u8 v) { return mbuf_append(b, &v, 1); } +/* Append n bytes of s followed by one 0 byte; returns the offset of the first byte written. */ static u32 mbuf_str(MByte* b, const char* s, u32 n) { + u32 off = b->len; + mbuf_reserve(b, b->len + n + 
1u); + if (n) memcpy(b->data + b->len, s, n); + b->data[b->len + n] = 0; + b->len += n + 1u; + return off; +} + +/* ---- imports + dylibs ---- */ + +typedef struct MachImp { + LinkSymId sym; + Sym name; + u32 dylib_ord; /* 1-based ordinal into LC_LOAD_DYLIB list */ + u32 stub_idx; /* 1-based index into __stubs (0 if data import) */ + u32 got_idx; /* 1-based index into __got */ + u32 imports_strx; /* offset into chained-fixups symbol pool */ + u8 is_func; + u8 weak; + /* internal=1 means this entry is an in-image symbol that's referenced + * via GOT_LOAD_PAGE21 / LD64_GOT_LO12_NC (clang emits these for any + * extern global so a single static-link can later become PIC). The + * GOT slot stores the symbol's image-relative vaddr and gets a + * chained-fixup rebase entry (or no entry at all for a weak-undef + * resolving to NULL). No dylib_ord / stub_idx / chained-fixup bind. */ + u8 internal; + u8 pad[1]; + u64 internal_vaddr; /* image-relative target vaddr; meaningful only when + internal=1 */ +} MachImp; + +typedef struct MachDylib { + Sym install; +} MachDylib; + +/* One slot in the synthetic __DATA,__thread_ptrs section per unique TLV + * descriptor referenced via TLVP_LOAD_PAGE21/PAGEOFF12. Modeled after + * MachImp's internal-GOT entries: the slot holds the descriptor address + * (REBASE for internal-to-image descriptors, BIND for dylib-imported + * ones). The descriptor itself is laid out in __DATA,__thread_vars by + * either the input objects (internal) or the providing dylib (imported). 
*/ +typedef struct MachTlv { + LinkSymId sym; /* canonical descriptor LinkSymId */ + u32 tlv_idx; /* 1-based slot index in __thread_ptrs */ + u8 imported; /* 1 == descriptor lives in a dylib (BIND), 0 == internal + (REBASE) */ + u8 pad[3]; + u32 import_idx; /* 1-based MachImp index when imported (for chained-bind + ordinal) */ +} MachTlv; + +/* ---- planned section ---- */ + +typedef struct MSec { + /* Source: either a LinkSection (link_sec_id != 0) or a synthetic + * pre-built byte buffer (synth_data + synth_size). */ + LinkSectionId link_sec_id; + const u8* synth_data; + u32 synth_size; + /* Mach-O placement */ + const char* segname; + const char* sectname; + /* Inline storage for segname/sectname when split from a Mach-O + * `__SEG,__sect`-form LinkSection name. Names from string literals + * (synth sections, derived-from-flags defaults) point at .rodata + * and don't use these. 16 bytes matches the on-disk field width. */ + char segname_buf[16]; + char sectname_buf[16]; + u64 vaddr; + u64 file_offset; + u64 size; + u32 align; + u32 flags; /* S_TYPE | S_ATTR_* */ + u32 reserved1; + u32 reserved2; + u8 segidx; /* 1=__TEXT, 2=__DATA_CONST, 3=__DATA */ + u8 is_zerofill; + u8 pad[6]; +} MSec; + +/* Re-point segname/sectname at this MSec's own inline buffers whenever the corresponding buffer is non-empty; names that live elsewhere (empty buffer) are left alone. NOTE(review): presumably needed after an MSec is copied or moved, because the copied pointers would still refer to the source struct's buffers — confirm against callers. */ static void msec_repair_name_ptrs(MSec* m) { + if (m->segname_buf[0]) m->segname = m->segname_buf; + if (m->sectname_buf[0]) m->sectname = m->sectname_buf; +} + +/* One output segment: its name, max/initial protections, VM and file extents, and the index ranges of the MSec and OutSec entries that belong to it. */ typedef struct MSeg { + const char* name; + u32 maxprot; + u32 initprot; + u64 vmaddr; + u64 vmsize; + u64 fileoff; + u64 filesize; + u32 nsects; /* MSec count in segment — internal layout */ + u32 first_sec; /* first index into MSec[] */ + u32 nouts; /* OutSec count in segment — what hits the file */ + u32 first_out; /* first index into OutSec[] */ +} MSeg; + +/* On-disk section view: one record per (segname, sectname) within a + * segment. Mach-O requires this — emitting one section_64 per input + * MSec yields sibling __TEXT,__text records that violate the spec. 
+ * Built from MSec[] after vaddr placement; reloc-apply still uses + * MSec[] for byte-buffer addressing. */ +typedef struct OutSec { + const char* segname; + const char* sectname; + u64 vaddr; + u64 file_offset; + u64 size; + u32 align; + u32 flags; + u32 reserved1; + u32 reserved2; + u8 segidx; + u8 is_zerofill; +} OutSec; + +/* ---- main context ---- */ + +typedef struct MCtx { + LinkImage* img; + Compiler* c; + Heap* h; + Writer* w; + Linker* linker; + const LinkArchDesc* link_arch; + const ObjMachoArchOps* macho; + + /* imports */ + MachImp* imports; + u32 nimports; + u32 nimports_real; /* count of imports with internal=0 (== prefix length; + * collect_imports appends internal=1 entries last) */ + u32 nimport_funcs; + MachDylib* dylibs; + u32 ndylibs; + /* sym->import index, 1-based, 0 = not an import. Sized to LinkSymId space + * + 1. */ + u32* sym_to_imp; + u32 sym_to_imp_size; + + /* sections + segments */ + MSec* secs; + u32 nsecs; + OutSec* outs; + u32 nouts; + MSeg segs[5]; /* PAGEZERO, TEXT, DATA_CONST, DATA, LINKEDIT */ + u32 nsegs; + + /* Synthetic byte buffers, owned. */ + u8* stubs_bytes; + u32 stubs_size; + u8* got_bytes; + u32 got_size; + /* TLV pointer slots — one entry in __DATA,__thread_ptrs per unique + * descriptor referenced via TLVP_LOAD_PAGE21/PAGEOFF12. sym_to_tlv + * maps LinkSymId → 1-based slot index (parallel to sym_to_imp). Slot + * bytes are populated at apply_relocs time once shift_sections has + * pinned descriptor vaddrs. */ + MachTlv* tlv_slots; + u32 ntlv; + u32* sym_to_tlv; + u32 sym_to_tlv_size; + u8* tlv_ptrs_bytes; + u32 tlv_ptrs_size; + u64 tlv_ptrs_vaddr; + /* Vaddr of the first thread-local-storage section + * (__thread_data / __thread_bss). Each TLV descriptor's word 2 + * stores the symbol's offset within this image rather than an + * absolute address — see apply_relocs's S_THREAD_LOCAL_VARIABLES + * ABS64 special case. 
*/ + u64 tls_image_vaddr; + u8 has_tls_image; + + /* Final layout (computed during plan) */ + u64 text_vaddr; + u64 text_filesz; + u64 stubs_vaddr; + u64 got_vaddr; + u64 data_const_vaddr; + u64 data_vaddr; + u64 data_const_filesz; + u64 data_filesz; + u64 data_memsz; + u64 linkedit_vaddr; + u64 linkedit_fileoff; + u32 entry_offset; /* offset of entry within __TEXT segment */ + + u64 headers_size; /* header + loadcmds */ + + /* LINKEDIT contents */ + MByte chained_fixups; + MByte exports_trie; + MByte symtab; /* binary nlist_64 array */ + MByte strtab; + MByte indirect; /* u32 array */ + MByte fn_starts; + MByte data_in_code; + MByte codesig; + + u32 chained_fixups_off; + u32 exports_trie_off; + u32 fn_starts_off; + u32 data_in_code_off; + u32 symtab_off; + u32 indirect_off; + u32 strtab_off; + u32 codesig_off; + u32 codesig_size; + u32 nsyms; + + u8 uuid[16]; +} MCtx; + +/* ---- helpers for finding LinkSymbol vaddr ---- */ + +/* Look up a LinkSymbol by its 1-based LinkSymId; returns NULL for LINK_SYM_NONE or an id greater than the image's symbol count. */ static LinkSymbol* sym_at(LinkImage* img, LinkSymId id) { + if (id == LINK_SYM_NONE || id > LinkSyms_count(&img->syms)) return NULL; + return LinkSyms_at(&img->syms, id - 1); +} + +/* ---- pass: collect imports ---- */ + +/* Return the 1-based position in x->dylibs of the entry whose install name equals `install`, or 0 when no entry matches. Linear scan. */ static u32 dylib_ordinal_of(MCtx* x, Sym install) { + for (u32 j = 0; j < x->ndylibs; ++j) + if (x->dylibs[j].install == install) return j + 1u; + return 0; +} + +/* Build the import tables on x: allocate and fill sym_to_imp, append one MachImp per canonical imported symbol, mark branch-reloc targets as functions, build the dylib ordinal list, append internal GOT entries, and assign stub/GOT indices. Panics via compiler_panic on allocation failure. */ static void collect_imports(MCtx* x) { + LinkImage* img = x->img; + Heap* h = x->h; + + /* One slot per LinkSymId plus slot 0, so the 1-based id indexes directly. */ x->sym_to_imp_size = LinkSyms_count(&img->syms) + 1u; + x->sym_to_imp = + (u32*)h->alloc(h, sizeof(u32) * x->sym_to_imp_size, _Alignof(u32)); + if (!x->sym_to_imp) + compiler_panic(x->c, no_loc(), "link_macho: oom on sym_to_imp"); + memset(x->sym_to_imp, 0, sizeof(u32) * x->sym_to_imp_size); + + u32 cap = 0, cap_d = 0; + for (u32 i = 0; i < LinkSyms_count(&img->syms); ++i) { + LinkSymbol* s = LinkSyms_at(&img->syms, i); + if (!s->imported) continue; + if (s->name == 0) continue; + /* Skip symbols that are not the canonical entry for their name in img->globals. */ LinkSymId canon = symhash_get(&img->globals, s->name); + if (canon != LINK_SYM_NONE && canon != 
s->id) continue; + if (VEC_GROW(h, x->imports, cap, x->nimports + 1u)) + compiler_panic(x->c, no_loc(), "link_macho: oom on imports"); + MachImp* mi = &x->imports[x->nimports++]; + memset(mi, 0, sizeof(*mi)); + mi->sym = s->id; + mi->name = s->name; + mi->is_func = (s->kind == SK_FUNC || s->kind == SK_IFUNC) ? 1 : 0; + mi->weak = (s->bind == SB_WEAK) ? 1 : 0; + x->sym_to_imp[s->id] = x->nimports; + } + + /* Back-classify: any CALL26/JUMP26 reloc target -> function. */ + for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) { + LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); + if (!x->link_arch->is_branch_reloc || + !x->link_arch->is_branch_reloc(r->kind)) + continue; + if (r->target == LINK_SYM_NONE || r->target >= x->sym_to_imp_size) continue; + u32 idx = x->sym_to_imp[r->target]; + if (!idx) { + /* Resolve through canonical. */ + LinkSymbol* tgt = LinkSyms_at(&img->syms, r->target - 1); + if (tgt->name == 0) continue; + LinkSymId canon = symhash_get(&img->globals, tgt->name); + if (canon == LINK_SYM_NONE || canon >= x->sym_to_imp_size) continue; + idx = x->sym_to_imp[canon]; + if (!idx) continue; + /* Stash so future lookups skip this loop. */ + x->sym_to_imp[r->target] = idx; + } + x->imports[idx - 1].is_func = 1; + } + + /* Build dylib ordinal table. Pull soname from the providing DSO. */ + for (u32 i = 0; i < x->nimports; ++i) { + MachImp* mi = &x->imports[i]; + LinkSymbol* s = sym_at(img, mi->sym); + LinkInputId dso_id = s ? 
s->dso_input_id : LINK_INPUT_NONE; + Sym install = 0; + if (dso_id != LINK_INPUT_NONE && x->linker && + dso_id - 1u < LinkInputs_count(&x->linker->inputs)) { + LinkInput* in = LinkInputs_at(&x->linker->inputs, dso_id - 1u); + if (in->kind == LINK_INPUT_DSO_BYTES) install = in->soname; + } + if (install == 0) + install = pool_intern_slice(x->c->global, + SLICE_LIT("/usr/lib/libSystem.B.dylib")); + u32 ord = dylib_ordinal_of(x, install); + if (!ord) { + if (VEC_GROW(h, x->dylibs, cap_d, x->ndylibs + 1u)) + compiler_panic(x->c, no_loc(), "link_macho: oom on dylibs"); + x->dylibs[x->ndylibs].install = install; + ++x->ndylibs; + ord = x->ndylibs; + } + mi->dylib_ord = ord; + } + + /* Always include every DSO input's install-name. */ + if (x->linker) { + for (u32 ii = 0; ii < LinkInputs_count(&x->linker->inputs); ++ii) { + LinkInput* in = LinkInputs_at(&x->linker->inputs, ii); + if (in->kind != LINK_INPUT_DSO_BYTES) continue; + if (in->soname == 0) continue; + if (dylib_ordinal_of(x, in->soname)) continue; + if (VEC_GROW(h, x->dylibs, cap_d, x->ndylibs + 1u)) + compiler_panic(x->c, no_loc(), "link_macho: oom on dylibs"); + x->dylibs[x->ndylibs].install = in->soname; + ++x->ndylibs; + } + } + + /* All entries so far are real imports; remember the partition point + * so import/symtab table emit loops can skip the appended internals. */ + x->nimports_real = x->nimports; + + /* Internal GOT pass. clang on Mach-O routes every extern-global + * reference through the GOT (GOT_LOAD_PAGE21 / LD64_GOT_LO12_NC), so + * even a common symbol or weak-undef that ends up resolved within the + * image still needs a __got slot. For each such reloc whose target + * isn't an existing import, materialize a MachImp with internal=1. + * The slot's contents are filled at write time and a chained-fixup + * REBASE entry (or none, for weak undef → NULL) keeps it valid + * post-ASLR. 
*/ + for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) { + LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); + if (!x->link_arch->is_got_load_reloc || + !x->link_arch->is_got_load_reloc(r->kind)) + continue; + if (r->target == LINK_SYM_NONE || r->target >= x->sym_to_imp_size) continue; + if (x->sym_to_imp[r->target]) continue; + LinkSymbol* t = sym_at(img, r->target); + if (!t) continue; + /* Resolve through canonical so we share a single slot per symbol. */ + LinkSymId canon = r->target; + if (t->name != 0) { + LinkSymId hit = symhash_get(&img->globals, t->name); + if (hit != LINK_SYM_NONE) { + canon = hit; + if (x->sym_to_imp[canon]) { + x->sym_to_imp[r->target] = x->sym_to_imp[canon]; + continue; + } + t = sym_at(img, canon); + if (!t) continue; + } + } + if (VEC_GROW(h, x->imports, cap, x->nimports + 1u)) + compiler_panic(x->c, no_loc(), "link_macho: oom on internal got"); + MachImp* mi = &x->imports[x->nimports++]; + memset(mi, 0, sizeof(*mi)); + mi->sym = canon; + mi->name = t->name; + mi->is_func = (t->kind == SK_FUNC || t->kind == SK_IFUNC) ? 1 : 0; + mi->weak = (t->bind == SB_WEAK) ? 1 : 0; + mi->internal = 1; + /* internal_vaddr is read fresh from the LinkSymbol when the slot + * gets initialized — collect_imports runs before shift_sections + * rebases section vaddrs to Mach-O layout, so capturing here would + * be stale by the time __got bytes are written. */ + mi->internal_vaddr = 0; + x->sym_to_imp[canon] = x->nimports; + if (canon != r->target) x->sym_to_imp[r->target] = x->nimports; + } + + /* Assign stub_idx + got_idx. Internal entries get a slot but no stub: + * the call site (CALL26) on internal funcs goes direct, not via stub. 
*/ + u32 stub_run = 0; + for (u32 i = 0; i < x->nimports; ++i) { + MachImp* mi = &x->imports[i]; + mi->got_idx = i + 1u; + if (mi->is_func && !mi->internal) mi->stub_idx = ++stub_run; + } + x->nimport_funcs = stub_run; +} + +/* ---- pass: collect TLV pointer slots ---- + * + * Mirror of collect_imports' internal-GOT pass, but for TLV descriptors: + * each unique descriptor referenced via ARM64_RELOC_TLVP_LOAD_PAGE21 / + * PAGEOFF12 gets one slot in the synthetic __DATA,__thread_ptrs section. + * The slot's runtime value is the descriptor's address; we patch it at + * apply_relocs time (REBASE for in-image descriptors, BIND for ones in + * a dylib). + * + * Slots are deduplicated by canonical LinkSymId so a single descriptor + * referenced from N call sites shares one __thread_ptrs entry. */ +static void collect_tlv(MCtx* x) { + LinkImage* img = x->img; + Heap* h = x->h; + x->sym_to_tlv_size = LinkSyms_count(&img->syms) + 1u; + x->sym_to_tlv = + (u32*)h->alloc(h, sizeof(u32) * x->sym_to_tlv_size, _Alignof(u32)); + if (!x->sym_to_tlv) + compiler_panic(x->c, no_loc(), "link_macho: oom on sym_to_tlv"); + memset(x->sym_to_tlv, 0, sizeof(u32) * x->sym_to_tlv_size); + + u32 cap = 0; + for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) { + LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); + if (!x->link_arch->is_tlvp_reloc || !x->link_arch->is_tlvp_reloc(r->kind)) + continue; + if (r->target == LINK_SYM_NONE || r->target >= x->sym_to_tlv_size) continue; + /* Resolve through canonical so multiple per-input duplicate undefs + * collapse onto one __thread_ptrs slot. 
*/ + LinkSymId canon = r->target; + LinkSymbol* t = sym_at(img, r->target); + if (!t) continue; + if (t->name != 0) { + LinkSymId hit = symhash_get(&img->globals, t->name); + if (hit != LINK_SYM_NONE) { + canon = hit; + t = sym_at(img, canon); + if (!t) continue; + } + } + if (x->sym_to_tlv[canon]) { + if (canon != r->target) x->sym_to_tlv[r->target] = x->sym_to_tlv[canon]; + continue; + } + if (VEC_GROW(h, x->tlv_slots, cap, x->ntlv + 1u)) + compiler_panic(x->c, no_loc(), "link_macho: oom on tlv_slots"); + MachTlv* ts = &x->tlv_slots[x->ntlv++]; + memset(ts, 0, sizeof(*ts)); + ts->sym = canon; + ts->tlv_idx = x->ntlv; + ts->imported = t->imported ? 1u : 0u; + /* If the descriptor is imported we route the bind through the + * symbol's MachImp slot — that's where dyld's chained-import index + * comes from. When this loop fires the imp pass has already + * materialized the entry (real imports were processed first); the + * lookup may also have stashed an alias for non-canonical ids. */ + if (ts->imported) { + u32 idx = (canon < x->sym_to_imp_size) ? x->sym_to_imp[canon] : 0u; + if (!idx && t->name != 0) { + LinkSymId hit2 = symhash_get(&img->globals, t->name); + if (hit2 != LINK_SYM_NONE && hit2 < x->sym_to_imp_size) + idx = x->sym_to_imp[hit2]; + } + ts->import_idx = idx; + } + x->sym_to_tlv[canon] = x->ntlv; + if (canon != r->target) x->sym_to_tlv[r->target] = x->ntlv; + } +} + +/* ---- pass: plan Mach-O sections ---- + * + * Walks LinkImage sections. Each non-zero-size LinkSection becomes one + * MSec. Synthetic __stubs and __got are appended at the right segment + * boundaries. Vaddr and file_offset are assigned in a single forward + * pass starting at __TEXT base; __PAGEZERO and __LINKEDIT are special. 
*/ + +/* Zero *s, then set its name and max/initial protections; every other field stays 0. */ static void seg_init(MSeg* s, const char* name, u32 maxp, u32 initp) { + memset(s, 0, sizeof(*s)); + s->name = name; + s->maxprot = maxp; + s->initprot = initp; +} + +/* Nonzero when SF_WRITE is set in ls->flags. */ static int sec_is_writable(const LinkSection* ls) { + return (ls->flags & SF_WRITE) != 0u; +} +/* Nonzero when SF_EXEC is set in ls->flags. */ static int sec_is_exec(const LinkSection* ls) { + return (ls->flags & SF_EXEC) != 0u; +} +/* Nonzero when the section's sem is SSEM_NOBITS. */ static int sec_is_zerofill(const LinkSection* ls) { + return ls->sem == SSEM_NOBITS; +} + +/* Returns 1 if any reloc in img->relocs has link_section_id == id and kind R_ABS64, else 0. Scans every reloc in the image on each call. */ static int section_has_abs64_reloc(const LinkImage* img, LinkSectionId id) { + for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) { + const LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); + if (r->link_section_id == id && r->kind == R_ABS64) return 1; + } + return 0; +} + +/* Returns nonzero for a non-NULL, non-empty section that is neither executable, writable nor zerofill and carries at least one R_ABS64 reloc. plan_layout skips such sections in its __TEXT pass and places them in __DATA_CONST instead. */ static int sec_needs_data_const(const LinkImage* img, const LinkSection* ls) { + if (!ls || !ls->size || sec_is_exec(ls) || sec_is_writable(ls) || + sec_is_zerofill(ls)) { + return 0; + } + return section_has_abs64_reloc(img, ls->id); +} + +/* Pick (segname, sectname) for a LinkSection. Comma-form Mach-O names + * round-trip into MSec's inline 16-byte buffers; literal defaults point + * at .rodata strings. Caller passes the MSec for per-section storage — + * a previous version used a shared static buffer which aliased all + * sections to whichever name was set last. */ +static void pick_macho_names(const LinkSection* ls, Compiler* c, MSec* m) { + Slice nm_s = pool_slice(c->global, ls->name); + const char* nm = nm_s.s; + size_t nlen = nm_s.len; + if (nm) { + /* Comma-form: "__SEG,__sect" round-tripped from a Mach-O input. */ + for (size_t i = 0; i < nlen; ++i) { + if (nm[i] == ',') { + /* Keep at most 15 bytes of each half so the 0 terminator fits in the 16-byte buffer; longer names are truncated. */ u32 seg_n = (u32)(i > 15 ? 15 : i); + memcpy(m->segname_buf, nm, seg_n); + m->segname_buf[seg_n] = 0; + u32 sect_n = (u32)((nlen - i - 1) > 15 ? 15 : (nlen - i - 1)); + memcpy(m->sectname_buf, nm + i + 1, sect_n); + m->sectname_buf[sect_n] = 0; + m->segname = m->segname_buf; + m->sectname = m->sectname_buf; + return; + } + } + } + /* Derive from flags. 
*/ + if (sec_is_exec(ls)) { + m->segname = "__TEXT"; + m->sectname = "__text"; + } else if (sec_is_writable(ls)) { + m->segname = "__DATA"; + m->sectname = sec_is_zerofill(ls) ? "__bss" : "__data"; + } else { + m->segname = "__TEXT"; + m->sectname = "__const"; + } +} + +static void plan_layout(MCtx* x) { + LinkImage* img = x->img; + Heap* h = x->h; + + /* PAGEZERO */ + seg_init(&x->segs[0], "__PAGEZERO", 0, 0); + x->segs[0].vmaddr = 0; + x->segs[0].vmsize = MZ_PAGEZERO; + x->segs[0].fileoff = 0; + x->segs[0].filesize = 0; + x->segs[0].nsects = 0; + x->segs[0].first_sec = 0; + + /* Segments 1..4 */ + seg_init(&x->segs[1], "__TEXT", VM_PROT_READ | VM_PROT_EXECUTE, + VM_PROT_READ | VM_PROT_EXECUTE); + seg_init(&x->segs[2], "__DATA_CONST", VM_PROT_READ | VM_PROT_WRITE, + VM_PROT_READ | VM_PROT_WRITE); + seg_init(&x->segs[3], "__DATA", VM_PROT_READ | VM_PROT_WRITE, + VM_PROT_READ | VM_PROT_WRITE); + seg_init(&x->segs[4], "__LINKEDIT", VM_PROT_READ, VM_PROT_READ); + x->nsegs = 5; + + /* Pre-allocate MSec capacity: every LinkSection + 2 synth (__stubs, + * __got). (LinkSections from the dynamic-link layer — .dynsym / .plt + * etc. — were synthesized by layout_dyn for ELF; we won't have them + * since pie wasn't set on this Linker. Still, oversize by a few.) */ + u32 cap = LinkRelocs_count(&img->relocs) + img->nsections + 4u; + x->secs = (MSec*)h->alloc(h, sizeof(MSec) * cap, _Alignof(MSec)); + if (!x->secs) compiler_panic(x->c, no_loc(), "link_macho: oom on MSec"); + memset(x->secs, 0, sizeof(MSec) * cap); + x->nsecs = 0; + + /* Pass 1: __TEXT segment. Header + loadcmds reserve front. */ + /* We need the exact header_size to set first sec's file_offset. We'll + * compute it later, but reserve a placeholder; for now use 0 and patch + * in pass 4 (offsets get bumped). */ + + u64 text_vaddr = MZ_PAGEZERO; + /* We'll compute headers_size after plan; stash starting vaddr only. 
*/ + x->segs[1].vmaddr = text_vaddr; + x->segs[1].fileoff = 0; + x->text_vaddr = text_vaddr; + + /* Collect: (a) exec sections, (b) read-only allocatable sections. */ + /* (cursor advances per-segment in pass 2; nothing to track here) */ + + /* We don't know the header size yet; walk sections first to enumerate + * MSec entries, then back-fill file_offset/vaddr after we know the + * load-command count. */ + + u32 first_text_sec = x->nsecs; + + for (u32 i = 0; i < img->nsections; ++i) { + LinkSection* ls = &img->sections[i]; + if (!ls->size) continue; + if (sec_is_writable(ls)) continue; + if (sec_is_zerofill(ls)) continue; /* placed in __DATA */ + if (sec_needs_data_const(img, ls)) continue; + MSec* m = &x->secs[x->nsecs++]; + memset(m, 0, sizeof(*m)); + m->link_sec_id = ls->id; + pick_macho_names(ls, x->c, m); + /* Force into __TEXT. */ + if (!slice_eq_cstr(slice_from_cstr(m->segname), "__TEXT")) + m->segname = "__TEXT"; + m->align = ls->align ? ls->align : 1u; + m->size = ls->size; + m->segidx = 1; + m->flags = sec_is_exec(ls) ? 
(0x80000000u /*S_ATTR_PURE_INSTRUCTIONS*/ | + 0x00000400u /*S_ATTR_SOME_INSTRUCTIONS*/) + : 0u; + } + + /* __stubs synthetic */ + if (x->nimport_funcs) { + x->stubs_size = x->nimport_funcs * x->macho->stub_size; + x->stubs_bytes = (u8*)h->alloc(h, x->stubs_size, 4); + if (!x->stubs_bytes) + compiler_panic(x->c, no_loc(), "link_macho: oom on stubs"); + memset(x->stubs_bytes, 0, x->stubs_size); + MSec* m = &x->secs[x->nsecs++]; + memset(m, 0, sizeof(*m)); + m->synth_data = x->stubs_bytes; + m->synth_size = x->stubs_size; + m->segname = "__TEXT"; + m->sectname = "__stubs"; + m->align = 4u; + m->size = x->stubs_size; + m->segidx = 1; + m->flags = 0x80000000u | 0x00000400u | 0x00000008u /*S_SYMBOL_STUBS*/; + m->reserved1 = 0; /* fill in later: indirect-symtab base */ + m->reserved2 = x->macho->stub_size; + } + x->segs[1].nsects = x->nsecs - first_text_sec; + x->segs[1].first_sec = first_text_sec; + + /* __DATA_CONST: __got synth */ + u32 first_dc = x->nsecs; + if (x->nimports) { + x->got_size = x->nimports * MZ_GOT_SIZE; + x->got_bytes = (u8*)h->alloc(h, x->got_size, 8); + if (!x->got_bytes) compiler_panic(x->c, no_loc(), "link_macho: oom on got"); + memset(x->got_bytes, 0, x->got_size); + MSec* m = &x->secs[x->nsecs++]; + memset(m, 0, sizeof(*m)); + m->synth_data = x->got_bytes; + m->synth_size = x->got_size; + m->segname = "__DATA_CONST"; + m->sectname = "__got"; + m->align = 8u; + m->size = x->got_size; + m->segidx = 2; + m->flags = 0x00000006u /*S_NON_LAZY_SYMBOL_POINTERS*/; + m->reserved1 = 0; /* indirect-symtab base */ + } + for (u32 i = 0; i < img->nsections; ++i) { + LinkSection* ls = &img->sections[i]; + if (!sec_needs_data_const(img, ls)) continue; + MSec* m = &x->secs[x->nsecs++]; + memset(m, 0, sizeof(*m)); + m->link_sec_id = ls->id; + pick_macho_names(ls, x->c, m); + m->segname = "__DATA_CONST"; + m->align = ls->align ? 
ls->align : 1u; + m->size = ls->size; + m->segidx = 2; + m->flags = 0; + } + x->segs[2].nsects = x->nsecs - first_dc; + x->segs[2].first_sec = first_dc; + + /* __DATA segment: writable sections + zerofill. */ + u32 first_d = x->nsecs; + for (u32 i = 0; i < img->nsections; ++i) { + LinkSection* ls = &img->sections[i]; + if (!ls->size && !sec_is_zerofill(ls)) continue; + if (!sec_is_writable(ls)) continue; + MSec* m = &x->secs[x->nsecs++]; + memset(m, 0, sizeof(*m)); + m->link_sec_id = ls->id; + pick_macho_names(ls, x->c, m); + if (!slice_eq_cstr(slice_from_cstr(m->segname), "__DATA")) + m->segname = "__DATA"; + m->align = ls->align ? ls->align : 1u; + m->size = ls->size; + m->segidx = 3; + m->is_zerofill = sec_is_zerofill(ls) ? 1 : 0; + m->flags = m->is_zerofill ? 0x00000001u /*S_ZEROFILL*/ : 0; + /* dyld dispatches on the section type byte (low 8 bits of flags). + * __mod_init_func / __mod_term_func sections must carry the + * S_MOD_INIT_FUNC_POINTERS / S_MOD_TERM_FUNC_POINTERS type or dyld + * skips them entirely — leaving constructors unrun at startup. */ + if (slice_eq_cstr(slice_from_cstr(m->sectname), "__mod_init_func")) + m->flags = 0x00000009u /*S_MOD_INIT_FUNC_POINTERS*/; + else if (slice_eq_cstr(slice_from_cstr(m->sectname), "__mod_term_func")) + m->flags = 0x0000000au /*S_MOD_TERM_FUNC_POINTERS*/; + else if (ls->flags & SF_TLS) { + /* TLV sections: dyld dispatches by section type, not name. Map + * __thread_vars → S_THREAD_LOCAL_VARIABLES (descriptor records), + * __thread_data → S_THREAD_LOCAL_REGULAR (initial data), + * __thread_bss → S_THREAD_LOCAL_ZEROFILL (zero-init data). Done + * by sectname so per-TU inputs without a Mach-O ext_type still + * get the right section type. */ + if (slice_eq_cstr(slice_from_cstr(m->sectname), "__thread_vars")) { + m->flags = S_THREAD_LOCAL_VARIABLES; + /* Each descriptor is three pointers (24B) whose first word is + * dyld's _tlv_bootstrap thunk pointer. 
Clang/llvm emit + * __thread_vars with on-disk alignment 1 (relying on layout to + * land it on 8); force 8-alignment here so the descriptor + * pointers fall on 8-byte boundaries — dyld's chained-fixup + * processing assumes that. */ + if (m->align < 8u) m->align = 8u; + } else if (m->is_zerofill) + m->flags = S_THREAD_LOCAL_ZEROFILL; + else + m->flags = S_THREAD_LOCAL_REGULAR; + } + } + /* __thread_ptrs synthetic (TLV pointer slots). Emitted into __DATA + * after the user's TLV input sections so descriptors and their + * pointers share the same segment. Each slot's runtime initial + * value (= TLV descriptor address) is patched during apply_relocs. */ + if (x->ntlv) { + x->tlv_ptrs_size = x->ntlv * MZ_TLVP_SIZE; + x->tlv_ptrs_bytes = (u8*)h->alloc(h, x->tlv_ptrs_size, 8); + if (!x->tlv_ptrs_bytes) + compiler_panic(x->c, no_loc(), "link_macho: oom on tlv_ptrs"); + memset(x->tlv_ptrs_bytes, 0, x->tlv_ptrs_size); + MSec* m = &x->secs[x->nsecs++]; + memset(m, 0, sizeof(*m)); + m->synth_data = x->tlv_ptrs_bytes; + m->synth_size = x->tlv_ptrs_size; + m->segname = "__DATA"; + m->sectname = "__thread_ptrs"; + m->align = 8u; + m->size = x->tlv_ptrs_size; + m->segidx = 3; + m->flags = S_THREAD_LOCAL_VARIABLE_POINTERS; + } + x->segs[3].nsects = x->nsecs - first_d; + x->segs[3].first_sec = first_d; + + /* Group MSecs by (segname, sectname) within each segment so vaddr + * placement keeps same-named runs contiguous. Otherwise Phase B's + * adjacency-based coalescing splits a single Mach-O section into + * multiple OutSecs (e.g. `.text` from an in-memory ObjBuilder and + * `__TEXT,__text` from a Mach-O .o input both map to `__TEXT,__text` + * but arrive in separate link_layout groups, interleaved with other + * sections from each input). Stable insertion sort preserves input + * order within a name, which matters for synth __stubs/__thread_ptrs + * order relative to peers. 
*/ + for (u32 i = 0; i < x->nsegs; ++i) { + MSeg* sg = &x->segs[i]; + if (sg->nsects < 2) continue; + u32 base = sg->first_sec; + u32 n = sg->nsects; + for (u32 a = 1; a < n; ++a) { + MSec key = x->secs[base + a]; + msec_repair_name_ptrs(&key); + u32 j = a; + while (j > 0) { + MSec* prev = &x->secs[base + j - 1]; + /* Ordering compare for stable sort: slices don't order, keep strcmp. */ + int cmp = strcmp(prev->segname, key.segname); /* ordering */ + if (cmp == 0) cmp = strcmp(prev->sectname, key.sectname); /* ordering */ + if (cmp <= 0) break; + x->secs[base + j] = x->secs[base + j - 1]; + msec_repair_name_ptrs(&x->secs[base + j]); + --j; + } + x->secs[base + j] = key; + msec_repair_name_ptrs(&x->secs[base + j]); + } + } + + /* Phase A: count OutSecs per segment (distinct sectnames) so we can + * size the load commands before placing vaddrs. Phase B builds the + * actual OutSec[] after placement, when vaddrs are final. */ + for (u32 i = 0; i < x->nsegs; ++i) { + MSeg* sg = &x->segs[i]; + u32 cnt = 0; + for (u32 a = sg->first_sec; a < sg->first_sec + sg->nsects; ++a) { + int seen = 0; + for (u32 b = sg->first_sec; b < a; ++b) { + if (slice_eq_cstr(slice_from_cstr(x->secs[a].sectname), + x->secs[b].sectname) && + slice_eq_cstr(slice_from_cstr(x->secs[a].segname), + x->secs[b].segname)) { + seen = 1; + break; + } + } + if (!seen) ++cnt; + } + sg->nouts = cnt; + sg->first_out = 0; /* assigned in Phase B */ + } + + /* Compute load-command count + sizeofcmds, then back-fill section + * offsets. Layout pass 2. */ + u32 nseg_real = 0; + for (u32 i = 0; i < x->nsegs; ++i) { + /* Skip __DATA_CONST or __DATA if no sections (edge case). */ + if (i == 0) { + ++nseg_real; + continue; + } /* PAGEZERO */ + if (i == 4) { + ++nseg_real; + continue; + } /* LINKEDIT always */ + if (x->segs[i].nsects > 0) ++nseg_real; + } + /* Each LC_SEGMENT_64 carries 72 + 80*nouts bytes (one section_64 + * record per coalesced (segname,sectname), not per MSec). 
*/ + u32 sizeofcmds = 0; + for (u32 i = 0; i < x->nsegs; ++i) { + if (i == 0 || i == 4) { + sizeofcmds += MACHO_SEGCMD64_SIZE; /* no sections */ + continue; + } + if (x->segs[i].nsects == 0) continue; + sizeofcmds += MACHO_SEGCMD64_SIZE + x->segs[i].nouts * MACHO_SECT64_SIZE; + } + (void)nseg_real; + /* LC_DYLD_CHAINED_FIXUPS / LC_DYLD_EXPORTS_TRIE */ + sizeofcmds += 16u + 16u; + /* LC_SYMTAB / LC_DYSYMTAB */ + sizeofcmds += MACHO_SYMTAB_CMD_SIZE + MACHO_DYSYMTAB_CMD_SIZE; + /* LC_LOAD_DYLINKER */ + { + u32 ld_size = 12u + (u32)(sizeof("/usr/lib/dyld") - 1u) + 1u; + sizeofcmds += (u32)ALIGN_UP((u64)ld_size, 8u); + } + /* LC_UUID + LC_BUILD_VERSION + LC_MAIN */ + sizeofcmds += 24u + 24u + 24u; + /* LC_LOAD_DYLIB per dylib */ + for (u32 i = 0; i < x->ndylibs; ++i) { + size_t nl = pool_slice(x->c->global, x->dylibs[i].install).len; + u32 sz = 24u + (u32)nl + 1u; + sizeofcmds += (u32)ALIGN_UP((u64)sz, 8u); + } + /* LC_FUNCTION_STARTS / LC_DATA_IN_CODE / LC_CODE_SIGNATURE */ + sizeofcmds += 16u + 16u + 16u; + + x->headers_size = MACHO_HDR64_SIZE + sizeofcmds; + + /* Now place sections in __TEXT, __DATA_CONST, __DATA. */ + u64 vaddr = MZ_PAGEZERO + x->headers_size; + u64 fileoff = x->headers_size; + /* Pad __TEXT sections to natural alignment. */ + for (u32 i = 0; i < x->nsegs; ++i) { + if (i == 0 || i == 4) continue; + MSeg* sg = &x->segs[i]; + if (i > 1) { + /* page-align the start of __DATA_CONST and __DATA */ + vaddr = ALIGN_UP(vaddr, MZ_PAGE); + fileoff = ALIGN_UP(fileoff, MZ_PAGE); + } + sg->vmaddr = (i == 1) ? MZ_PAGEZERO : vaddr; + sg->fileoff = (i == 1) ? 0 : fileoff; + /* __TEXT carries the headers_size + sections. */ + u64 seg_start_v = sg->vmaddr; + u64 seg_start_f = sg->fileoff; + /* For __TEXT, sections begin after the header area. */ + u64 cur_v = (i == 1) ? (seg_start_v + x->headers_size) : seg_start_v; + u64 cur_f = (i == 1) ? 
(seg_start_f + x->headers_size) : seg_start_f; + u64 first_zerofill_v = 0; + int seen_zerofill = 0; + /* Non-zerofill first */ + for (u32 j = 0; j < sg->nsects; ++j) { + MSec* m = &x->secs[sg->first_sec + j]; + if (m->is_zerofill) continue; + cur_v = ALIGN_UP(cur_v, (u64)m->align); + cur_f = ALIGN_UP(cur_f, (u64)m->align); + m->vaddr = cur_v; + m->file_offset = cur_f; + cur_v += m->size; + cur_f += m->size; + } + first_zerofill_v = cur_v; + /* zerofill last (no file bytes) */ + for (u32 j = 0; j < sg->nsects; ++j) { + MSec* m = &x->secs[sg->first_sec + j]; + if (!m->is_zerofill) continue; + cur_v = ALIGN_UP(cur_v, (u64)m->align); + m->vaddr = cur_v; + m->file_offset = 0; + cur_v += m->size; + seen_zerofill = 1; + } + sg->filesize = (i == 1) + ? (cur_f - seg_start_f) + : (first_zerofill_v ? (first_zerofill_v - seg_start_v) + : (cur_v - seg_start_v)); + sg->vmsize = ALIGN_UP(cur_v - seg_start_v, MZ_PAGE); + if (sg->vmsize == 0 && sg->nsects > 0) sg->vmsize = MZ_PAGE; + if (i == 1) { + x->stubs_vaddr = 0; + for (u32 j = 0; j < sg->nsects; ++j) { + MSec* m = &x->secs[sg->first_sec + j]; + if (slice_eq_cstr(slice_from_cstr(m->sectname), "__stubs")) + x->stubs_vaddr = m->vaddr; + } + x->text_filesz = sg->filesize; + } + if (i == 2) { + for (u32 j = 0; j < sg->nsects; ++j) { + MSec* m = &x->secs[sg->first_sec + j]; + if (slice_eq_cstr(slice_from_cstr(m->sectname), "__got")) + x->got_vaddr = m->vaddr; + } + x->data_const_vaddr = sg->vmaddr; + x->data_const_filesz = sg->filesize; + } + if (i == 3) { + for (u32 j = 0; j < sg->nsects; ++j) { + MSec* m = &x->secs[sg->first_sec + j]; + if (slice_eq_cstr(slice_from_cstr(m->sectname), "__thread_ptrs")) + x->tlv_ptrs_vaddr = m->vaddr; + /* TLS storage image base: min vaddr across __thread_data and + * __thread_bss sections. __thread_vars is excluded — it holds + * the descriptors, not the data that maps into the per-thread + * block. 
*/ + if ((slice_eq_cstr(slice_from_cstr(m->sectname), "__thread_data") || + slice_eq_cstr(slice_from_cstr(m->sectname), "__thread_bss")) && + (!x->has_tls_image || m->vaddr < x->tls_image_vaddr)) { + x->tls_image_vaddr = m->vaddr; + x->has_tls_image = 1; + } + } + x->data_vaddr = sg->vmaddr; + x->data_filesz = sg->filesize; + x->data_memsz = sg->vmsize; + } + vaddr = sg->vmaddr + sg->vmsize; + /* Mach-O segments are mapped in page units. If a segment's memory + * image extends past its initialized file bytes (for example + * __DATA,__bss), the following segment's fileoff must not reuse those + * pages or the kernel can map later file contents into the zero-fill + * tail. */ + fileoff = sg->fileoff + ((sg->vmsize > ALIGN_UP(sg->filesize, MZ_PAGE)) + ? sg->vmsize + : sg->filesize); + (void)seen_zerofill; + } + /* LINKEDIT placeholder; size is filled after blob assembly. */ + vaddr = ALIGN_UP(vaddr, MZ_PAGE); + fileoff = ALIGN_UP(fileoff, MZ_PAGE); + x->segs[4].vmaddr = vaddr; + x->segs[4].fileoff = fileoff; + x->linkedit_vaddr = vaddr; + x->linkedit_fileoff = fileoff; + + /* Encode __stubs bytes now that vaddrs are settled. Internal-GOT + * entries have stub_idx=0 (direct CALL26, no stub) and must be + * skipped so the (stub_idx - 1u) arithmetic doesn't wrap. */ + for (u32 i = 0; i < x->nimports; ++i) { + MachImp* mi = &x->imports[i]; + if (!mi->is_func || !mi->stub_idx) continue; + u64 stub_v = x->stubs_vaddr + (mi->stub_idx - 1u) * x->macho->stub_size; + u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE; + x->macho->emit_stub( + x->stubs_bytes + (mi->stub_idx - 1u) * x->macho->stub_size, stub_v, + got_v); + } + + /* Phase B: build OutSec[] now that all MSec vaddrs are final. Walk + * MSecs sorted by (segidx, vaddr) and coalesce adjacent same-name + * runs. Mirrors link_elf.c's OutShdr build at link_elf.c:879. 
*/ + { + u32* order = + (u32*)h->alloc(h, sizeof(u32) * (x->nsecs + 1u), _Alignof(u32)); + if (!order && x->nsecs) + compiler_panic(x->c, no_loc(), "link_macho: oom on outsec sort"); + for (u32 i = 0; i < x->nsecs; ++i) order[i] = i; + /* Insertion sort — section count is small. */ + for (u32 i = 1; i < x->nsecs; ++i) { + u32 cur = order[i]; + MSec* a = &x->secs[cur]; + u32 j = i; + while (j > 0) { + MSec* b = &x->secs[order[j - 1]]; + if ((b->segidx < a->segidx) || + (b->segidx == a->segidx && b->vaddr <= a->vaddr)) + break; + order[j] = order[j - 1]; + --j; + } + order[j] = cur; + } + u32 cap = x->nsecs + 1u; + x->outs = (OutSec*)h->alloc(h, sizeof(OutSec) * cap, _Alignof(OutSec)); + if (!x->outs) compiler_panic(x->c, no_loc(), "link_macho: oom on OutSec"); + memset(x->outs, 0, sizeof(OutSec) * cap); + x->nouts = 0; + for (u32 i = 0; i < x->nsecs; ++i) { + MSec* m = &x->secs[order[i]]; + OutSec* tail = x->nouts ? &x->outs[x->nouts - 1] : NULL; + int merge = tail && tail->segidx == m->segidx && + slice_eq_cstr(slice_from_cstr(tail->sectname), m->sectname) && + slice_eq_cstr(slice_from_cstr(tail->segname), m->segname); + if (merge) { + if (tail->flags != m->flags || tail->is_zerofill != m->is_zerofill) + compiler_panic( + x->c, no_loc(), + "link_macho: coalesce mismatch on %.*s,%.*s (flags/zerofill)", + SLICE_ARG(slice_from_cstr(m->segname)), + SLICE_ARG(slice_from_cstr(m->sectname))); + u64 end = m->vaddr + m->size; + u64 prev_end = tail->vaddr + tail->size; + if (end > prev_end) tail->size = end - tail->vaddr; + if (m->align > tail->align) tail->align = m->align; + } else { + OutSec* o = &x->outs[x->nouts++]; + o->segname = m->segname; + o->sectname = m->sectname; + o->vaddr = m->vaddr; + o->file_offset = m->file_offset; + o->size = m->size; + o->align = m->align; + o->flags = m->flags; + o->reserved1 = m->reserved1; + o->reserved2 = m->reserved2; + o->segidx = m->segidx; + o->is_zerofill = m->is_zerofill; + } + } + h->free(h, order, sizeof(u32) * (x->nsecs + 
1u)); + /* Recompute per-segment OutSec span; Phase A's count was for + * sizeofcmds sizing — recompute it here as the source of truth and + * assert agreement. */ + for (u32 i = 0; i < x->nsegs; ++i) { + x->segs[i].first_out = 0; + } + u32 prev_nouts[5]; + for (u32 i = 0; i < x->nsegs; ++i) prev_nouts[i] = x->segs[i].nouts; + for (u32 i = 0; i < x->nsegs; ++i) x->segs[i].nouts = 0; + for (u32 i = 0; i < x->nouts; ++i) { + u8 sx = x->outs[i].segidx; + if (x->segs[sx].nouts == 0) x->segs[sx].first_out = i; + ++x->segs[sx].nouts; + } + for (u32 i = 0; i < x->nsegs; ++i) { + if (prev_nouts[i] != x->segs[i].nouts) + compiler_panic(x->c, no_loc(), + "link_macho: OutSec count drift seg %u (%u vs %u)", + (u32)i, prev_nouts[i], x->segs[i].nouts); + } + } +} + +/* ---- pass: shift LinkImage into final vaddrs/file_offsets ---- + * + * The sections in img->sections are still in their original + * link_layout coordinates. Map each LinkSection -> its MSec and copy + * the final vaddr/file_offset so reloc-apply walks correctly. */ + +static void shift_sections(MCtx* x) { + LinkImage* img = x->img; + /* Build a quick lookup: link_sec_id -> MSec*. */ + for (u32 i = 0; i < x->nsecs; ++i) { + MSec* m = &x->secs[i]; + if (!m->link_sec_id) continue; + /* Walk link_section_id slot. */ + LinkSection* ls = &img->sections[m->link_sec_id - 1u]; + /* shift relocs whose write_vaddr/file_offset live within this + * section's original [old_vaddr, old_vaddr+size). */ + u64 old_v = ls->vaddr; + u64 old_f = ls->file_offset; + u64 new_v = m->vaddr; + u64 new_f = m->file_offset; + if (old_v == new_v && old_f == new_f) continue; + /* Update the LinkSection itself. */ + ls->vaddr = new_v; + ls->file_offset = new_f; + /* Update relocs that target this section. 
*/ + for (u32 ri = 0; ri < LinkRelocs_count(&img->relocs); ++ri) { + LinkRelocApply* r = LinkRelocs_at(&img->relocs, ri); + if (r->link_section_id != ls->id) continue; + r->write_vaddr = new_v + (r->write_vaddr - old_v); + r->write_file_offset = new_f + (r->write_file_offset - old_f); + } + /* Update LinkSyms that belong to this LinkSection. Match by + * section_id rather than vaddr range — multiple input sections + * may share the same pre-shift vaddr (each bucket in + * link_layout starts at offset 0). */ + for (u32 si = 0; si < LinkSyms_count(&img->syms); ++si) { + LinkSymbol* s = LinkSyms_at(&img->syms, si); + if (!s->defined) continue; + if (s->kind == SK_ABS) continue; + if (s->section_id != ls->id) continue; + s->vaddr = new_v + (s->vaddr - old_v); + } + } +} + +/* ---- pass: apply relocations + collect chained-fixup sites ---- + * + * Reloc dispatch: + * target=imported func + CALL26/JUMP26 -> S = stub vaddr + * target=import + GOT_LOAD_PAGE21/PAGEOFF12 -> S = got slot vaddr + * target=import + ABS64 -> write 0; collect bind site + * target=internal + ABS64 -> write target VA; collect rebase site + * everything else -> standard apply + * + * Patch sites for chained fixups are 8-byte slots; for ABS32 we do not + * support fixups (no chained-fixup format for 32-bit pointers in + * standard arm64 — would need DYLD_CHAINED_PTR_32). Internal R_ABS32 + * still works (no slide adjustment is wrong technically, but for + * compile-time-known offsets it suffices). 
+ */ + +typedef struct FixSite { + u8 segidx; /* 2 = __DATA_CONST, 3 = __DATA */ + u8 is_bind; /* 0 = rebase, 1 = bind */ + u8 pad[2]; + u32 import_idx; /* 1-based import index for binds, 0 for rebases */ + u64 vaddr; /* absolute VA of the slot */ + u64 rebase_target; /* unslid target VA; only used for rebases */ +} FixSite; + +typedef struct FixList { + Heap* heap; + FixSite* a; + u32 n; + u32 cap; +} FixList; + +static void fix_init(FixList* fl, Heap* h) { + fl->heap = h; + fl->a = NULL; + fl->n = 0; + fl->cap = 0; +} +static void fix_fini(FixList* fl) { + if (fl->a) fl->heap->free(fl->heap, fl->a, sizeof(*fl->a) * fl->cap); + fl->a = NULL; + fl->n = fl->cap = 0; +} +static void fix_push(FixList* fl, const FixSite* s) { + if (VEC_GROW(fl->heap, fl->a, fl->cap, fl->n + 1u)) return; + fl->a[fl->n++] = *s; +} + +/* find MSec covering an absolute vaddr */ +static MSec* msec_for_vaddr(MCtx* x, u64 v) { + for (u32 i = 0; i < x->nsecs; ++i) { + MSec* m = &x->secs[i]; + if (v >= m->vaddr && v < m->vaddr + m->size) return m; + } + return NULL; +} + +static u8* bytes_for_section(MCtx* x, MSec* m, LinkImage* img) { + if (m->synth_data) { + /* Synthetic — caller reads/writes via x->stubs_bytes / x->got_bytes. */ + if (m->synth_data == x->stubs_bytes) return x->stubs_bytes; + if (m->synth_data == x->got_bytes) return x->got_bytes; + return NULL; + } + /* Backed by a LinkSection: find the LinkSegment buffer that section + * sits in (link_layout.c stored input section bytes there). */ + LinkSection* ls = &img->sections[m->link_sec_id - 1u]; + u32 segid = ls->segment_id; + if (segid == LINK_SEG_NONE) return NULL; + return img->segment_bytes[segid - 1u]; +} + +/* Map the LinkSection that backs a write_vaddr to an MSec, then to the + * underlying byte buffer. */ +static u8* patch_ptr(MCtx* x, LinkImage* img, const LinkRelocApply* r, + MSec** out_msec) { + /* Look up via the LinkSection. 
After shift_sections the section + * vaddr is the Mach-O vaddr; the corresponding MSec backs it. */ + if (r->link_section_id == LINK_SEC_NONE) return NULL; + LinkSection* ls = &img->sections[r->link_section_id - 1u]; + /* Find the MSec by link_sec_id. */ + MSec* m = NULL; + for (u32 i = 0; i < x->nsecs; ++i) { + if (x->secs[i].link_sec_id == ls->id) { + m = &x->secs[i]; + break; + } + } + if (!m) return NULL; + /* The LinkSegment's bytes are valid (not shifted), but the offset + * within them is the original input_offset. Use input_offset for + * the byte offset, since the LinkSegment buffer wasn't reshuffled. */ + /* link_layout.c set ls->file_offset = seg.file_offset + input_offset + * originally. ls->vaddr similarly. After our shift, they're new. + * The byte offset within the segment buffer is still input_offset. */ + u8* base = bytes_for_section(x, m, img); + if (!base) return NULL; + u32 within_section = (u32)(r->write_vaddr - m->vaddr); + /* The segment buffer's first byte corresponds to ls->input_offset==0 + * for the FIRST section in the segment. But that's a complication. + * For simplicity we recompute the segment-relative byte offset by + * (file_offset - segment.file_offset) where segment.file_offset is + * unchanged. Wait: the original layout produced `ls->file_offset = + * seg.file_offset + input_offset`, and we may have changed + * ls->file_offset. Let's just use input_offset stored on the + * LinkSection. */ + u32 in_off = (u32)(ls->input_offset + within_section); + if (out_msec) *out_msec = m; + return base + in_off; +} + +/* Symbol-relative resolved-address S, accounting for imports. */ +static int sym_S(MCtx* x, LinkImage* img, LinkSymId id, u64* out_S, + int* out_imp_idx) { + *out_S = 0; + *out_imp_idx = 0; + if (id == LINK_SYM_NONE) return 0; + LinkSymbol* s = sym_at(img, id); + if (!s) return 0; + /* Look up the import index — real imports plus internal-GOT entries + * the collect_imports pass materialized for GOT-routed internal refs. 
*/ + u32 idx = 0; + if (id < x->sym_to_imp_size) idx = x->sym_to_imp[id]; + if (!idx && s->name != 0) { + LinkSymId canon = symhash_get(&img->globals, s->name); + if (canon != LINK_SYM_NONE && canon < x->sym_to_imp_size) + idx = x->sym_to_imp[canon]; + } + if (s->imported) { + *out_imp_idx = (int)idx; + return 1; + } + /* Internal symbol that has a GOT slot — surface the import index so + * the GOT_LOAD reloc paths in apply_relocs find it, but also expose + * S=vaddr so non-GOT relocs (CALL26 etc.) still apply directly. */ + *out_imp_idx = (int)idx; + *out_S = s->vaddr; + return 0; +} + +static void apply_relocs(MCtx* x, FixList* fl) { + LinkImage* img = x->img; + for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) { + LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); + if (r->target == LINK_SYM_NONE) continue; + MSec* msec = NULL; + u8* P_bytes = patch_ptr(x, img, r, &msec); + if (!P_bytes) continue; + u64 P = r->write_vaddr; + + u64 S; + int imp_idx; + int is_imp = sym_S(x, img, r->target, &S, &imp_idx); + + /* TLVP relocs route through a __thread_ptrs slot regardless of + * whether the descriptor target is in-image or imported. Resolved + * before the import / internal split because an imported TLV + * descriptor doesn't use the __got slot (its address lives in + * __thread_ptrs with its own chained bind). */ + if (x->link_arch->is_tlvp_reloc && x->link_arch->is_tlvp_reloc(r->kind)) { + u32 tlv_idx = + (r->target < x->sym_to_tlv_size) ? x->sym_to_tlv[r->target] : 0u; + if (!tlv_idx) + compiler_panic(x->c, no_loc(), + "link_macho: TLVP reloc has no __thread_ptrs slot"); + u64 slot_v = x->tlv_ptrs_vaddr + (tlv_idx - 1u) * MZ_TLVP_SIZE; + link_reloc_apply(x->c, r->kind, P_bytes, slot_v, r->addend, P); + continue; + } + + if (is_imp) { + MachImp* mi = (imp_idx > 0) ? 
&x->imports[imp_idx - 1] : NULL; + if (x->link_arch->is_branch_reloc && + x->link_arch->is_branch_reloc(r->kind)) { + if (!mi || !mi->stub_idx) + compiler_panic(x->c, no_loc(), + "link_macho: import has no stub for branch"); + u64 stub_v = x->stubs_vaddr + (mi->stub_idx - 1u) * x->macho->stub_size; + link_reloc_apply(x->c, r->kind, P_bytes, stub_v, r->addend, P); + continue; + } + if (x->link_arch->is_got_load_reloc && + x->link_arch->is_got_load_reloc(r->kind)) { + if (!mi) + compiler_panic(x->c, no_loc(), + "link_macho: GOT reloc for unknown import"); + u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE; + link_reloc_apply(x->c, r->kind, P_bytes, got_v, r->addend, P); + continue; + } + if (x->link_arch->is_direct_page_reloc && + x->link_arch->is_direct_page_reloc(r->kind)) { + /* Direct page/lo12 against an import: route through __got. */ + if (!mi) + compiler_panic(x->c, no_loc(), + "link_macho: PAGE/LO12 against unknown import"); + u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE; + link_reloc_apply(x->c, r->kind, P_bytes, got_v, r->addend, P); + continue; + } + if (r->kind == R_ABS64) { + /* Direct 8-byte absolute against an import: bind the slot. */ + wr_u64_le(P_bytes, 0); + FixSite fs = {(u8)msec->segidx, 1, {0}, (u32)imp_idx, P, 0}; + fix_push(fl, &fs); + continue; + } + compiler_panic(x->c, no_loc(), + "link_macho: unhandled reloc kind %u against imported " + "symbol", + (u32)r->kind); + } + + /* Internal relocs. */ + if (r->kind == R_ABS64) { + /* Special case: ABS64 reloc inside a TLV descriptor record + * (__thread_vars section) targeting in-image TLS storage. This + * is the descriptor's word-2 "offset" field — dyld interprets it + * as the per-thread offset of the storage within the TLS image, + * NOT as an absolute address. Apple's ld writes the literal + * offset and emits no chained-fixup entry; replicate that so the + * chain skips over this slot (chained_fixups already does the + * right thing: no fixsite -> no chain link). 
*/ + if (msec && (msec->flags & SECTION_TYPE) == S_THREAD_LOCAL_VARIABLES && + x->has_tls_image) { + u64 offset = (S + (u64)r->addend) - x->tls_image_vaddr; + wr_u64_le(P_bytes, offset); + continue; + } + /* Rebase site. */ + wr_u64_le(P_bytes, S + (u64)r->addend); + FixSite fs = {(u8)msec->segidx, 0, {0}, 0, P, S + (u64)r->addend}; + fix_push(fl, &fs); + continue; + } + /* Internal symbol routed through __got (clang emits GOT_LOAD_PAGE21 + * for any extern global, even if the def is in-image). imp_idx + * was populated by collect_imports' internal-GOT pass; redirect + * the page/lo12 reloc to the GOT slot's vaddr. */ + if (imp_idx > 0 && x->link_arch->is_got_load_reloc && + x->link_arch->is_got_load_reloc(r->kind)) { + MachImp* mi = &x->imports[imp_idx - 1]; + u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE; + link_reloc_apply(x->c, r->kind, P_bytes, got_v, r->addend, P); + continue; + } + /* Generic apply. */ + link_reloc_apply(x->c, r->kind, P_bytes, S, r->addend, P); + } + + /* Per-slot chained fixup. Real imports → bind (dyld resolves at + * load). Internal GOT entries → rebase pointing at the symbol's + * image-relative vaddr; a target vaddr of 0 (weak undef → NULL) gets + * no fixup, just a literal zero slot — chained fixups treat 0 as a + * gap and won't disturb it. */ + for (u32 i = 0; i < x->nimports; ++i) { + MachImp* mi = &x->imports[i]; + u64 slot_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE; + if (mi->internal) { + /* Re-read the symbol's final vaddr now that shift_sections has + * rebased every defined symbol into the Mach-O image layout + * (collect_imports snapshotted too early). */ + LinkSymbol* s = sym_at(img, mi->sym); + u64 tgt_v = s ? 
s->vaddr : 0; + u8* slot = x->got_bytes + (mi->got_idx - 1u) * MZ_GOT_SIZE; + wr_u64_le(slot, tgt_v); + if (tgt_v == 0) continue; /* weak-undef → NULL */ + FixSite fs = {2u, 0, {0}, 0, slot_v, tgt_v}; + fix_push(fl, &fs); + } else { + /* clear slot bytes (already zero) — dyld writes via chain */ + FixSite fs = {2u, 1, {0}, i + 1u, slot_v, 0}; + fix_push(fl, &fs); + } + } + + /* Per-slot TLV pointer fixups. Mirror of the __got loop above: each + * __thread_ptrs slot points at the descriptor record. When the + * descriptor is in-image (internal) we REBASE to its final vaddr; when + * it lives in a dylib we BIND through the descriptor's MachImp. The + * slot itself lives in __DATA (segidx=3), distinct from __got's + * __DATA_CONST (segidx=2). */ + for (u32 i = 0; i < x->ntlv; ++i) { + MachTlv* ts = &x->tlv_slots[i]; + u64 slot_v = x->tlv_ptrs_vaddr + (ts->tlv_idx - 1u) * MZ_TLVP_SIZE; + u8* slot = x->tlv_ptrs_bytes + (ts->tlv_idx - 1u) * MZ_TLVP_SIZE; + if (ts->imported) { + if (!ts->import_idx) + compiler_panic(x->c, no_loc(), + "link_macho: imported TLV without matching import slot"); + wr_u64_le(slot, 0); + FixSite fs = {3u, 1, {0}, ts->import_idx, slot_v, 0}; + fix_push(fl, &fs); + } else { + LinkSymbol* s = sym_at(img, ts->sym); + u64 tgt_v = s ? s->vaddr : 0; + wr_u64_le(slot, tgt_v); + if (tgt_v == 0) continue; /* weak-undef descriptor → NULL */ + FixSite fs = {3u, 0, {0}, 0, slot_v, tgt_v}; + fix_push(fl, &fs); + } + } +} + +/* ---- chained fixups blob assembler ---- + * + * For each segment that has fixups, build a dyld_chained_starts_in_segment + * with one chain per page (MZ_PAGE). Within a page, sort sites by + * offset, encode each as DYLD_CHAINED_PTR_64, and link via the `next` + * field (4-byte units, 0 = end of chain). 
+ */ + +typedef struct PageChain { + u32 first_offset_in_page; /* relative to page start */ + u32 nsites; + u32 first_site_idx; /* into a per-segment site array */ +} PageChain; + +static int site_cmp_by_vaddr(const void* a, const void* b) { + const FixSite* x = a; + const FixSite* y = b; + if (x->vaddr < y->vaddr) return -1; + if (x->vaddr > y->vaddr) return 1; + return 0; +} + +/* tiny insertion sort to avoid pulling qsort */ +static void sort_sites(FixSite* a, u32 n) { + for (u32 i = 1; i < n; ++i) { + FixSite tmp = a[i]; + u32 j = i; + while (j > 0 && site_cmp_by_vaddr(&a[j - 1], &tmp) > 0) { + a[j] = a[j - 1]; + --j; + } + a[j] = tmp; + } +} + +static void emit_pointer(u8* slot, int is_bind, u32 ord_or_target_lo, + u32 high_or_target_hi, u32 next4) { + /* DYLD_CHAINED_PTR_64: + * bind : ordinal:24, addend:8, reserved:19, next:12, bind:1=1 + * rebase: target:36 (vmaddr), high8:8, reserved:7, next:12, bind:1=0 + */ + u64 v = 0; + if (is_bind) { + u64 ordinal = (u64)ord_or_target_lo & 0xffffffull; /* 24 bits */ + u64 addend = 0; + u64 next = (u64)next4 & 0xfffull; + v = ordinal | (addend << 24) | (0ull /* reserved */ << 32) | (next << 51) | + ((u64)1 << 63); + } else { + /* rebase: target is full vmaddr; we get hi:lo split. 
*/ + u64 target = ((u64)high_or_target_hi << 32) | (u64)ord_or_target_lo; + target &= ((u64)1 << 36) - 1u; /* 36 bits */ + u64 high8 = 0; + u64 next = (u64)next4 & 0xfffull; + v = target | (high8 << 36) | (0ull /* reserved */ << 44) | (next << 51) | + ((u64)0 << 63); + } + wr_u64_le(slot, v); +} + +static void build_chained_fixups(MCtx* x, FixList* fl) { + Heap* h = x->h; + MByte* out = &x->chained_fixups; + mbuf_init(out, h); + + /* Header (32 B): + * uint32 fixups_version (=0) + * uint32 starts_offset + * uint32 imports_offset + * uint32 symbols_offset + * uint32 imports_count + * uint32 imports_format (=1) + * uint32 symbols_format (=0) + */ + u32 hdr_pos = mbuf_u32(out, 0); /* fixups_version */ + (void)hdr_pos; + u32 starts_offset_pos = mbuf_u32(out, 0); + u32 imports_offset_pos = mbuf_u32(out, 0); + u32 symbols_offset_pos = mbuf_u32(out, 0); + mbuf_u32(out, x->nimports_real); + mbuf_u32(out, DYLD_CHAINED_IMPORT); + mbuf_u32(out, 0); /* symbols uncompressed */ + /* dyld expects 8-byte alignment of the starts table. */ + mbuf_align(out, 4); + + /* dyld_chained_starts_in_image: + * uint32 seg_count + * uint32 seg_info_offset[seg_count] + * + * seg_count must equal mach-O segment count (5). + * seg_info_offset[i] = 0 means no fixups in that segment. + */ + u32 starts_off = out->len; + wr_u32_le(out->data + starts_offset_pos, starts_off); + mbuf_u32(out, x->nsegs); + /* Reserve seg_info_offset[]. */ + u32 seg_info_offsets_pos = out->len; + for (u32 i = 0; i < x->nsegs; ++i) mbuf_u32(out, 0); + + /* Sort fixsites by vaddr globally. */ + sort_sites(fl->a, fl->n); + + /* Per segment, emit dyld_chained_starts_in_segment when fixups present. */ + for (u32 si = 0; si < x->nsegs; ++si) { + /* count sites in this segment */ + u32 first = (u32)-1, count = 0; + for (u32 k = 0; k < fl->n; ++k) { + if (fl->a[k].segidx == si) { + if (first == (u32)-1) first = k; + ++count; + } + } + if (!count) continue; + /* Page-align this struct to 4. 
*/ + mbuf_align(out, 4); + u32 sis_off = out->len; + /* Patch seg_info_offset[si] to (sis_off - starts_off). */ + wr_u32_le(out->data + seg_info_offsets_pos + si * 4u, sis_off - starts_off); + + /* Compute page count for this segment. */ + u64 seg_va = x->segs[si].vmaddr; + u64 seg_size = x->segs[si].vmsize ? x->segs[si].vmsize : MZ_PAGE; + u32 page_count = (u32)((seg_size + MZ_PAGE - 1u) / MZ_PAGE); + + /* dyld_chained_starts_in_segment: + * uint32 size + * uint16 page_size + * uint16 pointer_format + * uint64 segment_offset (offset of segment's first byte from + * mach_header) + * uint32 max_valid_pointer (0 for 64-bit) + * uint16 page_count + * uint16 page_start[page_count] (0xFFFF = no fixups in page) + */ + u32 sis_size_pos = mbuf_u32(out, 0); /* fill below */ + mbuf_u16(out, (u16)MZ_PAGE); + mbuf_u16(out, (u16)DYLD_CHAINED_PTR_64); + mbuf_u64(out, (u64)x->segs[si].fileoff); /* segment file offset */ + mbuf_u32(out, 0); + mbuf_u16(out, (u16)page_count); + u32 page_starts_pos = out->len; + for (u32 p = 0; p < page_count; ++p) mbuf_u16(out, 0xFFFFu); + /* size includes the page_start array */ + u32 sis_size = out->len - sis_size_pos + 4u; + /* Hmm, the `size` field is the size of *this* struct. We measure + * from sis_off through end of page_starts. */ + sis_size = out->len - sis_off; + wr_u32_le(out->data + sis_size_pos, sis_size); + + /* Now: walk sites in this segment, group by page, write + * page_start[i] = offset_in_page of first site, and chain via + * next-field in the actual segment's bytes. */ + /* Sites are sorted globally; collect contiguous run for this seg. */ + u32 cur = first; + while (cur < first + count) { + u32 page_idx = (u32)((fl->a[cur].vaddr - seg_va) / MZ_PAGE); + u32 offset_in_page = (u32)((fl->a[cur].vaddr - seg_va) % MZ_PAGE); + wr_u16_le(out->data + page_starts_pos + page_idx * 2u, + (u16)offset_in_page); + /* Walk this page's chain. 
*/ + u32 next_in_page = cur; + while (next_in_page + 1 < first + count) { + u64 nv = fl->a[next_in_page + 1].vaddr; + if (nv >= seg_va + (u64)(page_idx + 1) * MZ_PAGE) break; + ++next_in_page; + } + /* Encode chain pointers. */ + for (u32 k = cur; k <= next_in_page; ++k) { + FixSite* s = &fl->a[k]; + u32 next4 = 0; + if (k < next_in_page) { + u64 dist = fl->a[k + 1].vaddr - s->vaddr; + next4 = (u32)(dist / 4u); + } + /* Find segment bytes. Synthetic pointer sections have private + * buffers; file-backed sections can live in any segment, including + * pointer-bearing read-only constants in __TEXT. */ + u8* slot = NULL; + if (s->segidx == 2 && x->got_bytes && s->vaddr >= x->got_vaddr && + s->vaddr < x->got_vaddr + x->got_size) { + /* __DATA_CONST: __got slot. */ + slot = x->got_bytes + (s->vaddr - x->got_vaddr); + } else if (x->tlv_ptrs_bytes && s->vaddr >= x->tlv_ptrs_vaddr && + s->vaddr < x->tlv_ptrs_vaddr + x->tlv_ptrs_size) { + slot = x->tlv_ptrs_bytes + (s->vaddr - x->tlv_ptrs_vaddr); + } else { + MSec* m = msec_for_vaddr(x, s->vaddr); + if (m && m->link_sec_id) { + u8* base = bytes_for_section(x, m, x->img); + if (base) { + LinkSection* ls = &x->img->sections[m->link_sec_id - 1u]; + u32 in_off = (u32)(ls->input_offset + (s->vaddr - m->vaddr)); + slot = base + in_off; + } + } + } + if (!slot) + compiler_panic(x->c, no_loc(), + "link_macho: chained-fixup slot for vaddr 0x%llx not " + "in any segment buffer", + (unsigned long long)s->vaddr); + if (s->is_bind) { + /* ordinal is import index (1-based) - 1; chained-import format + * uses 0-based. 
*/ + if (s->import_idx == 0 || s->import_idx > x->nimports_real) { + compiler_panic( + x->c, no_loc(), + "link_macho: chained bind for vaddr 0x%llx uses import index " + "%u outside real import table size %u", + (unsigned long long)s->vaddr, (unsigned)s->import_idx, + (unsigned)x->nimports_real); + } + u32 ord = s->import_idx - 1u; + emit_pointer(slot, 1, ord, 0, next4); + } else { + /* rebase target = unslid vmaddr */ + u32 lo = (u32)(s->rebase_target & 0xffffffffu); + u32 hi = (u32)(s->rebase_target >> 32); + emit_pointer(slot, 0, lo, hi, next4); + } + } + cur = next_in_page + 1u; + } + } + + /* Imports table: one dyld_chained_import (4B) per real import. + * Layout: lib_ordinal:8, weak:1, name_offset:23. Internal-GOT + * entries are not bound by dyld so they're omitted here. */ + mbuf_align(out, 4); + u32 imports_off = out->len; + wr_u32_le(out->data + imports_offset_pos, imports_off); + /* We need to first build the symbol pool to know name offsets. */ + u32 symbols_off = imports_off + x->nimports_real * 4u; + /* Reserve imports area. */ + for (u32 i = 0; i < x->nimports_real; ++i) mbuf_u32(out, 0); + /* Emit symbols (each NUL-terminated). Set name_offset on each import. */ + wr_u32_le(out->data + symbols_offset_pos, out->len); + /* Leading NUL for offset 0. */ + mbuf_u8(out, 0); + for (u32 i = 0; i < x->nimports_real; ++i) { + MachImp* mi = &x->imports[i]; + Slice nm_s = pool_slice(x->c->global, mi->name); + const char* nm = nm_s.s; + size_t nl = nm_s.len; + if (!nm || !nl || mi->dylib_ord == 0 || mi->dylib_ord > x->ndylibs) { + compiler_panic(x->c, no_loc(), + "link_macho: invalid chained import %u " + "(name=%u dylib_ord=%u ndylibs=%u)", + (unsigned)i, (unsigned)mi->name, (unsigned)mi->dylib_ord, + (unsigned)x->ndylibs); + } + u32 off = out->len - symbols_off; + mbuf_str(out, nm, (u32)nl); + /* Patch the import slot. */ + u32 packed = ((u32)mi->dylib_ord & 0xffu) | + ((u32)(mi->weak ? 
1u : 0u) << 8) | ((off & 0x7fffffu) << 9); + wr_u32_le(out->data + imports_off + i * 4u, packed); + } + (void)symbols_off; +} + +/* ---- exports trie ---- * + * + * Minimal trie: one node carrying a single export "_main" with the + * entry symbol's VA-relative offset. This is enough for dyld; binaries + * with a real exports trie include more data but we don't need it. */ + +static void uleb128(MByte* out, u64 v) { + do { + u8 byte = v & 0x7fu; + v >>= 7; + if (v) byte |= 0x80u; + mbuf_u8(out, byte); + } while (v); +} + +static void build_exports_trie(MCtx* x) { + /* Format: + * node = (terminal_size: uleb128) (export_data)? (children_count: u8) + * (children: [(label NUL) (offset uleb128)]*) + * + * We emit a trie with a single leaf at "_main" with offset + * entry_offset (from __TEXT base). + * + * Easiest: single root node with children_count=1, child label = "_main", + * child offset points to a leaf node. + */ + MByte* out = &x->exports_trie; + mbuf_init(out, x->h); + + LinkImage* img = x->img; + LinkSymbol* esym = sym_at(img, img->entry_sym); + if (!esym || !esym->defined) { + /* No entry — emit a single empty terminal trie. */ + mbuf_u8(out, 0); /* terminal_size 0 */ + mbuf_u8(out, 0); /* children 0 */ + return; + } + Slice nm_s = pool_slice(x->c->global, esym->name); + const char* nm = nm_s.s; + size_t nl = nm_s.len; + if (!nm || nl == 0) { + mbuf_u8(out, 0); + mbuf_u8(out, 0); + return; + } + /* leaf node: terminal_size = sizeof(uleb(flags)+uleb(offset)) + * flags = 0 (regular export); offset = vaddr - __TEXT.vmaddr */ + u64 entry_off = esym->vaddr - x->text_vaddr; + + /* Compute leaf-node bytes length: uleb(flags=0) + uleb(offset). */ + u32 flags = 0; + u32 leaf_payload_len; + { + /* count uleb bytes for flags=0 -> 1 byte */ + u32 a = 1; + /* count uleb bytes for entry_off */ + u32 b = 0; + u64 v = entry_off; + do { + ++b; + v >>= 7; + } while (v); + leaf_payload_len = a + b; + } + /* Layout: root node first, then leaf. 
The root node's child entry + * carries the absolute offset of the leaf within the trie. */ + + /* root: terminal_size=0, children_count=1, "_main"\0, child_offset= + * (leaf-position uleb). */ + /* We'll back-patch child_offset after we know the leaf position. */ + mbuf_u8(out, 0); /* root terminal size */ + mbuf_u8(out, 1); /* children_count */ + mbuf_str(out, nm, (u32)nl); + /* child offset: 5 bytes max for uleb128(u32). Reserve and patch. */ + u32 child_off_pos = out->len; + /* Reserve 5 bytes. */ + for (u32 i = 0; i < 5; ++i) mbuf_u8(out, 0); + /* leaf node */ + u32 leaf_pos = out->len; + /* terminal_size byte then payload */ + mbuf_u8(out, (u8)leaf_payload_len); + uleb128(out, flags); + uleb128(out, entry_off); + mbuf_u8(out, 0); /* children_count */ + + /* Patch child_offset uleb. */ + u32 v = leaf_pos; + for (u32 i = 0; i < 5; ++i) { + u8 b = (u8)(v & 0x7fu); + v >>= 7; + if (v) b |= 0x80u; + out->data[child_off_pos + i] = b; + if (!v && i < 4) { + /* Remaining bytes need to be 0x00 — but we already wrote zeros; + * we need a continuation-zero so the consumer sees 5 bytes. Set + * top bit on lower bytes to indicate continuation, last byte = 0. */ + /* Actually: ULEB needs proper termination. Force final byte to + * 0 with no continuation by setting bit-7=0 on the last + * non-zero byte and also forcing remaining bytes to be 0x80 + * extension or trim. Simpler: set last byte explicitly. */ + out->data[child_off_pos + i] = (u8)(out->data[child_off_pos + i] & 0x7fu); + for (u32 j = i + 1; j < 5; ++j) out->data[child_off_pos + j] = 0x80; + out->data[child_off_pos + 4] = 0x00; + break; + } + } + /* Pad trie to 8 bytes. 
*/ + mbuf_align(out, 8); +} + +/* ---- symtab + strtab + indirect symtab ---- */ + +typedef struct NlistRec { + u32 strx; + u8 type; + u8 sect; /* 1-based section index (Mach-O) */ + u16 desc; + u64 value; +} NlistRec; + +static void build_symtab(MCtx* x) { + Heap* h = x->h; + LinkImage* img = x->img; + mbuf_init(&x->symtab, h); + mbuf_init(&x->strtab, h); + mbuf_init(&x->indirect, h); + + /* strtab leading NUL */ + mbuf_u8(&x->strtab, 0); + + /* Approach: + * - Add one local nlist per defined LinkSymbol (locals + non-imported + * externs) — but to keep things simple we only emit external defined + * syms (mainly _main), plus all imports as N_UNDF|N_EXT. + * + * Mach-O dyld requires the symtab order: locals first, ext-defs next, + * undef last (matched by LC_DYSYMTAB ranges). + */ + + /* Pass A: defined externals. */ + u32 n_local = 0; + u32 n_extdef = 0; + u32 n_undef = 0; + + /* For now we emit only externals + imports. No locals. */ + /* extdef pass */ + for (u32 i = 0; i < LinkSyms_count(&img->syms); ++i) { + LinkSymbol* s = LinkSyms_at(&img->syms, i); + if (!s->defined) continue; + if (s->bind != SB_GLOBAL && s->bind != SB_WEAK) continue; + if (s->name == 0) continue; + if (s->kind == SK_ABS) continue; /* skip abs externs */ + /* Locate which OutSec contains this vaddr to figure out n_sect. + * n_sect is the 1-based index into the flat section_64 table the + * file actually contains (post-coalesce), matching what we emit + * in emit_load_command_segment. 
*/ + u8 n_sect = 0; + for (u32 k = 0; k < x->nouts; ++k) { + OutSec* o = &x->outs[k]; + if (s->vaddr >= o->vaddr && s->vaddr < o->vaddr + o->size) { + n_sect = (u8)(k + 1u); + break; + } + if (s->vaddr == o->vaddr + o->size) { + n_sect = (u8)(k + 1u); + break; + } + } + Slice nm_s = pool_slice(x->c->global, s->name); + const char* nm = nm_s.s; + size_t nl = nm_s.len; + u32 strx = x->strtab.len; + if (nm && nl) mbuf_str(&x->strtab, nm, (u32)nl); + + u8 t[16]; + u8 nt = N_SECT | N_EXT; + if (s->bind == SB_WEAK) { + /* N_WEAK_DEF in n_desc (not a flag in n_type) */ + } + wr_u32_le(t + 0, strx); + t[4] = nt; + t[5] = n_sect; + wr_u16_le(t + 6, s->bind == SB_WEAK ? N_WEAK_DEF : 0); + wr_u64_le(t + 8, s->vaddr); + mbuf_append(&x->symtab, t, 16); + ++n_extdef; + } + + /* undef imports — real imports only. Internal-GOT entries don't get + * N_UNDF nlist records since they're defined in the image. */ + u32 imp_first_symtab_idx = n_extdef; + for (u32 i = 0; i < x->nimports_real; ++i) { + MachImp* mi = &x->imports[i]; + Slice nm_s = pool_slice(x->c->global, mi->name); + const char* nm = nm_s.s; + size_t nl = nm_s.len; + u32 strx = x->strtab.len; + if (nm && nl) mbuf_str(&x->strtab, nm, (u32)nl); + + u8 t[16]; + wr_u32_le(t + 0, strx); + t[4] = N_UNDF | N_EXT; + t[5] = 0; + /* n_desc carries dylib ordinal in high byte (REFERENCED_DYNAMICALLY etc.) + */ + u16 desc = (u16)(((u16)mi->dylib_ord & 0xff) << 8); + if (mi->weak) desc |= N_WEAK_REF; + wr_u16_le(t + 6, desc); + wr_u64_le(t + 8, 0); + mbuf_append(&x->symtab, t, 16); + ++n_undef; + } + + /* indirect symtab: one entry per __stubs slot, then one per __got + * slot. Internal-GOT slots use INDIRECT_SYMBOL_LOCAL (0x80000000) + * since they have no nlist entry. */ + u32 indirect_start = 0; + /* Patch reserved1 of each synth OutSec. __stubs and __got are each + * singleton OutSecs (synth sections never coalesce with user input), + * so a sectname match identifies them unambiguously. 
*/ + for (u32 i = 0; i < x->nouts; ++i) { + OutSec* o = &x->outs[i]; + if (slice_eq_cstr(slice_from_cstr(o->sectname), "__stubs") && o->size) { + o->reserved1 = indirect_start; + for (u32 k = 0; k < x->nimports; ++k) { + MachImp* mi = &x->imports[k]; + if (!mi->stub_idx) continue; + u32 sym_idx = imp_first_symtab_idx + k; + mbuf_u32(&x->indirect, sym_idx); + ++indirect_start; + } + } + } + for (u32 i = 0; i < x->nouts; ++i) { + OutSec* o = &x->outs[i]; + if (slice_eq_cstr(slice_from_cstr(o->sectname), "__got") && o->size) { + o->reserved1 = indirect_start; + for (u32 k = 0; k < x->nimports; ++k) { + MachImp* mi = &x->imports[k]; + u32 sym_idx = mi->internal ? 0x80000000u /* INDIRECT_SYMBOL_LOCAL */ + : (imp_first_symtab_idx + k); + mbuf_u32(&x->indirect, sym_idx); + ++indirect_start; + } + } + } + + x->nsyms = n_local + n_extdef + n_undef; + (void)n_local; + (void)imp_first_symtab_idx; +} + +/* ---- LINKEDIT layout assembly ---- + * + * Place blobs in the order Apple prefers: + * chained_fixups, exports_trie, fn_starts, data_in_code, + * symtab, indirect, strtab, codesig + */ + +static void layout_linkedit(MCtx* x) { + /* fn_starts and data_in_code are both empty. 
*/ + mbuf_init(&x->fn_starts, x->h); + mbuf_init(&x->data_in_code, x->h); + mbuf_init(&x->codesig, x->h); + + u64 cur = x->linkedit_fileoff; + /* chained fixups */ + cur = ALIGN_UP(cur, 8u); + x->chained_fixups_off = (u32)cur; + cur += x->chained_fixups.len; + /* exports trie */ + cur = ALIGN_UP(cur, 8u); + x->exports_trie_off = (u32)cur; + cur += x->exports_trie.len; + /* function starts (empty placeholder, but allocate one byte) */ + cur = ALIGN_UP(cur, 8u); + x->fn_starts_off = (u32)cur; + /* data in code */ + cur = ALIGN_UP(cur, 8u); + x->data_in_code_off = (u32)cur; + /* symtab */ + cur = ALIGN_UP(cur, 8u); + x->symtab_off = (u32)cur; + cur += x->symtab.len; + /* indirect symtab */ + cur = ALIGN_UP(cur, 4u); + x->indirect_off = (u32)cur; + cur += x->indirect.len; + /* strtab */ + cur = ALIGN_UP(cur, 8u); + x->strtab_off = (u32)cur; + cur += x->strtab.len; + /* code signature: end-aligned to 16 */ + cur = ALIGN_UP(cur, 16u); + x->codesig_off = (u32)cur; + + /* Linkedit segment file_size includes everything up to (but not yet + * including) codesig. Codesig is computed below. */ + u64 le_size = cur - x->linkedit_fileoff; + /* Set linkedit segment size; will be increased after codesig. */ + x->segs[4].filesize = le_size; + x->segs[4].vmsize = ALIGN_UP(le_size, MZ_PAGE); + if (!x->segs[4].vmsize) x->segs[4].vmsize = MZ_PAGE; +} + +/* ---- ad-hoc code signature (CodeDirectory + SuperBlob) ---- + * + * Produces a minimal embedded SuperBlob with a single CodeDirectory. + * The CD is sha256-hashed over CS_PAGE_SIZE_LOG2 = 4096-byte pages of + * the file (excluding the codesig itself). The kernel verifies the + * CD's hash chain on exec. 
+ * + * Output format (in big-endian for SuperBlob/CodeDirectory headers): + * [SuperBlob] + * u32 magic (0xfade0cc0) + * u32 length + * u32 count (=1) + * [Slot] + * u32 type (=0 CSSLOT_CODEDIRECTORY) + * u32 offset (=20) -- relative to start of SuperBlob + * [CodeDirectory] + * u32 magic (0xfade0c02) + * u32 length (bytes including all hashes) + * u32 version (>=0x20400 for execSeg fields) + * u32 flags (=0 ad-hoc — actually flags must include 0x2 + * (kSecCodeSignatureAdhoc)) u32 hashOffset (offset of first slot hash) u32 + * identOffset (offset of identifier string) u32 nSpecialSlots (=0) u32 + * nCodeSlots u32 codeLimit (file bytes covered) u8 hashSize (=32) u8 + * hashType (=2 sha256) u8 platform (=0) u8 pageSize (=12 for 4096) u32 + * spare2 (=0) u32 scatterOffset (=0) u32 teamOffset (=0) u32 spare3 (=0) + * u64 codeLimit64 (=0) + * u64 execSegBase (=__TEXT.fileoff) + * u64 execSegLimit (=__TEXT.filesize) + * u64 execSegFlags (=1 main binary) + * [identifier bytes "a.out\0"] + * [codeslot hashes nCodeSlots * 32 B] + * + * Hashes computed AFTER everything else is final — including the codesig + * blob's own offset in the file (the hash range stops just before + * codeLimit). */ + +static void wr_u64_be(u8* p, u64 v) { + for (u32 i = 0; i < 8; ++i) p[7 - i] = (u8)(v >> (i * 8)); +} + +/* Build the codesig blob with placeholder hashes; size is precise so + * file layout is final after this. */ +static void build_codesig_skeleton(MCtx* x, u32 code_limit, const char* ident) { + u32 code_page = 1u << CS_PAGE_SIZE_LOG2; /* 4096 */ + u32 nslots = (code_limit + code_page - 1u) / code_page; + + /* CodeDirectory size: + * header 88 bytes through execSegFlags + * identifier (ident_len + 1) + * hashes (nslots * 32) + */ + u32 ident_len = (u32)slice_from_cstr(ident).len + 1u; + u32 cd_hdr = 88u; + u32 cd_size = cd_hdr + ident_len + nslots * CS_SHA256_LEN; + /* SuperBlob: 12 hdr + 8 slot + cd. 
*/ + u32 sb_size = 12u + 8u + cd_size; + + MByte* out = &x->codesig; + mbuf_init(out, x->h); + mbuf_reserve(out, sb_size); + memset(out->data, 0, sb_size); + out->len = sb_size; + + u8* sb = out->data; + /* SuperBlob header */ + wr_u32_be(sb + 0, CS_MAGIC_EMBEDDED_SIGNATURE); + wr_u32_be(sb + 4, sb_size); + wr_u32_be(sb + 8, 1); /* count */ + /* slot 0: type=CSSLOT_CODEDIRECTORY, offset=20 */ + wr_u32_be(sb + 12, CSSLOT_CODEDIRECTORY); + wr_u32_be(sb + 16, 20u); + + /* CodeDirectory */ + u8* cd = sb + 20; + wr_u32_be(cd + 0, CS_MAGIC_CODEDIRECTORY); + wr_u32_be(cd + 4, cd_size); + wr_u32_be(cd + 8, 0x20400u); /* version with execSeg */ + wr_u32_be(cd + 12, 0x2u); /* flags = adhoc */ + wr_u32_be(cd + 16, cd_hdr + ident_len); /* hashOffset */ + wr_u32_be(cd + 20, cd_hdr); /* identOffset */ + wr_u32_be(cd + 24, 0); /* nSpecialSlots */ + wr_u32_be(cd + 28, nslots); + wr_u32_be(cd + 32, code_limit); + cd[36] = (u8)CS_SHA256_LEN; + cd[37] = (u8)CS_HASHTYPE_SHA256; + cd[38] = 0; /* platform */ + cd[39] = (u8)CS_PAGE_SIZE_LOG2; + wr_u32_be(cd + 40, 0); /* spare2 */ + wr_u32_be(cd + 44, 0); /* scatterOffset */ + wr_u32_be(cd + 48, 0); /* teamOffset */ + wr_u32_be(cd + 52, 0); /* spare3 */ + wr_u64_be(cd + 56, 0); /* codeLimit64 */ + wr_u64_be(cd + 64, x->segs[1].fileoff); /* execSegBase */ + wr_u64_be(cd + 72, x->segs[1].filesize); /* execSegLimit */ + wr_u64_be(cd + 80, CS_EXECSEG_MAIN_BINARY); + + /* identifier */ + memcpy(cd + cd_hdr, ident, ident_len); + + x->codesig_size = sb_size; +} + +static void compute_codesig(MCtx* x, const u8* full_file, u32 file_len_excl_cs, + const char* ident) { + u32 code_page = 1u << CS_PAGE_SIZE_LOG2; + u32 nslots = (file_len_excl_cs + code_page - 1u) / code_page; + u32 ident_len = (u32)slice_from_cstr(ident).len + 1u; + u8* cd = x->codesig.data + 12 + 8; + u8* hashes = cd + 88u + ident_len; + + for (u32 i = 0; i < nslots; ++i) { + u32 off = i * code_page; + u32 take = (off + code_page <= file_len_excl_cs) ? 
code_page + : (file_len_excl_cs - off); + Sha256 s; + sha256_init(&s); + sha256_update(&s, full_file + off, take); + /* Pages shorter than code_page get the standard SHA over the + * partial bytes — Apple's tools do exactly this (no zero padding + * on the tail). */ + sha256_final(&s, hashes + i * CS_SHA256_LEN); + } +} + +/* ---- final emission ---- */ + +static void emit_load_command_segment(MByte* lc, MCtx* x, u32 segidx) { + MSeg* sg = &x->segs[segidx]; + u32 seg_cmd_size = MACHO_SEGCMD64_SIZE + sg->nouts * MACHO_SECT64_SIZE; + u32 base = lc->len; + mbuf_u32(lc, LC_SEGMENT_64); + mbuf_u32(lc, seg_cmd_size); + /* segname: 16 bytes zero-padded */ + u8 nm[16]; + memset(nm, 0, 16); + size_t nlen = slice_from_cstr(sg->name).len; + if (nlen > 16) nlen = 16; + memcpy(nm, sg->name, nlen); + mbuf_append(lc, nm, 16); + mbuf_u64(lc, sg->vmaddr); + mbuf_u64(lc, sg->vmsize); + mbuf_u64(lc, sg->fileoff); + mbuf_u64(lc, sg->filesize); + mbuf_u32(lc, sg->maxprot); + mbuf_u32(lc, sg->initprot); + mbuf_u32(lc, sg->nouts); + mbuf_u32(lc, 0); /* flags */ + + for (u32 j = 0; j < sg->nouts; ++j) { + OutSec* o = &x->outs[sg->first_out + j]; + u8 sname[16], gname[16]; + memset(sname, 0, 16); + memset(gname, 0, 16); + size_t sl = o->sectname ? slice_from_cstr(o->sectname).len : 0; + if (sl > 16) sl = 16; + if (sl) memcpy(sname, o->sectname, sl); + size_t gl = slice_from_cstr(sg->name).len; /* segname must match */ + if (gl > 16) gl = 16; + memcpy(gname, sg->name, gl); + mbuf_append(lc, sname, 16); + mbuf_append(lc, gname, 16); + mbuf_u64(lc, o->vaddr); + mbuf_u64(lc, o->size); + mbuf_u32(lc, (u32)o->file_offset); + /* align is power of 2; encode as log2. */ + u32 a = o->align ? 
o->align : 1u; + u32 al = 0; + while ((1u << al) < a) ++al; + mbuf_u32(lc, al); + mbuf_u32(lc, 0); /* reloff */ + mbuf_u32(lc, 0); /* nreloc */ + mbuf_u32(lc, o->flags); + mbuf_u32(lc, o->reserved1); + mbuf_u32(lc, o->reserved2); + mbuf_u32(lc, 0); /* reserved3 */ + } + (void)base; +} + +void link_emit_macho(LinkImage* img, Writer* w); + +void link_emit_macho(LinkImage* img, Writer* w) { + MCtx x; + memset(&x, 0, sizeof(x)); + x.img = img; + x.c = img->c; + x.h = img->heap; + x.w = w; + x.linker = img->linker; + x.link_arch = link_arch_desc_for(img->c); + { + const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_MACHO); + x.macho = + fmt && fmt->macho_arch ? fmt->macho_arch(img->c->target.arch) : NULL; + } + + if (!x.link_arch || !x.macho || !x.macho->cputype || !x.macho->emit_stub || + !x.macho->stub_size) + compiler_panic(x.c, no_loc(), + "link_emit_macho: no Mach-O descriptor for target"); + if (img->entry_sym == LINK_SYM_NONE) + compiler_panic(x.c, no_loc(), "link_emit_macho: no resolved entry"); + + collect_imports(&x); + collect_tlv(&x); + plan_layout(&x); + shift_sections(&x); + + /* entry offset within __TEXT segment. */ + LinkSymbol* esym = sym_at(img, img->entry_sym); + if (!esym || !esym->defined) + compiler_panic(x.c, no_loc(), "link_emit_macho: entry symbol undefined"); + if (esym->vaddr < x.text_vaddr) + compiler_panic(x.c, no_loc(), + "link_emit_macho: entry symbol below __TEXT base"); + x.entry_offset = (u32)(esym->vaddr - x.text_vaddr); + + /* image-id UUID. */ + u8 image_id[LINK_IMAGE_ID_BYTES]; + link_image_id_compute(img, image_id); + memcpy(x.uuid, image_id, 16); + + /* Reloc apply collects fixsites. */ + FixList fl; + fix_init(&fl, x.h); + apply_relocs(&x, &fl); + + /* Build LINKEDIT contents. */ + build_chained_fixups(&x, &fl); + build_exports_trie(&x); + build_symtab(&x); + layout_linkedit(&x); + + /* Compute code-sig skeleton sized to file bytes excluding sig. 
*/ + u32 code_limit = x.codesig_off; + build_codesig_skeleton(&x, code_limit, "a.out"); + /* Now extend linkedit segment to include codesig. */ + u64 le_size = (u64)x.codesig_off + (u64)x.codesig_size - x.linkedit_fileoff; + x.segs[4].filesize = le_size; + x.segs[4].vmsize = ALIGN_UP(le_size, MZ_PAGE); + + /* Build load commands buffer. */ + MByte lc; + mbuf_init(&lc, x.h); + + /* LC_SEGMENT_64 for each segment with sections (and PAGEZERO/LINKEDIT). */ + emit_load_command_segment(&lc, &x, 0); /* PAGEZERO */ + emit_load_command_segment(&lc, &x, 1); /* TEXT */ + if (x.segs[2].nsects > 0) + emit_load_command_segment(&lc, &x, 2); /* DATA_CONST */ + if (x.segs[3].nsects > 0) emit_load_command_segment(&lc, &x, 3); /* DATA */ + emit_load_command_segment(&lc, &x, 4); /* LINKEDIT */ + + /* LC_DYLD_CHAINED_FIXUPS (linkedit_data_command: 16B) */ + mbuf_u32(&lc, LC_DYLD_CHAINED_FIXUPS); + mbuf_u32(&lc, 16); + mbuf_u32(&lc, x.chained_fixups_off); + mbuf_u32(&lc, x.chained_fixups.len); + + /* LC_DYLD_EXPORTS_TRIE */ + mbuf_u32(&lc, LC_DYLD_EXPORTS_TRIE); + mbuf_u32(&lc, 16); + mbuf_u32(&lc, x.exports_trie_off); + mbuf_u32(&lc, x.exports_trie.len); + + /* LC_SYMTAB */ + mbuf_u32(&lc, LC_SYMTAB); + mbuf_u32(&lc, MACHO_SYMTAB_CMD_SIZE); + mbuf_u32(&lc, x.symtab_off); + mbuf_u32(&lc, x.nsyms); + mbuf_u32(&lc, x.strtab_off); + mbuf_u32(&lc, x.strtab.len); + + /* LC_DYSYMTAB */ + /* nlocal=0, nextdef=#defined-globals, nundef=#imports. We tracked + * those during build_symtab; recompute by inspecting strtab... easier + * to recount: defined globals are total - imports. */ + u32 nlocal = 0; + u32 nundef = x.nimports_real; + u32 nextdef = (x.nsyms > nundef) ? 
x.nsyms - nundef - nlocal : 0; + mbuf_u32(&lc, LC_DYSYMTAB); + mbuf_u32(&lc, MACHO_DYSYMTAB_CMD_SIZE); + mbuf_u32(&lc, 0); /* ilocalsym */ + mbuf_u32(&lc, nlocal); + mbuf_u32(&lc, nlocal); + mbuf_u32(&lc, nextdef); + mbuf_u32(&lc, nlocal + nextdef); + mbuf_u32(&lc, nundef); + mbuf_u32(&lc, 0); + mbuf_u32(&lc, 0); /* tocoff, ntoc */ + mbuf_u32(&lc, 0); + mbuf_u32(&lc, 0); /* modtaboff, nmodtab */ + mbuf_u32(&lc, 0); + mbuf_u32(&lc, 0); /* extrefsymoff, nextrefsyms */ + mbuf_u32(&lc, x.indirect_off); + mbuf_u32(&lc, x.indirect.len / 4u); + mbuf_u32(&lc, 0); + mbuf_u32(&lc, 0); /* extreloff, nextrel */ + mbuf_u32(&lc, 0); + mbuf_u32(&lc, 0); /* locreloff, nlocrel */ + + /* LC_LOAD_DYLINKER */ + { + const char* dyld = "/usr/lib/dyld"; + u32 dyld_len = (u32)slice_from_cstr(dyld).len; + u32 cmd_size = (u32)ALIGN_UP((u64)(12u + dyld_len + 1u), 8u); + mbuf_u32(&lc, LC_LOAD_DYLINKER); + mbuf_u32(&lc, cmd_size); + mbuf_u32(&lc, 12u); /* name offset within cmd */ + u32 wrote = mbuf_str(&lc, dyld, dyld_len); + (void)wrote; + /* Pad to cmd_size. */ + while (lc.len < (u32)((u64)mbuf_align(&lc, 1) + 0)) { + /* no-op */ + break; + } + /* Re-align to cmd_size. */ + u32 want = (u32)(lc.len); + /* Walk back: lc grew by 12 + (strlen+1). Pad to cmd_size. */ + u32 cmd_start_back = lc.len - (12u + dyld_len + 1u); + u32 pad_needed = cmd_size - (lc.len - cmd_start_back); + while (pad_needed-- > 0) mbuf_u8(&lc, 0); + (void)want; + } + + /* LC_UUID */ + mbuf_u32(&lc, LC_UUID); + mbuf_u32(&lc, 24); + mbuf_append(&lc, x.uuid, 16); + + /* LC_BUILD_VERSION */ + mbuf_u32(&lc, LC_BUILD_VERSION); + mbuf_u32(&lc, 24); + mbuf_u32(&lc, 1); /* PLATFORM_MACOS */ + mbuf_u32(&lc, (12u << 16) | 0); /* minos 12.0.0 */ + mbuf_u32(&lc, (12u << 16) | 0); /* sdk 12.0.0 */ + mbuf_u32(&lc, 0); /* ntools */ + + /* LC_MAIN — entryoff is offset within __TEXT segment from its file + * start (0). 
*/ + mbuf_u32(&lc, LC_MAIN); + mbuf_u32(&lc, 24); + mbuf_u64(&lc, (u64)x.entry_offset); /* entryoff = vaddr - __TEXT.vmaddr */ + mbuf_u64(&lc, 0); /* stacksize */ + + /* LC_LOAD_DYLIB per dylib. */ + for (u32 i = 0; i < x.ndylibs; ++i) { + Slice nm_s = pool_slice(x.c->global, x.dylibs[i].install); + const char* nm = nm_s.s; + size_t nl = nm_s.len; + u32 cmd_size = (u32)ALIGN_UP((u64)(24u + (u32)nl + 1u), 8u); + u32 cmd_start = lc.len; + mbuf_u32(&lc, LC_LOAD_DYLIB); + mbuf_u32(&lc, cmd_size); + mbuf_u32(&lc, 24u); /* name offset */ + mbuf_u32(&lc, 0); /* timestamp */ + mbuf_u32(&lc, (1u << 16)); /* current_version 1.0 */ + mbuf_u32(&lc, (1u << 16)); /* compat_version 1.0 */ + mbuf_str(&lc, nm ? nm : "", (u32)nl); + while (lc.len - cmd_start < cmd_size) mbuf_u8(&lc, 0); + } + + /* LC_FUNCTION_STARTS / LC_DATA_IN_CODE — empty. */ + mbuf_u32(&lc, LC_FUNCTION_STARTS_C); + mbuf_u32(&lc, 16); + mbuf_u32(&lc, x.fn_starts_off); + mbuf_u32(&lc, 0); + + mbuf_u32(&lc, LC_DATA_IN_CODE_C); + mbuf_u32(&lc, 16); + mbuf_u32(&lc, x.data_in_code_off); + mbuf_u32(&lc, 0); + + /* LC_CODE_SIGNATURE */ + mbuf_u32(&lc, LC_CODE_SIGNATURE_C); + mbuf_u32(&lc, 16); + mbuf_u32(&lc, x.codesig_off); + mbuf_u32(&lc, x.codesig_size); + + /* Sanity: lc.len + MACHO_HDR64_SIZE must equal headers_size we + * predicted in plan_layout. If not, we mis-sized — panic. */ + if ((u64)lc.len + MACHO_HDR64_SIZE != x.headers_size) { + compiler_panic(x.c, no_loc(), + "link_macho: load-cmd size mismatch: predicted %llu got %u", + (unsigned long long)(x.headers_size - MACHO_HDR64_SIZE), + lc.len); + } + + /* ---- now stream the file ---- */ + /* The Writer in cfree allows seek; we'll write a flat buffer first + * (so we can hash it for codesig) and flush at the end. 
*/ + MByte file; + mbuf_init(&file, x.h); + + /* mach_header_64 */ + u32 ncmds = 0; + /* Recount: PAGEZERO + TEXT + maybe DATA_CONST + maybe DATA + LINKEDIT + * + chained + exports_trie + symtab + dysymtab + dyld + uuid + + * build_version + main + nDylibs + fn_starts + data_in_code + + * codesig. */ + ncmds += 2; /* PAGEZERO + TEXT */ + if (x.segs[2].nsects > 0) ncmds++; + if (x.segs[3].nsects > 0) ncmds++; + ncmds++; /* LINKEDIT */ + ncmds += 11 + x.ndylibs; + /* (chained, exports_trie, symtab, dysymtab, dyld, uuid, build_version, + * main, fn_starts, data_in_code, codesig) = 11 */ + + mbuf_u32(&file, MH_MAGIC_64); + mbuf_u32(&file, x.macho->cputype); + mbuf_u32(&file, x.macho->cpusubtype); + mbuf_u32(&file, MH_EXECUTE); + mbuf_u32(&file, ncmds); + mbuf_u32(&file, lc.len); + { + u32 mh_flags = MH_DYLDLINK | MH_TWOLEVEL | MH_NOUNDEFS | MH_PIE; + /* dyld scans __thread_vars and allocates a pthread_key for each + * descriptor only when this flag is set; without it the descriptor's + * thunk pointer is silently patched to _tlv_bootstrap_error. Apple's + * ld sets it whenever the image contains S_THREAD_LOCAL_* sections. */ + if (x.ntlv) mh_flags |= MH_HAS_TLV_DESCRIPTORS; + mbuf_u32(&file, mh_flags); + } + mbuf_u32(&file, 0); /* reserved */ + mbuf_append(&file, lc.data, lc.len); + + /* Pad to first section's file offset. */ + /* __TEXT first section begins at headers_size; we wrote header+lc = + * headers_size, so no pad needed. Then each MSec's file_offset + * tells us where to write its bytes. */ + + /* Now emit segment payload bytes per MSec. */ + for (u32 i = 0; i < x.nsecs; ++i) { + MSec* m = &x.secs[i]; + if (m->is_zerofill || m->size == 0) continue; + /* Pad up to m->file_offset. */ + while (file.len < m->file_offset) mbuf_u8(&file, 0); + if (m->synth_data) { + mbuf_append(&file, m->synth_data, m->synth_size); + } else { + LinkSection* ls = &img->sections[m->link_sec_id - 1u]; + u32 segid = ls->segment_id; + u8* base = + (segid != LINK_SEG_NONE) ? 
img->segment_bytes[segid - 1u] : NULL; + if (base && ls->size) { + mbuf_append(&file, base + ls->input_offset, (u32)ls->size); + } else if (ls->size) { + for (u64 k = 0; k < ls->size; ++k) mbuf_u8(&file, 0); + } + } + } + + /* Pad to LINKEDIT start. */ + while (file.len < x.linkedit_fileoff) mbuf_u8(&file, 0); + + /* LINKEDIT contents in declared order. */ + while (file.len < x.chained_fixups_off) mbuf_u8(&file, 0); + mbuf_append(&file, x.chained_fixups.data, x.chained_fixups.len); + while (file.len < x.exports_trie_off) mbuf_u8(&file, 0); + mbuf_append(&file, x.exports_trie.data, x.exports_trie.len); + while (file.len < x.fn_starts_off) mbuf_u8(&file, 0); + /* fn_starts is empty */ + while (file.len < x.data_in_code_off) mbuf_u8(&file, 0); + /* empty */ + while (file.len < x.symtab_off) mbuf_u8(&file, 0); + mbuf_append(&file, x.symtab.data, x.symtab.len); + while (file.len < x.indirect_off) mbuf_u8(&file, 0); + mbuf_append(&file, x.indirect.data, x.indirect.len); + while (file.len < x.strtab_off) mbuf_u8(&file, 0); + mbuf_append(&file, x.strtab.data, x.strtab.len); + while (file.len < x.codesig_off) mbuf_u8(&file, 0); + + /* Compute codesig hashes over file bytes [0, codesig_off). */ + /* The codesig blob currently has zero hashes; hash now. */ + compute_codesig(&x, file.data, x.codesig_off, "a.out"); + /* Append codesig. */ + mbuf_append(&file, x.codesig.data, x.codesig.len); + + /* Stream out. */ + cfree_writer_seek(w, 0); + cfree_writer_write(w, file.data, file.len); + + /* Cleanup. 
*/ + fix_fini(&fl); + mbuf_fini(&lc); + mbuf_fini(&file); + mbuf_fini(&x.chained_fixups); + mbuf_fini(&x.exports_trie); + mbuf_fini(&x.symtab); + mbuf_fini(&x.strtab); + mbuf_fini(&x.indirect); + mbuf_fini(&x.fn_starts); + mbuf_fini(&x.data_in_code); + mbuf_fini(&x.codesig); + if (x.imports) x.h->free(x.h, x.imports, 0); /* VEC_GROW: cap unknown */ + if (x.dylibs) x.h->free(x.h, x.dylibs, 0); + if (x.sym_to_imp) + x.h->free(x.h, x.sym_to_imp, sizeof(u32) * x.sym_to_imp_size); + if (x.secs) x.h->free(x.h, x.secs, 0); + if (x.stubs_bytes) x.h->free(x.h, x.stubs_bytes, x.stubs_size); + if (x.got_bytes) x.h->free(x.h, x.got_bytes, x.got_size); + if (x.tlv_ptrs_bytes) x.h->free(x.h, x.tlv_ptrs_bytes, x.tlv_ptrs_size); + if (x.tlv_slots) x.h->free(x.h, x.tlv_slots, 0); + if (x.sym_to_tlv) + x.h->free(x.h, x.sym_to_tlv, sizeof(u32) * x.sym_to_tlv_size); +} diff --git a/src/obj/macho.h b/src/obj/macho/macho.h diff --git a/src/obj/macho/read.c b/src/obj/macho/read.c @@ -0,0 +1,651 @@ +/* Mach-O MH_OBJECT reader. Parses a 64-bit little-endian relocatable + * object back into a fresh ObjBuilder. The post-finalize ObjBuilder + * shape is the canonical superset of the writer's input: + * read_macho of an emit_macho output produces an ObjBuilder + * shape-equivalent to the writer's input, modulo the synthesized + * "__SEG,__sect"-form section names. + * + * Scope: AArch64 little-endian, MH_OBJECT only (MH_DYLIB is a stub — + * the linker has no consumer for it yet). Other archs / endianness + * produce a compiler_panic with a diagnostic. 
*/ + +#include <string.h> + +#include "core/arena.h" +#include "core/bytes.h" +#include "core/heap.h" +#include "core/pool.h" +#include "core/slice.h" +#include "core/util.h" +#include "obj/format.h" +#include "obj/macho/macho.h" + +static SrcLoc no_loc(void) { + SrcLoc l = {0, 0, 0}; + return l; +} + +/* ---- mach-section scratch struct ---- */ + +typedef struct MSecRec { + char segname[16]; + char sectname[16]; + u32 seg_len; + u32 sect_len; + u64 addr; + u64 size; + u32 fileoff; + u32 align_log2; + u32 reloff; + u32 nreloc; + u32 flags; + u32 reserved2; + ObjSecId obj_sec; /* assigned in pass 1 */ +} MSecRec; + +static u32 fixed16_len(const char* s) { + u32 n = 0; + while (n < 16 && s[n] != 0) ++n; + return n; +} + +static u16 sec_kind_from_seg_sect(const char* segname, u32 seg_len, + const char* sectname, u32 sect_len, + u32 flags) { + u32 stype = flags & SECTION_TYPE; + if (stype == S_ZEROFILL || stype == S_THREAD_LOCAL_ZEROFILL) return SEC_BSS; + if (flags & S_ATTR_PURE_INSTRUCTIONS) return SEC_TEXT; + + if (seg_len == 7 && memcmp(segname, "__DWARF", 7) == 0) return SEC_DEBUG; + if (seg_len == 6 && memcmp(segname, "__TEXT", 6) == 0) { + if (sect_len == 6 && memcmp(sectname, "__text", 6) == 0) return SEC_TEXT; + return SEC_RODATA; /* __const, __cstring, ... 
*/ + } + if (seg_len == 6 && memcmp(segname, "__DATA", 6) == 0) { + if (sect_len == 5 && memcmp(sectname, "__bss", 5) == 0) return SEC_BSS; + return SEC_DATA; + } + return SEC_OTHER; +} + +static u16 sec_flags_from(u32 mflags, u16 sec_kind) { + u16 f = 0; + if (sec_kind == SEC_TEXT || (mflags & S_ATTR_PURE_INSTRUCTIONS)) { + f |= SF_ALLOC | SF_EXEC; + } else if (sec_kind == SEC_RODATA) { + f |= SF_ALLOC; + } else if (sec_kind == SEC_DATA || sec_kind == SEC_BSS) { + f |= SF_ALLOC | SF_WRITE; + } + u32 stype = mflags & SECTION_TYPE; + if (stype == S_THREAD_LOCAL_REGULAR || stype == S_THREAD_LOCAL_ZEROFILL || + stype == S_THREAD_LOCAL_VARIABLES) { + f |= SF_TLS; + } + if (stype == S_CSTRING_LITERALS) { + f |= SF_MERGE | SF_STRINGS; + } + return f; +} + +static u16 sec_sem_from(u32 mflags, u16 sec_kind) { + u32 stype = mflags & SECTION_TYPE; + if (stype == S_ZEROFILL || stype == S_THREAD_LOCAL_ZEROFILL || + sec_kind == SEC_BSS) { + return SSEM_NOBITS; + } + if (stype == S_MOD_INIT_FUNC_POINTERS) return SSEM_INIT_ARRAY; + if (stype == S_MOD_TERM_FUNC_POINTERS) return SSEM_FINI_ARRAY; + return SSEM_PROGBITS; +} + +ObjBuilder* read_macho(Compiler* c, const char* name, const u8* data, + size_t len) { + (void)name; + if (len < MACHO_HDR64_SIZE) + compiler_panic(c, no_loc(), "read_macho: input shorter than header"); + + u32 magic = rd_u32_le(data + 0); + if (magic != MH_MAGIC_64) + compiler_panic(c, no_loc(), "read_macho: bad magic 0x%x", magic); + + u32 cputype = rd_u32_le(data + 4); + const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_MACHO); + const ObjMachoArchOps* macho = + fmt && fmt->macho_cputype ? 
fmt->macho_cputype(cputype) : NULL; + u32 filetype = rd_u32_le(data + 12); + u32 ncmds = rd_u32_le(data + 16); + u32 sizeofcmds = rd_u32_le(data + 20); + + if (!macho || !macho->reloc_from) + compiler_panic(c, no_loc(), "read_macho: unsupported cputype 0x%x", + cputype); + if (filetype != MH_OBJECT) + compiler_panic(c, no_loc(), + "read_macho: only MH_OBJECT supported, got filetype %u", + filetype); + + if ((u64)MACHO_HDR64_SIZE + sizeofcmds > len) + compiler_panic(c, no_loc(), "read_macho: load commands exceed file"); + + /* ---- pass 1: walk load commands, collect sections, symtab cmd. */ + MSecRec* msecs = NULL; + u32 nmsecs = 0; + u32 symoff = 0, nsyms = 0, stroff = 0, strsize = 0; + + u64 pos = MACHO_HDR64_SIZE; + u64 end = pos + sizeofcmds; + for (u32 ci = 0; ci < ncmds && pos + 8 <= end; ++ci) { + u32 cmd = rd_u32_le(data + pos); + u32 cmdsize = rd_u32_le(data + pos + 4); + if (cmdsize < 8 || pos + cmdsize > end) + compiler_panic(c, no_loc(), "read_macho: malformed load command"); + + if (cmd == LC_SEGMENT_64) { + u32 nsects = rd_u32_le(data + pos + 64); + if (MACHO_SEGCMD64_SIZE + (u64)nsects * MACHO_SECT64_SIZE > cmdsize) + compiler_panic(c, no_loc(), "read_macho: segment cmd truncated"); + MSecRec* extra = arena_array(c->scratch, MSecRec, nmsecs + nsects); + if (msecs && nmsecs) memcpy(extra, msecs, sizeof(MSecRec) * nmsecs); + msecs = extra; + const u8* sp = data + pos + MACHO_SEGCMD64_SIZE; + for (u32 si = 0; si < nsects; ++si, sp += MACHO_SECT64_SIZE) { + MSecRec* m = &msecs[nmsecs++]; + memset(m, 0, sizeof *m); + memcpy(m->sectname, sp + 0, 16); + memcpy(m->segname, sp + 16, 16); + m->seg_len = fixed16_len(m->segname); + m->sect_len = fixed16_len(m->sectname); + m->addr = rd_u64_le(sp + 32); + m->size = rd_u64_le(sp + 40); + m->fileoff = rd_u32_le(sp + 48); + m->align_log2 = rd_u32_le(sp + 52); + m->reloff = rd_u32_le(sp + 56); + m->nreloc = rd_u32_le(sp + 60); + m->flags = rd_u32_le(sp + 64); + m->reserved2 = rd_u32_le(sp + 72); + } + } else if (cmd 
== LC_SYMTAB) { + symoff = rd_u32_le(data + pos + 8); + nsyms = rd_u32_le(data + pos + 12); + stroff = rd_u32_le(data + pos + 16); + strsize = rd_u32_le(data + pos + 20); + } + pos += cmdsize; + } + + if (stroff + (u64)strsize > len) + compiler_panic(c, no_loc(), "read_macho: string table out of range"); + if (symoff + (u64)nsyms * MACHO_NLIST64_SIZE > len) + compiler_panic(c, no_loc(), "read_macho: symbol table out of range"); + const u8* strtab = data + stroff; + + ObjBuilder* ob = obj_new(c); + if (!ob) compiler_panic(c, no_loc(), "read_macho: obj_new failed"); + + /* ---- pass 2: create ObjSecs and copy bytes. */ + for (u32 i = 0; i < nmsecs; ++i) { + MSecRec* m = &msecs[i]; + /* Build "__SEG,__sect"-form name; matches what emit_macho would + * round-trip back out. */ + char nmbuf[34]; + u32 nlen = 0; + memcpy(nmbuf + nlen, m->segname, m->seg_len); + nlen += m->seg_len; + nmbuf[nlen++] = ','; + memcpy(nmbuf + nlen, m->sectname, m->sect_len); + nlen += m->sect_len; + Sym sn = pool_intern_slice(c->global, (Slice){.s = nmbuf, .len = nlen}); + + u16 kind = sec_kind_from_seg_sect(m->segname, m->seg_len, m->sectname, + m->sect_len, m->flags); + u16 flags = sec_flags_from(m->flags, kind); + u16 sem = sec_sem_from(m->flags, kind); + u32 align = 1u << (m->align_log2 & 31); + + ObjSecId id = obj_section_ex(ob, sn, (SecKind)kind, (SecSem)sem, flags, + align, m->reserved2, 0, 0); + if (id == OBJ_SEC_NONE) + compiler_panic(c, no_loc(), "read_macho: obj_section_ex failed"); + + /* Preserve the raw mach section.flags so emit_macho can write back + * the same S_TYPE / S_ATTR_* bits. */ + obj_section_set_ext(ob, id, OBJ_EXT_MACHO, m->flags, 0); + + if (sem == SSEM_NOBITS) { + obj_reserve_bss(ob, id, (u32)m->size, align); + } else if (m->size) { + if (m->fileoff + m->size > len) + compiler_panic(c, no_loc(), "read_macho: section bytes out of range"); + obj_write(ob, id, data + m->fileoff, (size_t)m->size); + } + m->obj_sec = id; + } + + /* ---- pass 3: parse symbol table. 
Two-pass strategy: first pass + * creates undefs (so relocations can refer to them), second + * pass creates defined locals/extdefs. Both write into + * mach_idx -> ObjSymId so reloc resolution works. */ + ObjSymId* sym_macho_to_obj = + arena_zarray(c->scratch, ObjSymId, nsyms ? nsyms : 1); + + const u8* sbase = data + symoff; + for (u32 i = 0; i < nsyms; ++i) { + const u8* p = sbase + (u64)i * MACHO_NLIST64_SIZE; + u32 strx = rd_u32_le(p + 0); + u8 n_type = p[4]; + u8 n_sect = p[5]; + u16 n_desc = rd_u16_le(p + 6); + u64 n_value = rd_u64_le(p + 8); + + const char* nm = ""; + u32 nlen = 0; + if (strx < strsize) { + nm = (const char*)(strtab + strx); + while (strx + nlen < strsize && nm[nlen]) ++nlen; + } + /* Mach-O names round-trip verbatim — the leading `_` Apple + * toolchains apply to C symbols is part of the on-disk name as + * far as ObjBuilder is concerned. Name-canonicalization (the + * `test_main` ↔ `_test_main` mapping for API callers) happens + * one layer up at the linker API boundary (link_c_name_intern + * in link.c); the on-disk shape stays byte-for-byte stable. */ + Sym sn = + nlen ? pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}) : 0; + + u8 type_field = (u8)(n_type & N_TYPE); + u8 ext = (u8)(n_type & N_EXT); + u8 pext = (u8)(n_type & N_PEXT); + + u16 bind = ext ? SB_GLOBAL : SB_LOCAL; + /* Weak DEFs (defined symbols) carry N_WEAK_DEF; weak REFs (undef + * `__attribute__((weak))` references) carry N_WEAK_REF. Either + * one collapses to SB_WEAK in the cfree model. */ + if (ext && (n_desc & (N_WEAK_DEF | N_WEAK_REF))) bind = SB_WEAK; + u8 vis = pext ? SV_HIDDEN : SV_DEFAULT; + + u16 kind; + ObjSecId sec_id = OBJ_SEC_NONE; + u64 value = 0; + u64 size = 0; + u64 cmnalign = 0; + + if (type_field == N_UNDF) { + if (ext && n_value != 0) { + /* Common: n_value is size, n_desc encodes log2(align) in + * GET_COMM_ALIGN bits. 
*/ + kind = SK_COMMON; + value = 0; + size = n_value; + u32 la = (u32)((n_desc >> 8) & 0xf); + cmnalign = 1u << la; + } else { + kind = SK_UNDEF; + } + } else if (type_field == N_ABS) { + kind = SK_ABS; + value = n_value; + } else if (type_field == N_SECT) { + if (n_sect == 0 || n_sect > nmsecs) { + kind = SK_NOTYPE; + } else { + sec_id = msecs[n_sect - 1].obj_sec; + /* Mach-O n_value for defined symbols is segment-relative addr; + * convert back to a section-local offset. */ + u64 base = msecs[n_sect - 1].addr; + value = (n_value >= base) ? (n_value - base) : 0; + kind = (msecs[n_sect - 1].flags & S_ATTR_PURE_INSTRUCTIONS) ? SK_FUNC + : SK_OBJ; + } + } else { + kind = SK_NOTYPE; + } + + ObjSymId id = obj_symbol_ex(ob, sn, (SymBind)bind, (SymVis)vis, + (SymKind)kind, sec_id, value, size, cmnalign); + obj_sym_mark_referenced(ob, id); + /* n_desc carries Mach-O attribute bits beyond what bind/vis/kind + * model — N_NO_DEAD_STRIP, N_REF_TO_WEAK, N_ARM_THUMB_DEF, etc. + * Mask off the bits we already round-trip via bind (N_WEAK_DEF / + * N_WEAK_REF) and the alignment field for commons (which lives + * in cmnalign), then stash the remainder so emit_macho can OR it + * back in. */ + u16 desc_pass = n_desc; + desc_pass &= (u16) ~(N_WEAK_DEF | N_WEAK_REF); + if (kind == SK_COMMON) desc_pass &= 0x00ff; /* drop align field */ + if (desc_pass) obj_symbol_set_flags(ob, id, desc_pass); + sym_macho_to_obj[i] = id; + } + + /* ---- pass 4: parse per-section relocations into ObjBuilder relocs. + * Mach-O encodes addends out-of-band as a leading + * ARM64_RELOC_ADDEND followed by the real reloc; the + * reader collapses the pair on the way in. */ + /* Lazily-populated section-start local symbols, for clang-emitted + * non-extern (section-relative) relocations. See the r_extern==0 + * branch below for the encoding. */ + ObjSymId* sec_start_sym = + arena_zarray(c->scratch, ObjSymId, nmsecs ? 
nmsecs : 1); + for (u32 i = 0; i < nmsecs; ++i) sec_start_sym[i] = OBJ_SYM_NONE; + for (u32 i = 0; i < nmsecs; ++i) { + MSecRec* m = &msecs[i]; + if (!m->nreloc) continue; + if (m->reloff + (u64)m->nreloc * MACHO_RELOC_SIZE > len) + compiler_panic(c, no_loc(), "read_macho: relocation table out of range"); + const u8* rp = data + m->reloff; + i64 pending_addend = 0; + int have_pending = 0; + int pending_subtractor = 0; + u32 pending_subtractor_offset = 0; + u32 pending_subtractor_length = 0; + for (u32 j = 0; j < m->nreloc; ++j) { + u32 r_address = rd_u32_le(rp + j * MACHO_RELOC_SIZE); + u32 packed = rd_u32_le(rp + j * MACHO_RELOC_SIZE + 4); + u32 r_symbolnum = packed & 0x00ffffffu; + u32 r_pcrel = (packed >> 24) & 1u; + u32 r_length = (packed >> 25) & 3u; + u32 r_extern = (packed >> 27) & 1u; + u32 r_type = (packed >> 28) & 0xfu; + + if (r_type == ARM64_RELOC_ADDEND) { + /* Sign-extend 24-bit addend. */ + i32 ad = (i32)(r_symbolnum & 0x00ffffffu); + if (ad & 0x00800000) ad |= ~0x00ffffff; + pending_addend = (i64)ad; + have_pending = 1; + continue; + } + + u32 kind; + if (r_type == ARM64_RELOC_SUBTRACTOR) { + kind = (r_length == 3) ? R_RV_SUB64 + : (r_length == 2) ? R_RV_SUB32 + : (r_length == 1) ? R_RV_SUB16 + : R_RV_SUB8; + } else { + kind = macho->reloc_from(r_type); + } + if (kind == (u32)-1) + compiler_panic(c, no_loc(), "read_macho: unsupported reloc type %u", + r_type); + + /* Refine kind by (r_pcrel, r_length) when the type field alone + * is ambiguous. ARM64_RELOC_UNSIGNED collapses R_ABS64/R_ABS32 + * and PC-relative variants. */ + if (r_type == ARM64_RELOC_UNSIGNED) { + if (pending_subtractor && pending_subtractor_offset == r_address && + pending_subtractor_length == r_length) { + kind = (r_length == 3) ? R_RV_ADD64 + : (r_length == 2) ? R_RV_ADD32 + : (r_length == 1) ? R_RV_ADD16 + : R_RV_ADD8; + pending_subtractor = 0; + } else if (r_pcrel) { + kind = (r_length == 3) ? R_PC64 : R_PC32; + } else { + kind = (r_length == 3) ? 
R_ABS64 : R_ABS32; + } + } else if (r_type == ARM64_RELOC_BRANCH26) { + kind = R_AARCH64_CALL26; + } else if (r_type == ARM64_RELOC_PAGEOFF12) { + /* PAGEOFF12 is access-size-agnostic in Mach-O; the linker + * applier needs to scale the immediate by the load/store size + * (or apply it raw for ADD). Inspect the patched instruction + * at r_address to pick the right RelocKind so the applier in + * link_reloc.c shifts the lo12 correctly. */ + if (m->fileoff + r_address + 4u > len) + compiler_panic(c, no_loc(), + "read_macho: PAGEOFF12 r_address %u out of range", + r_address); + u32 ins = rd_u32_le(data + m->fileoff + r_address); + /* ADD (immediate): bits 30:24 = 0010001 (W=10001 / X=10010001). + * Mask 0x7f800000 isolates sf=0/1 + the 0010001 pattern; values + * 0x11000000 (32-bit) and 0x91000000 (64-bit) — match the latter + * via the same 0x7f mask leaving bit 31 free. */ + if ((ins & 0x7f800000u) == 0x11000000u) { + kind = R_AARCH64_ADD_ABS_LO12_NC; + } else if ((ins & 0x3b000000u) == 0x39000000u) { + /* LDR/STR (immediate unsigned offset). Bits 29:27=111, bit 26=V + * (0=integer, 1=SIMD/FP), bits 25:24=01. size in [31:30] plus + * opc bit 23 for the SIMD 128-bit case (size=00, opc=11). */ + u32 sz = (ins >> 30) & 3u; + u32 v_bit = (ins >> 26) & 1u; + u32 opc1 = (ins >> 23) & 1u; + if (v_bit && sz == 0 && opc1) { + kind = R_AARCH64_LDST128_ABS_LO12_NC; + } else { + kind = (sz == 0) ? R_AARCH64_LDST8_ABS_LO12_NC + : (sz == 1) ? R_AARCH64_LDST16_ABS_LO12_NC + : (sz == 2) ? R_AARCH64_LDST32_ABS_LO12_NC + : R_AARCH64_LDST64_ABS_LO12_NC; + } + } + /* else: leave as the default R_AARCH64_ADD_ABS_LO12_NC. 
*/ + } + + ObjSymId target = OBJ_SYM_NONE; + i64 inplace_addend_override = 0; + int use_inplace_addend = 0; + if (r_extern) { + if (r_symbolnum < nsyms) target = sym_macho_to_obj[r_symbolnum]; + if (!have_pending && r_type == ARM64_RELOC_UNSIGNED) { + u32 rsz = 1u << r_length; + if ((u64)m->fileoff + r_address + rsz > len) + compiler_panic(c, no_loc(), + "read_macho: extern unsigned reloc r_address out " + "of range"); + const u8* pv = data + m->fileoff + r_address; + u64 inplace; + if (r_length == 3) + inplace = rd_u64_le(pv); + else if (r_length == 2) + inplace = (u64)rd_u32_le(pv); + else if (r_length == 1) + inplace = (u64)rd_u16_le(pv); + else + inplace = (u64)pv[0]; + inplace_addend_override = (i64)inplace; + use_inplace_addend = 1; + } + } else { + /* Section-relative reloc — clang emits these for compact unwind, + * EH frame, and DWARF debug info. r_symbolnum is the 1-based + * section index; the in-place value at r_address is the absolute + * .o virtual address of the referent. Synthesize a local + * symbol pointing to the target section's start (lazily, once + * per section) and re-express the reloc as + * target = sec_start_sym, addend = inplace - section.addr. */ + if (r_symbolnum == 0 || r_symbolnum > nmsecs) + compiler_panic(c, no_loc(), + "read_macho: section-relative reloc references " + "invalid section index %u", + r_symbolnum); + u32 sec_idx = r_symbolnum - 1u; + MSecRec* tm = &msecs[sec_idx]; + if (sec_start_sym[sec_idx] == OBJ_SYM_NONE) { + /* Build ".Lcfree.macho_secstart.<sec_idx>" without snprintf + * (the freestanding build doesn't pull in stdio). 
*/ + static const char prefix[] = ".Lcfree.macho_secstart."; + char nmbuf[sizeof(prefix) + 10]; + u32 nlen = (u32)(sizeof(prefix) - 1); + memcpy(nmbuf, prefix, nlen); + char dec[10]; + u32 dn = 0; + u32 v = sec_idx; + do { + dec[dn++] = (char)('0' + (v % 10u)); + v /= 10u; + } while (v); + for (u32 k = 0; k < dn; ++k) nmbuf[nlen + k] = dec[dn - 1 - k]; + nlen += dn; + Sym sn = + pool_intern_slice(c->global, (Slice){.s = nmbuf, .len = nlen}); + u16 sk = (tm->flags & S_ATTR_PURE_INSTRUCTIONS) ? SK_FUNC : SK_OBJ; + sec_start_sym[sec_idx] = + obj_symbol(ob, sn, SB_LOCAL, (SymKind)sk, tm->obj_sec, 0, 0); + } + target = sec_start_sym[sec_idx]; + u32 rsz = 1u << r_length; + if ((u64)m->fileoff + r_address + rsz > len) + compiler_panic(c, no_loc(), + "read_macho: non-extern reloc r_address out of range"); + u64 inplace; + const u8* pv = data + m->fileoff + r_address; + if (r_length == 3) + inplace = rd_u64_le(pv); + else if (r_length == 2) + inplace = (u64)rd_u32_le(pv); + else if (r_length == 1) + inplace = (u64)rd_u16_le(pv); + else + inplace = (u64)pv[0]; + inplace_addend_override = (i64)inplace - (i64)tm->addr; + use_inplace_addend = 1; + } + + i64 addend = have_pending + ? pending_addend + : (use_inplace_addend ? inplace_addend_override : 0); + int has_explicit = have_pending || use_inplace_addend || addend != 0; + have_pending = 0; + pending_addend = 0; + + obj_reloc_ex(ob, m->obj_sec, r_address, (RelocKind)kind, target, addend, + has_explicit, 0); + if (r_type == ARM64_RELOC_SUBTRACTOR) { + pending_subtractor = 1; + pending_subtractor_offset = r_address; + pending_subtractor_length = r_length; + } + } + } + + obj_finalize(ob); + return ob; +} + +/* ---- read_macho_dso ---- + * + * MH_DYLIB reader. Walks load commands once to find LC_ID_DYLIB + * (install-name) and LC_SYMTAB (symbol table + string table), then + * emits one defined ObjSym per externally-visible nlist entry. 
+ * + * Like read_elf_dso, the produced ObjBuilder carries no sections / + * relocations / groups — only symbol definitions in OBJ_SEC_NONE. The + * consumer's resolve_undefs sees these as defined globals and marks the + * matching consumer-side undef as `imported`. The dylib's own undefs + * (its imports of other dylibs) are filtered: they don't satisfy any + * undef in the consumer. */ + +ObjBuilder* read_macho_dso(Compiler* c, const char* name, const u8* data, + size_t len, Sym* install_name_out) { + (void)name; + if (install_name_out) *install_name_out = 0; + if (len < MACHO_HDR64_SIZE) + compiler_panic(c, no_loc(), "read_macho_dso: input shorter than header"); + + u32 magic = rd_u32_le(data + 0); + if (magic != MH_MAGIC_64) + compiler_panic(c, no_loc(), "read_macho_dso: bad magic 0x%x", magic); + + u32 cputype = rd_u32_le(data + 4); + u32 filetype = rd_u32_le(data + 12); + u32 ncmds = rd_u32_le(data + 16); + u32 sizeofcmds = rd_u32_le(data + 20); + + { + const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_MACHO); + const ObjMachoArchOps* macho = + fmt && fmt->macho_cputype ? 
fmt->macho_cputype(cputype) : NULL; + if (!macho) + compiler_panic(c, no_loc(), "read_macho_dso: unsupported cputype 0x%x", + cputype); + } + if (filetype != MH_DYLIB && filetype != MH_BUNDLE) + compiler_panic(c, no_loc(), + "read_macho_dso: not MH_DYLIB/MH_BUNDLE (filetype=%u)", + filetype); + if ((u64)MACHO_HDR64_SIZE + sizeofcmds > len) + compiler_panic(c, no_loc(), "read_macho_dso: load commands exceed file"); + + u32 symoff = 0, nsyms = 0, stroff = 0, strsize = 0; + Sym install_name = 0; + + u64 pos = MACHO_HDR64_SIZE; + u64 end = pos + sizeofcmds; + for (u32 ci = 0; ci < ncmds && pos + 8 <= end; ++ci) { + u32 cmd = rd_u32_le(data + pos); + u32 cmdsize = rd_u32_le(data + pos + 4); + if (cmdsize < 8 || pos + cmdsize > end) + compiler_panic(c, no_loc(), "read_macho_dso: malformed load command"); + if (cmd == LC_ID_DYLIB) { + /* dylib_command: cmd, cmdsize, name(lc_str: 4-byte offset within + * the cmd), timestamp, current_version, compat_version. */ + if (cmdsize < 24) goto next; + u32 nm_off = rd_u32_le(data + pos + 8); + if (nm_off >= cmdsize) goto next; + const char* p = (const char*)(data + pos + nm_off); + u32 maxlen = cmdsize - nm_off; + u32 nlen = 0; + while (nlen < maxlen && p[nlen]) ++nlen; + if (nlen) + install_name = + pool_intern_slice(c->global, (Slice){.s = p, .len = nlen}); + } else if (cmd == LC_SYMTAB) { + symoff = rd_u32_le(data + pos + 8); + nsyms = rd_u32_le(data + pos + 12); + stroff = rd_u32_le(data + pos + 16); + strsize = rd_u32_le(data + pos + 20); + } + next: + pos += cmdsize; + } + if (install_name_out) *install_name_out = install_name; + + if (stroff + (u64)strsize > len) + compiler_panic(c, no_loc(), "read_macho_dso: string table out of range"); + if (symoff + (u64)nsyms * MACHO_NLIST64_SIZE > len) + compiler_panic(c, no_loc(), "read_macho_dso: symbol table out of range"); + + ObjBuilder* ob = obj_new(c); + if (!ob) compiler_panic(c, no_loc(), "read_macho_dso: obj_new failed"); + + const u8* strtab = data + stroff; + const u8* sbase 
= data + symoff; + for (u32 i = 0; i < nsyms; ++i) { + const u8* p = sbase + (u64)i * MACHO_NLIST64_SIZE; + u32 strx = rd_u32_le(p + 0); + u8 n_type = p[4]; + u16 n_desc = rd_u16_le(p + 6); + + u8 type_field = (u8)(n_type & N_TYPE); + u8 ext = (u8)(n_type & N_EXT); + /* Skip non-external (locals) and undef refs (the dylib's own imports). */ + if (!ext) continue; + if (type_field == N_UNDF) continue; + /* N_INDR / N_PBUD / N_STAB: skip — not interesting for static link. */ + if (n_type & N_STAB) continue; + + if (strx >= strsize) continue; + const char* nm = (const char*)(strtab + strx); + u32 nlen = 0; + while (strx + nlen < strsize && nm[nlen]) ++nlen; + if (!nlen) continue; + Sym sn = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}); + + SymBind bind = (n_desc & (N_WEAK_DEF | N_WEAK_REF)) ? SB_WEAK : SB_GLOBAL; + SymKind kind = SK_NOTYPE; + /* Mach-O dylib nlist doesn't carry STT_FUNC / STT_OBJECT cleanly — + * default to NOTYPE. The consuming linker uses dso_export_is_func + * to peek at this for ELF; for Mach-O the `imported` decision flows + * through synthetic __got / __stubs regardless of kind. */ + { + ObjSymId did = + obj_symbol_ex(ob, sn, bind, SV_DEFAULT, kind, OBJ_SEC_NONE, 0, 0, 0); + obj_sym_mark_referenced(ob, did); + } + } + + obj_finalize(ob); + return ob; +} diff --git a/src/obj/macho/reloc_aarch64.c b/src/obj/macho/reloc_aarch64.c @@ -0,0 +1,113 @@ +/* RelocKind <-> arm64 Mach-O reloc-type mapping. Mirror of + * elf_reloc_aarch64.c for Mach-O. + * + * Mach-O relocations carry three independent fields that the cfree + * RelocKind enum collapses into a single value: r_type (the 4-bit + * ARM64_RELOC_* code), r_pcrel, and r_length. The translator therefore + * exposes three accessors — the writer (macho_emit.c) consults all of + * them per Reloc, and the reader (macho_read.c) inverts via + * macho_aarch64_reloc_from which keys on (r_type, r_pcrel, r_length). 
*/ + +#include "core/util.h" +#include "obj/macho/macho.h" + +u32 macho_aarch64_reloc_to(u32 kind /* RelocKind */) { + switch (kind) { + case R_NONE: + return (u32)-1; + case R_ABS64: + case R_ABS32: + return ARM64_RELOC_UNSIGNED; + case R_REL64: + case R_REL32: + case R_PC64: + case R_PC32: + /* PC-relative absolute pointer-difference; encoded as + * UNSIGNED with r_pcrel=1, length=3/2. */ + return ARM64_RELOC_UNSIGNED; + case R_AARCH64_JUMP26: + case R_AARCH64_CALL26: + return ARM64_RELOC_BRANCH26; + case R_AARCH64_ADR_PREL_PG_HI21: + case R_AARCH64_ADR_PREL_PG_HI21_NC: + return ARM64_RELOC_PAGE21; + case R_AARCH64_ADD_ABS_LO12_NC: + case R_AARCH64_LDST8_ABS_LO12_NC: + case R_AARCH64_LDST16_ABS_LO12_NC: + case R_AARCH64_LDST32_ABS_LO12_NC: + case R_AARCH64_LDST64_ABS_LO12_NC: + case R_AARCH64_LDST128_ABS_LO12_NC: + return ARM64_RELOC_PAGEOFF12; + case R_AARCH64_ADR_GOT_PAGE: + return ARM64_RELOC_GOT_LOAD_PAGE21; + case R_AARCH64_LD64_GOT_LO12_NC: + return ARM64_RELOC_GOT_LOAD_PAGEOFF12; + case R_AARCH64_TLVP_LOAD_PAGE21: + return ARM64_RELOC_TLVP_LOAD_PAGE21; + case R_AARCH64_TLVP_LOAD_PAGEOFF12: + return ARM64_RELOC_TLVP_LOAD_PAGEOFF12; + default: + return (u32)-1; + } +} + +u32 macho_aarch64_reloc_pcrel(u32 kind /* RelocKind */) { + switch (kind) { + case R_REL64: + case R_REL32: + case R_PC64: + case R_PC32: + case R_AARCH64_JUMP26: + case R_AARCH64_CALL26: + case R_AARCH64_ADR_PREL_PG_HI21: + case R_AARCH64_ADR_PREL_PG_HI21_NC: + case R_AARCH64_ADR_GOT_PAGE: + case R_AARCH64_TLVP_LOAD_PAGE21: + return 1; + default: + return 0; + } +} + +u32 macho_aarch64_reloc_length(u32 kind /* RelocKind */) { + /* log2 of the patch width in bytes: 0=byte, 1=hword, 2=word, 3=quad. + * AArch64 instructions are 4 bytes and Mach-O encodes any 32-bit fixup + * (BRANCH26, PAGE21, PAGEOFF12, ...) with length=2. 
*/ + switch (kind) { + case R_ABS64: + case R_REL64: + case R_PC64: + return 3; + default: + return 2; + } +} + +u32 macho_aarch64_reloc_from(u32 macho_type) { + /* The (r_type, r_pcrel, r_length) tuple disambiguates several kinds + * collapsed by macho_aarch64_reloc_to. The reader inspects pcrel and + * length itself when it matters; this function only maps the type + * field, returning the most common AArch64 instance for each. Reader + * callers refine via the pcrel/length companion if they need to + * distinguish R_ABS64 vs R_PC64 (both UNSIGNED). */ + switch (macho_type) { + case ARM64_RELOC_UNSIGNED: + return R_ABS64; + case ARM64_RELOC_BRANCH26: + return R_AARCH64_CALL26; + case ARM64_RELOC_PAGE21: + return R_AARCH64_ADR_PREL_PG_HI21; + case ARM64_RELOC_PAGEOFF12: + return R_AARCH64_ADD_ABS_LO12_NC; + case ARM64_RELOC_GOT_LOAD_PAGE21: + return R_AARCH64_ADR_GOT_PAGE; + case ARM64_RELOC_GOT_LOAD_PAGEOFF12: + return R_AARCH64_LD64_GOT_LO12_NC; + case ARM64_RELOC_TLVP_LOAD_PAGE21: + return R_AARCH64_TLVP_LOAD_PAGE21; + case ARM64_RELOC_TLVP_LOAD_PAGEOFF12: + return R_AARCH64_TLVP_LOAD_PAGEOFF12; + default: + return (u32)-1; + } +} diff --git a/src/obj/macho/reloc_x86_64.c b/src/obj/macho/reloc_x86_64.c @@ -0,0 +1,84 @@ +#include "core/util.h" +#include "obj/macho/macho.h" + +u32 macho_x86_64_reloc_to(u32 kind /* RelocKind */) { + switch (kind) { + case R_NONE: + return (u32)-1; + case R_ABS64: + case R_ABS32: + return X86_64_RELOC_UNSIGNED; + case R_PC32: + case R_REL32: + case R_PC64: + case R_REL64: + case R_X64_PC8: + return X86_64_RELOC_SIGNED; + case R_PLT32: + case R_X64_PLT32: + return X86_64_RELOC_BRANCH; + case R_X64_GOTPCRELX: + case R_X64_REX_GOTPCRELX: + return X86_64_RELOC_GOT_LOAD; + case R_X64_GOTPCREL: + return X86_64_RELOC_GOT; + case R_X64_TPOFF32: + return X86_64_RELOC_TLV; + default: + return (u32)-1; + } +} + +u32 macho_x86_64_reloc_pcrel(u32 kind /* RelocKind */) { + switch (kind) { + case R_PC32: + case R_REL32: + case R_PC64: + case 
R_REL64: + case R_X64_PC8: + case R_PLT32: + case R_X64_PLT32: + case R_X64_GOTPCREL: + case R_X64_GOTPCRELX: + case R_X64_REX_GOTPCRELX: + case R_X64_TPOFF32: + return 1; + default: + return 0; + } +} + +u32 macho_x86_64_reloc_length(u32 kind /* RelocKind */) { + switch (kind) { + case R_ABS64: + case R_PC64: + case R_REL64: + return 3; + case R_X64_PC8: + return 0; + default: + return 2; + } +} + +u32 macho_x86_64_reloc_from(u32 macho_type) { + switch (macho_type) { + case X86_64_RELOC_UNSIGNED: + return R_ABS64; + case X86_64_RELOC_SIGNED: + case X86_64_RELOC_SIGNED_1: + case X86_64_RELOC_SIGNED_2: + case X86_64_RELOC_SIGNED_4: + return R_PC32; + case X86_64_RELOC_BRANCH: + return R_X64_PLT32; + case X86_64_RELOC_GOT_LOAD: + return R_X64_REX_GOTPCRELX; + case X86_64_RELOC_GOT: + return R_X64_GOTPCREL; + case X86_64_RELOC_TLV: + return R_X64_TPOFF32; + default: + return (u32)-1; + } +} diff --git a/src/obj/tbd_read.c b/src/obj/macho/tbd_read.c diff --git a/src/obj/macho_emit.c b/src/obj/macho_emit.c @@ -1,791 +0,0 @@ -/* Mach-O MH_OBJECT writer. Walks a finalized ObjBuilder and emits a - * 64-bit little-endian relocatable object via the supplied Writer. - * - * Layout strategy (MH_OBJECT — everything in one anonymous segment): - * 1. plan Mach-O sections (one per non-symtab/strtab/rela ObjSection), - * mapping cfree section names to (segname, sectname) pairs; - * 2. partition ObjSyms into local / extdef / undef and assign final - * indices for LC_DYSYMTAB; - * 3. build per-section relocation tables via the per-arch translator - * (only aarch64 is wired today); - * 4. assign file offsets sequentially: header, load commands, section - * bytes, relocation tables, symbol table, string table; - * 5. write header → load commands → section bytes → relocs → symtab - * → strtab. - * - * 64-bit little-endian only. Big-endian / 32-bit panics at entry. 
- * - * Round-trip invariant: read_macho of - * this output must produce an ObjBuilder shape-equivalent to the input, - * modulo (a) Mach-O's mandatory (segname, sectname) pairing and (b) - * any synthesized N_SECT symbols. The (segname,sectname) form chosen - * here is the canonical post-roundtrip shape — read_macho stores the - * comma-joined "__SEG,__sect" form in Section.name so a re-emit - * produces the same bytes. */ - -#include <string.h> - -#include "arch/arch.h" -#include "core/arena.h" -#include "core/buf.h" -#include "core/bytes.h" -#include "core/heap.h" -#include "core/pool.h" -#include "core/slice.h" -#include "core/util.h" -#include "obj/macho.h" - -static SrcLoc no_loc(void) { - SrcLoc l = {0, 0, 0}; - return l; -} - -/* ---- LE writer helpers (Writer-based) ---- */ - -static void wr_u32(Writer* w, u32 v) { - u8 b[4]; - wr_u32_le(b, v); - cfree_writer_write(w, b, 4); -} - -static void wr_u64(Writer* w, u64 v) { - u8 b[8]; - wr_u64_le(b, v); - cfree_writer_write(w, b, 8); -} - -static void wr_name16(Writer* w, const char* s, u32 len) { - /* Mach-O section/segment names are 16-byte zero-padded fields. Names - * longer than 16 are truncated; the on-disk format leaves no room for - * a longer encoding. */ - u8 buf[16]; - u32 n = len > 16 ? 16 : len; - memcpy(buf, s, n); - if (n < 16) memset(buf + n, 0, 16 - n); - cfree_writer_write(w, buf, 16); -} - -/* ---- (segname,sectname) derivation ---- */ - -/* Split a cfree section name into Mach-O (segname, sectname) pair. - * If `name` contains a comma, it is treated as already in - * "__SEG,__sect" form and split at the first comma. Otherwise we - * derive the pair from SecKind, ignoring `name` (the input was an - * ELF-shaped name like ".text" or ".rodata"). */ -typedef struct MSegSect { - char segname[16]; - char sectname[16]; - u32 seg_len; - u32 sect_len; -} MSegSect; - -static void copy_fixed16(char* dst, u32* len_out, const char* src, u32 src_len) { - u32 n = src_len > 16 ? 
16 : src_len; - memcpy(dst, src, n); - if (n < 16) memset(dst + n, 0, 16 - n); - *len_out = n; -} - -static void name_to_seg_sect(const char* name, u32 nlen, u16 sec_kind, - MSegSect* out) { - /* Comma-separated form: take prefix as segname, suffix as sectname. */ - for (u32 i = 0; i < nlen; ++i) { - if (name[i] == ',') { - copy_fixed16(out->segname, &out->seg_len, name, i); - copy_fixed16(out->sectname, &out->sect_len, name + i + 1, - nlen - i - 1); - return; - } - } - - /* Not comma-separated. Derive from SecKind; ignore `name`. */ - const char* seg; - const char* sect; - switch (sec_kind) { - case SEC_TEXT: - seg = "__TEXT"; - sect = "__text"; - break; - case SEC_RODATA: - seg = "__TEXT"; - sect = "__const"; - break; - case SEC_DATA: - seg = "__DATA"; - sect = "__data"; - break; - case SEC_BSS: - seg = "__DATA"; - sect = "__bss"; - break; - case SEC_DEBUG: - seg = "__DWARF"; - /* Strip a leading `.` from the input name (".debug_info" → - * "__debug_info") so the dwarf section names round-trip. */ - sect = (nlen && name[0] == '.') ? name + 1 : name; - copy_fixed16(out->segname, &out->seg_len, seg, - (u32)slice_from_cstr(seg).len); - copy_fixed16(out->sectname, &out->sect_len, sect, - (u32)((nlen && name[0] == '.') ? 
nlen - 1 : nlen)); - return; - default: - seg = "__DATA"; - sect = "__data"; - break; - } - copy_fixed16(out->segname, &out->seg_len, seg, (u32)slice_from_cstr(seg).len); - copy_fixed16(out->sectname, &out->sect_len, sect, - (u32)slice_from_cstr(sect).len); -} - -/* ---- per-section plan ---- */ - -typedef struct MSec { - MSegSect ns; - u64 addr; /* assigned vmaddr within the segment */ - u64 size; /* bytes (or bss size) */ - u32 fileoff; /* 0 for zerofill */ - u32 align; /* power-of-two; stored as log2 in section_64.align */ - u32 reloff; /* 0 if no relocs */ - u32 nreloc; - u32 flags; /* S_TYPE | S_ATTR_* */ - u32 entsize; - u32 obj_sec; /* originating ObjSecId */ - int is_zerofill; - const Buf* obj_bytes; /* NULL when zerofill */ - u8* relocs; /* arena-allocated; nreloc * 8 bytes */ -} MSec; - -static u32 log2_align(u32 a) { - u32 r = 0; - while ((1u << r) < a) ++r; - return r; -} - -static u32 section_flags_for(u16 sec_kind, u16 sec_flags, const char* sectname, - u32 sect_len) { - u32 f = 0; - if (sec_kind == SEC_TEXT || (sec_flags & SF_EXEC)) { - f |= S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS; - } - if (sec_flags & SF_TLS) { - /* Mach-O distinguishes three TLV section types by sectname: - * __thread_data → S_THREAD_LOCAL_REGULAR (initial data) - * __thread_bss → S_THREAD_LOCAL_ZEROFILL (zero-init data) - * __thread_vars → S_THREAD_LOCAL_VARIABLES (descriptor records) - * dyld dispatches its TLV-bootstrap pass off the S_TYPE; the - * S_ATTR_* bits don't carry TLV semantics so we just emit the type. */ - if (sect_len >= 13 && memcmp(sectname, "__thread_vars", 13) == 0) - return S_THREAD_LOCAL_VARIABLES; - if (sec_kind == SEC_BSS) return S_THREAD_LOCAL_ZEROFILL; - return S_THREAD_LOCAL_REGULAR; - } - if (sec_kind == SEC_BSS || (sect_len >= 5 && memcmp(sectname, "__bss", 5) == 0)) { - f |= S_ZEROFILL; - } - if (sec_flags & SF_STRINGS) { - f = (f & ~SECTION_TYPE) | S_CSTRING_LITERALS; - } - /* Default S_REGULAR (0) for all others. 
*/ - return f; -} - -/* ---- symbol partition ---- */ - -typedef struct MSym { - ObjSymId obj_id; - u32 strx; /* offset in string table */ - u8 n_type; - u8 n_sect; - u16 n_desc; - u64 n_value; -} MSym; - -static int sym_is_undef(const ObjSym* s) { - return s->section_id == OBJ_SEC_NONE && s->kind != SK_ABS && - s->kind != SK_COMMON; -} - -static int sym_is_extdef(const ObjSym* s) { - if (sym_is_undef(s)) return 0; - return s->bind == SB_GLOBAL || s->bind == SB_WEAK; -} - -/* ---- string table ---- - * - * Mach-O strtab: leading zero byte at offset 0 represents the empty - * string. Entries are NUL-terminated; we don't dedupe (small symbol - * counts in v1; matches the simplest llvm output). The "_" prefix on - * C symbols is added inline in the writer below. */ - -void emit_macho(Compiler* c, ObjBuilder* ob, Writer* w) { - Heap* h = (Heap*)c->ctx->heap; - - /* Tombstone sweep first — strip/objcopy mutations and the historical - * UNDEF prune are both expressed via Section.removed / ObjSym.removed - * post-sweep. See obj_sweep_dead. */ - obj_sweep_dead(ob); - - /* ---- target validation ---------------------------------------- */ - const ArchImpl* arch = arch_for_compiler(c); - const ArchMachoOps* macho = arch ? 
arch->macho : NULL; - u32 cputype, cpusubtype; - u32 (*reloc_to)(u32); - u32 (*reloc_pcrel)(u32); - u32 (*reloc_length)(u32); - if (!macho || !macho->reloc_to || !macho->reloc_pcrel || - !macho->reloc_length) { - compiler_panic(c, no_loc(), "emit_macho: unsupported target arch %u", - (u32)c->target.arch); - } - cputype = macho->cputype; - cpusubtype = macho->cpusubtype; - reloc_to = macho->reloc_to; - reloc_pcrel = macho->reloc_pcrel; - reloc_length = macho->reloc_length; - if (c->target.big_endian) { - compiler_panic(c, no_loc(), "emit_macho: big-endian not supported"); - } - if (c->target.ptr_size != 8) { - compiler_panic(c, no_loc(), "emit_macho: ptr_size %u (expected 8)", - (u32)c->target.ptr_size); - } - - /* ---- pass 1: plan Mach-O sections ----------------------------- */ - u32 nobjsec = obj_section_count(ob); - MSec* secs = arena_zarray(c->scratch, MSec, nobjsec ? nobjsec : 1); - u32* obj_to_msec = arena_zarray(c->scratch, u32, nobjsec ? nobjsec : 1); - u32 nsecs = 0; - - for (u32 i = 1; i < nobjsec; ++i) { - const Section* s = obj_section_get(ob, i); - if (s->removed) continue; /* see obj_sweep_dead */ - /* Skip ELF-style synthetic sections that read_elf would have - * filtered: SYMTAB / STRTAB / RELA / GROUP have no Mach-O - * representation as data sections. */ - if (s->sem == SSEM_SYMTAB || s->sem == SSEM_STRTAB || - s->sem == SSEM_RELA || s->sem == SSEM_REL || - s->sem == SSEM_GROUP) { - continue; - } - Slice nm_s = pool_slice(c->global, s->name); - const char* nm = nm_s.s; - size_t nlen = nm_s.len; - MSec* m = &secs[nsecs]; - name_to_seg_sect(nm ? nm : "", (u32)nlen, s->kind, &m->ns); - m->obj_sec = i; - m->align = s->align ? s->align : 1; - m->entsize = s->entsize; - /* Mach-O reader stashes the raw section.flags (S_TYPE | S_ATTR_*) - * in Section.ext_type when reading a Mach-O input. Use it - * verbatim so attribute bits like S_ATTR_NO_DEAD_STRIP / - * S_ATTR_LIVE_SUPPORT round-trip. 
Fall back to the kind-derived - * default for sections originating from non-Mach-O readers (e.g. - * cfree codegen). */ - if (s->ext_kind == OBJ_EXT_MACHO && s->ext_type) { - m->flags = s->ext_type; - } else { - m->flags = section_flags_for(s->kind, s->flags, m->ns.sectname, - m->ns.sect_len); - } - if (s->sem == SSEM_NOBITS || s->kind == SEC_BSS) { - m->is_zerofill = 1; - m->size = s->bss_size; - m->obj_bytes = NULL; - /* Preserve S_THREAD_LOCAL_ZEROFILL when SF_TLS routed us there; - * a regular BSS section gets the plain S_ZEROFILL type. */ - u32 stype = m->flags & SECTION_TYPE; - if (stype != S_THREAD_LOCAL_ZEROFILL) - m->flags = (m->flags & ~SECTION_TYPE) | S_ZEROFILL; - } else { - m->is_zerofill = 0; - m->size = s->bytes.total; - m->obj_bytes = &s->bytes; - } - obj_to_msec[i] = nsecs + 1; /* 1-based: matches Mach-O n_sect. */ - nsecs++; - } - - /* ---- pass 2: assign vmaddrs (segment-relative) and per-section - * flat-layout addresses. MH_OBJECT keeps everything in - * one segment with vmaddr=0; section addr fields are - * relative offsets within the segment. - * - * Two-pass to match the conventional Mach-O `MH_OBJECT` layout: - * non-zerofill sections come first in vmaddr order, then zerofill - * sections at the tail. Apple `as` and clang `-c` both lay out - * this way, and roundtripping must reproduce it so symbol n_values - * (which are segment-relative addresses) compare equal. 
*/ - u64 cur_addr = 0; - for (u32 i = 0; i < nsecs; ++i) { - MSec* m = &secs[i]; - if (m->is_zerofill) continue; - cur_addr = ALIGN_UP(cur_addr, (u64)m->align); - m->addr = cur_addr; - cur_addr += m->size; - } - for (u32 i = 0; i < nsecs; ++i) { - MSec* m = &secs[i]; - if (!m->is_zerofill) continue; - cur_addr = ALIGN_UP(cur_addr, (u64)m->align); - m->addr = cur_addr; - cur_addr += m->size; - } - u64 segment_vmsize = cur_addr; - - /* ---- pass 3: partition symbols (locals, extdefs, undefs) ------ */ - u32 nobjsym = 0; - { - ObjSymIter* it = obj_symiter_new(ob); - ObjSymEntry e; - while (obj_symiter_next(it, &e)) ++nobjsym; - obj_symiter_free(it); - } - - MSym* msyms = arena_zarray(c->scratch, MSym, nobjsym + 1); - u32 nmsyms = 0; - u32* sym_obj_to_macho = - arena_zarray(c->scratch, u32, nobjsym + 2); /* obj_id -> mach idx */ - - Buf strtab; - buf_init(&strtab, h); - /* Mach-O strtab convention: the first byte is " " (space) or NUL — - * llvm/Apple emit a single NUL. We start with NUL for offset 0. */ - { - u8 z = 0; - buf_write(&strtab, &z, 1); - } - - /* Emit in three passes so n_type/sect ordering matches LC_DYSYMTAB - * (locals, then extdefs, then undefs). */ - for (int pass = 0; pass < 3; ++pass) { - ObjSymIter* it = obj_symiter_new(ob); - ObjSymEntry e; - while (obj_symiter_next(it, &e)) { - const ObjSym* s = e.sym; - if (s->removed) continue; /* spurious-UNDEF prune + explicit removal */ - int undef = sym_is_undef(s); - int extdef = sym_is_extdef(s); - int local = !undef && !extdef; - int want = (pass == 0 && local) || (pass == 1 && extdef) || - (pass == 2 && undef); - if (!want) continue; - MSym* ms = &msyms[nmsyms]; - ms->obj_id = e.id; - - Slice nm_s = pool_slice(c->global, s->name); - const char* nm = nm_s.s; - size_t nlen = nm_s.len; - /* Mach-O symbol names are stored on disk verbatim — including - * the leading `_` Apple toolchains use for C-source-level - * symbols ("_main" for `int main()`). 
cfree treats the prefix - * as part of the on-disk name, not a transform applied at emit. - * Name-canonicalization for API callers (cfree_jit_lookup, - * link_set_entry) lives one layer up at the linker boundary - * (link.c), so emit/read stay byte-for-byte stable. */ - if (nlen && nm) { - u32 off = buf_pos(&strtab); - buf_write(&strtab, nm, nlen); - u8 z = 0; - buf_write(&strtab, &z, 1); - ms->strx = off; - } else { - ms->strx = 0; - } - - u8 type = 0; - if (extdef) type |= N_EXT; - if (s->vis == SV_HIDDEN || s->vis == SV_INTERNAL) { - /* Mach-O encodes hidden externals as N_PEXT|N_EXT. */ - type |= N_PEXT; - } - u8 n_sect = NO_SECT; - u16 n_desc = 0; - u64 value = s->value; - - if (undef) { - type |= N_UNDF; - /* Undefined symbols with non-LOCAL bind are external references - * (the common case — every `extern int x;`). Setting N_EXT - * matches what clang emits and what Apple `ld` expects. */ - if (s->bind == SB_GLOBAL || s->bind == SB_WEAK) type |= N_EXT; - if (s->bind == SB_WEAK) n_desc |= N_WEAK_REF; - value = 0; - } else if (s->kind == SK_ABS) { - type |= N_ABS; - } else if (s->kind == SK_COMMON) { - /* Mach-O common symbols are N_UNDF|N_EXT with n_value=size and - * n_desc carrying log2(align) in the GET_COMM_ALIGN bits. */ - type = N_UNDF | N_EXT; - value = s->size; - u32 a = s->common_align ? (u32)s->common_align : 1; - n_desc = (u16)(log2_align(a) << 8); /* GET_COMM_ALIGN field */ - } else { - type |= N_SECT; - u32 ms_idx = (s->section_id < nobjsec) ? obj_to_msec[s->section_id] : 0; - n_sect = (u8)ms_idx; - if (n_sect && n_sect <= nsecs) { - value = secs[n_sect - 1].addr + s->value; - } - if (s->bind == SB_WEAK) n_desc |= N_WEAK_DEF; - } - - /* OR in any pass-through n_desc bits the reader stashed in - * sym->flags (N_NO_DEAD_STRIP, etc.). The bits we already - * compute (N_WEAK_DEF / N_WEAK_REF and the common-alignment - * field) are already excluded by read_macho before stashing, - * so a plain OR can't double-count. 
*/ - n_desc |= s->flags; - - ms->n_type = type; - ms->n_sect = n_sect; - ms->n_desc = n_desc; - ms->n_value = value; - - sym_obj_to_macho[e.id] = nmsyms + 1; /* 1-based index, 0 = none. */ - nmsyms++; - } - obj_symiter_free(it); - } - - u32 nlocals = 0, nextdefs = 0, nundefs = 0; - for (u32 i = 0; i < nmsyms; ++i) { - u8 t = msyms[i].n_type; - u8 ext = (t & N_EXT) != 0; - u8 typ = (u8)(t & N_TYPE); - if (typ == N_UNDF && ext) { - /* Could be undef or common — common has nonzero n_value. */ - if (msyms[i].n_value != 0) - ++nextdefs; /* common is conventionally extdef-shaped */ - else - ++nundefs; - } else if (ext) { - ++nextdefs; - } else { - ++nlocals; - } - } - /* Re-derive without the common fudge by counting partition pass: we - * already wrote them in (locals,extdefs,undefs) order, so the prefix - * counts are just the per-pass counts. Mirror the spurious-UNDEF - * prune from the emit loop above so the LC_DYSYMTAB index counts - * line up with the symbols we actually wrote. */ - nlocals = 0; - nextdefs = 0; - nundefs = 0; - { - ObjSymIter* it = obj_symiter_new(ob); - ObjSymEntry e; - while (obj_symiter_next(it, &e)) { - const ObjSym* s = e.sym; - if (s->removed) continue; - int undef = sym_is_undef(s); - if (undef) - ++nundefs; - else if (sym_is_extdef(s)) - ++nextdefs; - else - ++nlocals; - } - obj_symiter_free(it); - } - - /* ---- pass 4: build per-section relocation tables -------------- */ - u32 total_relocs = obj_reloc_total(ob); - for (u32 i = 0; i < nsecs; ++i) { - MSec* m = &secs[i]; - u32 nr = obj_reloc_count(ob, m->obj_sec); - if (!nr) continue; - /* Worst case: each reloc may be preceded by an ARM64_RELOC_ADDEND - * pair entry. We size the buffer for that upper bound. 
*/ - u8* buf = (u8*)arena_alloc(c->scratch, (size_t)MACHO_RELOC_SIZE * nr * 2, - _Alignof(u32)); - u32 j = 0; - for (u32 ri = 0; ri < total_relocs; ++ri) { - const Reloc* r = obj_reloc_at(ob, ri); - if (r->removed) continue; - if (r->section_id != m->obj_sec) continue; - if ((r->kind == R_RV_ADD8 || r->kind == R_RV_ADD16 || - r->kind == R_RV_ADD32 || r->kind == R_RV_ADD64) && - ri + 1u < total_relocs) { - const Reloc* sub = obj_reloc_at(ob, ri + 1u); - int paired = - sub && sub->section_id == r->section_id && - sub->offset == r->offset && - ((r->kind == R_RV_ADD8 && sub->kind == R_RV_SUB8) || - (r->kind == R_RV_ADD16 && sub->kind == R_RV_SUB16) || - (r->kind == R_RV_ADD32 && sub->kind == R_RV_SUB32) || - (r->kind == R_RV_ADD64 && sub->kind == R_RV_SUB64)); - if (paired) { - u32 length = (r->kind == R_RV_ADD64) ? 3u - : (r->kind == R_RV_ADD32) ? 2u - : (r->kind == R_RV_ADD16) ? 1u - : 0u; - u32 add_idx; - u32 sub_idx; - u32 sub_type = c->target.arch == CFREE_ARCH_ARM_64 - ? ARM64_RELOC_SUBTRACTOR - : X86_64_RELOC_SUBTRACTOR; - u32 unsigned_type = c->target.arch == CFREE_ARCH_ARM_64 - ? 
ARM64_RELOC_UNSIGNED - : X86_64_RELOC_UNSIGNED; - if (r->sym == OBJ_SYM_NONE || sub->sym == OBJ_SYM_NONE) { - compiler_panic(c, no_loc(), - "emit_macho: symdiff reloc without symbol"); - } - add_idx = sym_obj_to_macho[r->sym]; - sub_idx = sym_obj_to_macho[sub->sym]; - if (add_idx == 0 || sub_idx == 0) { - compiler_panic(c, no_loc(), - "emit_macho: symdiff reloc target not in symtab"); - } - { - u8* slot = buf + (size_t)j * MACHO_RELOC_SIZE; - wr_u32_le(slot + 0, (u32)r->offset); - wr_u32_le(slot + 4, ((sub_idx - 1u) & 0x00ffffffu) | - (length << 25) | (1u << 27) | - ((sub_type & 0xfu) << 28)); - ++j; - } - { - u8* slot = buf + (size_t)j * MACHO_RELOC_SIZE; - wr_u32_le(slot + 0, (u32)r->offset); - wr_u32_le(slot + 4, ((add_idx - 1u) & 0x00ffffffu) | - (length << 25) | (1u << 27) | - ((unsigned_type & 0xfu) << 28)); - ++j; - } - ++ri; - continue; - } - } - u32 mtype = reloc_to(r->kind); - if (mtype == (u32)-1) { - compiler_panic(c, no_loc(), - "emit_macho: unsupported reloc kind %u for arch %u", - (u32)r->kind, (u32)c->target.arch); - } - u32 pcrel = reloc_pcrel(r->kind); - u32 length = reloc_length(r->kind); - - /* Resolve target — extern always 1 in our model (every Reloc has - * an ObjSymId). Skip relocs without a symbol — they would map to - * a section-relative reloc which the v1 cgtarget never emits. */ - if (r->sym == OBJ_SYM_NONE) { - compiler_panic(c, no_loc(), - "emit_macho: reloc without symbol not supported " - "(sec=%u offset=%u kind=%u)", - (u32)r->section_id, (u32)r->offset, (u32)r->kind); - } - u32 mach_sym_idx = sym_obj_to_macho[r->sym]; - if (mach_sym_idx == 0) { - compiler_panic(c, no_loc(), - "emit_macho: reloc target sym %u not in symtab", - (u32)r->sym); - } - u32 r_symbolnum = mach_sym_idx - 1; /* Mach-O uses 0-based. */ - - /* Non-zero addend: emit a leading ARM64_RELOC_ADDEND pair (only - * meaningful for non-UNSIGNED types — UNSIGNED carries the addend - * inline in the patched bytes). 
*/ - if (r->addend != 0 && mtype != ARM64_RELOC_UNSIGNED) { - u8* slot = buf + (size_t)j * MACHO_RELOC_SIZE; - wr_u32_le(slot + 0, (u32)r->offset); - u32 packed = ((u32)(i64)r->addend & 0x00ffffffu) | (0u << 24) | - (length << 25) | (1u << 27) /*extern*/ | - (ARM64_RELOC_ADDEND << 28); - wr_u32_le(slot + 4, packed); - ++j; - } - - u8* slot = buf + (size_t)j * MACHO_RELOC_SIZE; - wr_u32_le(slot + 0, (u32)r->offset); - u32 packed = (r_symbolnum & 0x00ffffffu) | - ((pcrel & 1u) << 24) | - ((length & 3u) << 25) | - (1u << 27) /*extern*/ | - ((mtype & 0xfu) << 28); - wr_u32_le(slot + 4, packed); - ++j; - } - m->relocs = buf; - m->nreloc = j; - } - - /* ---- pass 5: assign file offsets ------------------------------ */ - /* Layout after the load-command block: - * section bytes (in order, respecting align) - * relocation tables (per section, 4-aligned) - * symbol table (8-aligned) - * string table */ - u32 nload_cmds = 4; /* LC_SEGMENT_64 + LC_BUILD_VERSION + LC_SYMTAB + LC_DYSYMTAB */ - u32 segcmd_size = MACHO_SEGCMD64_SIZE + nsecs * MACHO_SECT64_SIZE; - u32 build_version_size = 24; /* fixed: cmd+cmdsize+platform+minos+sdk+ntools(0) */ - u32 sizeofcmds = segcmd_size + build_version_size + MACHO_SYMTAB_CMD_SIZE + - MACHO_DYSYMTAB_CMD_SIZE; - - u64 cur = MACHO_HDR64_SIZE + sizeofcmds; - u32 fileoff_first = (u32)cur; - for (u32 i = 0; i < nsecs; ++i) { - MSec* m = &secs[i]; - if (m->is_zerofill) { - m->fileoff = 0; - continue; - } - cur = ALIGN_UP(cur, (u64)m->align); - m->fileoff = (u32)cur; - cur += m->size; - } - - /* Reloc tables. 
*/ - for (u32 i = 0; i < nsecs; ++i) { - MSec* m = &secs[i]; - if (!m->nreloc) { - m->reloff = 0; - continue; - } - cur = ALIGN_UP(cur, (u64)4); - m->reloff = (u32)cur; - cur += (u64)m->nreloc * MACHO_RELOC_SIZE; - } - - cur = ALIGN_UP(cur, (u64)8); - u64 symoff = cur; - cur += (u64)nmsyms * MACHO_NLIST64_SIZE; - u64 stroff = cur; - u32 strtab_size = buf_pos(&strtab); - cur += strtab_size; - - /* ---- pass 6: write the file ------------------------------------ */ - cfree_writer_seek(w, 0); - - /* mach_header_64 */ - wr_u32(w, MH_MAGIC_64); - wr_u32(w, cputype); - wr_u32(w, cpusubtype); - wr_u32(w, MH_OBJECT); - wr_u32(w, nload_cmds); - wr_u32(w, sizeofcmds); - wr_u32(w, 0); /* flags — MH_OBJECT carries none in v1 */ - wr_u32(w, 0); /* reserved */ - - /* LC_SEGMENT_64 (anonymous, contains everything) */ - wr_u32(w, LC_SEGMENT_64); - wr_u32(w, segcmd_size); - wr_name16(w, "", 0); /* segname: empty for MH_OBJECT */ - wr_u64(w, 0); /* vmaddr */ - wr_u64(w, segment_vmsize); /* vmsize */ - wr_u64(w, fileoff_first); /* fileoff */ - /* filesize = bytes covered by non-zerofill sections (post-section - * file offset minus the start). */ - u64 filesize = 0; - for (u32 i = 0; i < nsecs; ++i) { - MSec* m = &secs[i]; - if (m->is_zerofill) continue; - u64 end = (u64)m->fileoff + m->size; - u64 begin = m->fileoff; - if (end > filesize + fileoff_first) filesize = end - fileoff_first; - (void)begin; - } - wr_u64(w, filesize); - /* maxprot/initprot — VM_PROT_READ|WRITE|EXECUTE = 7 for object segs. 
*/ - wr_u32(w, 7); - wr_u32(w, 7); - wr_u32(w, nsecs); - wr_u32(w, 0); /* flags */ - - /* sections inline within the segment command */ - for (u32 i = 0; i < nsecs; ++i) { - MSec* m = &secs[i]; - wr_name16(w, m->ns.sectname, m->ns.sect_len); - wr_name16(w, m->ns.segname, m->ns.seg_len); - wr_u64(w, m->addr); - wr_u64(w, m->size); - wr_u32(w, m->fileoff); - wr_u32(w, log2_align(m->align)); - wr_u32(w, m->reloff); - wr_u32(w, m->nreloc); - wr_u32(w, m->flags); - wr_u32(w, 0); /* reserved1 */ - wr_u32(w, m->entsize); /* reserved2 */ - wr_u32(w, 0); /* reserved3 */ - } - - /* LC_BUILD_VERSION — platform=PLATFORM_MACOS(1), minos/sdk=14.0.0, - * ntools=0. The exact min-version isn't load-bearing for MH_OBJECT, - * but Apple's `ld` warns when it's missing. */ - wr_u32(w, LC_BUILD_VERSION); - wr_u32(w, build_version_size); - wr_u32(w, 1); /* platform: PLATFORM_MACOS */ - wr_u32(w, (14u << 16) | 0); /* minos: 14.0.0 */ - wr_u32(w, (14u << 16) | 0); /* sdk: 14.0.0 */ - wr_u32(w, 0); /* ntools */ - - /* LC_SYMTAB */ - wr_u32(w, LC_SYMTAB); - wr_u32(w, MACHO_SYMTAB_CMD_SIZE); - wr_u32(w, (u32)symoff); - wr_u32(w, nmsyms); - wr_u32(w, (u32)stroff); - wr_u32(w, strtab_size); - - /* LC_DYSYMTAB */ - wr_u32(w, LC_DYSYMTAB); - wr_u32(w, MACHO_DYSYMTAB_CMD_SIZE); - wr_u32(w, 0); /* ilocalsym */ - wr_u32(w, nlocals); - wr_u32(w, nlocals); - wr_u32(w, nextdefs); - wr_u32(w, nlocals + nextdefs); - wr_u32(w, nundefs); - wr_u32(w, 0); wr_u32(w, 0); /* tocoff, ntoc */ - wr_u32(w, 0); wr_u32(w, 0); /* modtaboff, nmodtab */ - wr_u32(w, 0); wr_u32(w, 0); /* extrefsymoff, nextrefsyms */ - wr_u32(w, 0); wr_u32(w, 0); /* indirectsymoff, nindirectsyms */ - wr_u32(w, 0); wr_u32(w, 0); /* extreloff, nextrel */ - wr_u32(w, 0); wr_u32(w, 0); /* locreloff, nlocrel */ - - /* section bytes */ - for (u32 i = 0; i < nsecs; ++i) { - MSec* m = &secs[i]; - if (m->is_zerofill || !m->size) continue; - cfree_writer_seek(w, m->fileoff); - if (m->obj_bytes) { - u32 sz = m->obj_bytes->total; - u8* tmp = 
(u8*)h->alloc(h, sz ? sz : 1, 1); - if (sz) buf_flatten(m->obj_bytes, tmp); - cfree_writer_write(w, tmp, sz); - h->free(h, tmp, sz ? sz : 1); - } - } - - /* reloc tables */ - for (u32 i = 0; i < nsecs; ++i) { - MSec* m = &secs[i]; - if (!m->nreloc) continue; - cfree_writer_seek(w, m->reloff); - cfree_writer_write(w, m->relocs, (size_t)m->nreloc * MACHO_RELOC_SIZE); - } - - /* symtab */ - cfree_writer_seek(w, symoff); - for (u32 i = 0; i < nmsyms; ++i) { - const MSym* ms = &msyms[i]; - u8 entry[MACHO_NLIST64_SIZE]; - wr_u32_le(entry + 0, ms->strx); - entry[4] = ms->n_type; - entry[5] = ms->n_sect; - wr_u16_le(entry + 6, ms->n_desc); - wr_u64_le(entry + 8, ms->n_value); - cfree_writer_write(w, entry, MACHO_NLIST64_SIZE); - } - - /* strtab */ - { - u8* flat = (u8*)arena_alloc(c->scratch, strtab_size ? strtab_size : 1, 1); - if (strtab_size) buf_flatten(&strtab, flat); - cfree_writer_seek(w, stroff); - cfree_writer_write(w, flat, strtab_size); - } - buf_fini(&strtab); -} diff --git a/src/obj/macho_read.c b/src/obj/macho_read.c @@ -1,635 +0,0 @@ -/* Mach-O MH_OBJECT reader. Parses a 64-bit little-endian relocatable - * object back into a fresh ObjBuilder. The post-finalize ObjBuilder - * shape is the canonical superset of the writer's input: - * read_macho of an emit_macho output produces an ObjBuilder - * shape-equivalent to the writer's input, modulo the synthesized - * "__SEG,__sect"-form section names. - * - * Scope: AArch64 little-endian, MH_OBJECT only (MH_DYLIB is a stub — - * the linker has no consumer for it yet). Other archs / endianness - * produce a compiler_panic with a diagnostic. 
*/ - -#include <string.h> - -#include "arch/arch.h" -#include "core/arena.h" -#include "core/bytes.h" -#include "core/heap.h" -#include "core/pool.h" -#include "core/slice.h" -#include "core/util.h" -#include "obj/macho.h" - -static SrcLoc no_loc(void) { - SrcLoc l = {0, 0, 0}; - return l; -} - -/* ---- mach-section scratch struct ---- */ - -typedef struct MSecRec { - char segname[16]; - char sectname[16]; - u32 seg_len; - u32 sect_len; - u64 addr; - u64 size; - u32 fileoff; - u32 align_log2; - u32 reloff; - u32 nreloc; - u32 flags; - u32 reserved2; - ObjSecId obj_sec; /* assigned in pass 1 */ -} MSecRec; - -static u32 fixed16_len(const char* s) { - u32 n = 0; - while (n < 16 && s[n] != 0) ++n; - return n; -} - -static u16 sec_kind_from_seg_sect(const char* segname, u32 seg_len, - const char* sectname, u32 sect_len, - u32 flags) { - u32 stype = flags & SECTION_TYPE; - if (stype == S_ZEROFILL || stype == S_THREAD_LOCAL_ZEROFILL) return SEC_BSS; - if (flags & S_ATTR_PURE_INSTRUCTIONS) return SEC_TEXT; - - if (seg_len == 7 && memcmp(segname, "__DWARF", 7) == 0) return SEC_DEBUG; - if (seg_len == 6 && memcmp(segname, "__TEXT", 6) == 0) { - if (sect_len == 6 && memcmp(sectname, "__text", 6) == 0) return SEC_TEXT; - return SEC_RODATA; /* __const, __cstring, ... 
*/ - } - if (seg_len == 6 && memcmp(segname, "__DATA", 6) == 0) { - if (sect_len == 5 && memcmp(sectname, "__bss", 5) == 0) return SEC_BSS; - return SEC_DATA; - } - return SEC_OTHER; -} - -static u16 sec_flags_from(u32 mflags, u16 sec_kind) { - u16 f = 0; - if (sec_kind == SEC_TEXT || (mflags & S_ATTR_PURE_INSTRUCTIONS)) { - f |= SF_ALLOC | SF_EXEC; - } else if (sec_kind == SEC_RODATA) { - f |= SF_ALLOC; - } else if (sec_kind == SEC_DATA || sec_kind == SEC_BSS) { - f |= SF_ALLOC | SF_WRITE; - } - u32 stype = mflags & SECTION_TYPE; - if (stype == S_THREAD_LOCAL_REGULAR || stype == S_THREAD_LOCAL_ZEROFILL || - stype == S_THREAD_LOCAL_VARIABLES) { - f |= SF_TLS; - } - if (stype == S_CSTRING_LITERALS) { - f |= SF_MERGE | SF_STRINGS; - } - return f; -} - -static u16 sec_sem_from(u32 mflags, u16 sec_kind) { - u32 stype = mflags & SECTION_TYPE; - if (stype == S_ZEROFILL || stype == S_THREAD_LOCAL_ZEROFILL || - sec_kind == SEC_BSS) { - return SSEM_NOBITS; - } - if (stype == S_MOD_INIT_FUNC_POINTERS) return SSEM_INIT_ARRAY; - if (stype == S_MOD_TERM_FUNC_POINTERS) return SSEM_FINI_ARRAY; - return SSEM_PROGBITS; -} - -ObjBuilder* read_macho(Compiler* c, const char* name, const u8* data, - size_t len) { - (void)name; - if (len < MACHO_HDR64_SIZE) - compiler_panic(c, no_loc(), "read_macho: input shorter than header"); - - u32 magic = rd_u32_le(data + 0); - if (magic != MH_MAGIC_64) - compiler_panic(c, no_loc(), "read_macho: bad magic 0x%x", magic); - - u32 cputype = rd_u32_le(data + 4); - const ArchImpl* arch = arch_lookup_macho_cputype(cputype); - const ArchMachoOps* macho = arch ? 
arch->macho : NULL; - u32 filetype = rd_u32_le(data + 12); - u32 ncmds = rd_u32_le(data + 16); - u32 sizeofcmds = rd_u32_le(data + 20); - - if (!macho || !macho->reloc_from) - compiler_panic(c, no_loc(), "read_macho: unsupported cputype 0x%x", - cputype); - if (filetype != MH_OBJECT) - compiler_panic(c, no_loc(), - "read_macho: only MH_OBJECT supported, got filetype %u", - filetype); - - if ((u64)MACHO_HDR64_SIZE + sizeofcmds > len) - compiler_panic(c, no_loc(), "read_macho: load commands exceed file"); - - /* ---- pass 1: walk load commands, collect sections, symtab cmd. */ - MSecRec* msecs = NULL; - u32 nmsecs = 0; - u32 symoff = 0, nsyms = 0, stroff = 0, strsize = 0; - - u64 pos = MACHO_HDR64_SIZE; - u64 end = pos + sizeofcmds; - for (u32 ci = 0; ci < ncmds && pos + 8 <= end; ++ci) { - u32 cmd = rd_u32_le(data + pos); - u32 cmdsize = rd_u32_le(data + pos + 4); - if (cmdsize < 8 || pos + cmdsize > end) - compiler_panic(c, no_loc(), "read_macho: malformed load command"); - - if (cmd == LC_SEGMENT_64) { - u32 nsects = rd_u32_le(data + pos + 64); - if (MACHO_SEGCMD64_SIZE + (u64)nsects * MACHO_SECT64_SIZE > cmdsize) - compiler_panic(c, no_loc(), "read_macho: segment cmd truncated"); - MSecRec* extra = arena_array(c->scratch, MSecRec, nmsecs + nsects); - if (msecs && nmsecs) - memcpy(extra, msecs, sizeof(MSecRec) * nmsecs); - msecs = extra; - const u8* sp = data + pos + MACHO_SEGCMD64_SIZE; - for (u32 si = 0; si < nsects; ++si, sp += MACHO_SECT64_SIZE) { - MSecRec* m = &msecs[nmsecs++]; - memset(m, 0, sizeof *m); - memcpy(m->sectname, sp + 0, 16); - memcpy(m->segname, sp + 16, 16); - m->seg_len = fixed16_len(m->segname); - m->sect_len = fixed16_len(m->sectname); - m->addr = rd_u64_le(sp + 32); - m->size = rd_u64_le(sp + 40); - m->fileoff = rd_u32_le(sp + 48); - m->align_log2 = rd_u32_le(sp + 52); - m->reloff = rd_u32_le(sp + 56); - m->nreloc = rd_u32_le(sp + 60); - m->flags = rd_u32_le(sp + 64); - m->reserved2 = rd_u32_le(sp + 72); - } - } else if (cmd == LC_SYMTAB) 
{ - symoff = rd_u32_le(data + pos + 8); - nsyms = rd_u32_le(data + pos + 12); - stroff = rd_u32_le(data + pos + 16); - strsize = rd_u32_le(data + pos + 20); - } - pos += cmdsize; - } - - if (stroff + (u64)strsize > len) - compiler_panic(c, no_loc(), "read_macho: string table out of range"); - if (symoff + (u64)nsyms * MACHO_NLIST64_SIZE > len) - compiler_panic(c, no_loc(), "read_macho: symbol table out of range"); - const u8* strtab = data + stroff; - - ObjBuilder* ob = obj_new(c); - if (!ob) compiler_panic(c, no_loc(), "read_macho: obj_new failed"); - - /* ---- pass 2: create ObjSecs and copy bytes. */ - for (u32 i = 0; i < nmsecs; ++i) { - MSecRec* m = &msecs[i]; - /* Build "__SEG,__sect"-form name; matches what emit_macho would - * round-trip back out. */ - char nmbuf[34]; - u32 nlen = 0; - memcpy(nmbuf + nlen, m->segname, m->seg_len); - nlen += m->seg_len; - nmbuf[nlen++] = ','; - memcpy(nmbuf + nlen, m->sectname, m->sect_len); - nlen += m->sect_len; - Sym sn = pool_intern_slice(c->global, (Slice){ .s = nmbuf, .len = nlen }); - - u16 kind = sec_kind_from_seg_sect(m->segname, m->seg_len, m->sectname, - m->sect_len, m->flags); - u16 flags = sec_flags_from(m->flags, kind); - u16 sem = sec_sem_from(m->flags, kind); - u32 align = 1u << (m->align_log2 & 31); - - ObjSecId id = obj_section_ex(ob, sn, (SecKind)kind, (SecSem)sem, flags, - align, m->reserved2, 0, 0); - if (id == OBJ_SEC_NONE) - compiler_panic(c, no_loc(), "read_macho: obj_section_ex failed"); - - /* Preserve the raw mach section.flags so emit_macho can write back - * the same S_TYPE / S_ATTR_* bits. */ - obj_section_set_ext(ob, id, OBJ_EXT_MACHO, m->flags, 0); - - if (sem == SSEM_NOBITS) { - obj_reserve_bss(ob, id, (u32)m->size, align); - } else if (m->size) { - if (m->fileoff + m->size > len) - compiler_panic(c, no_loc(), "read_macho: section bytes out of range"); - obj_write(ob, id, data + m->fileoff, (size_t)m->size); - } - m->obj_sec = id; - } - - /* ---- pass 3: parse symbol table. 
Two-pass strategy: first pass - * creates undefs (so relocations can refer to them), second - * pass creates defined locals/extdefs. Both write into - * mach_idx -> ObjSymId so reloc resolution works. */ - ObjSymId* sym_macho_to_obj = - arena_zarray(c->scratch, ObjSymId, nsyms ? nsyms : 1); - - const u8* sbase = data + symoff; - for (u32 i = 0; i < nsyms; ++i) { - const u8* p = sbase + (u64)i * MACHO_NLIST64_SIZE; - u32 strx = rd_u32_le(p + 0); - u8 n_type = p[4]; - u8 n_sect = p[5]; - u16 n_desc = rd_u16_le(p + 6); - u64 n_value = rd_u64_le(p + 8); - - const char* nm = ""; - u32 nlen = 0; - if (strx < strsize) { - nm = (const char*)(strtab + strx); - while (strx + nlen < strsize && nm[nlen]) ++nlen; - } - /* Mach-O names round-trip verbatim — the leading `_` Apple - * toolchains apply to C symbols is part of the on-disk name as - * far as ObjBuilder is concerned. Name-canonicalization (the - * `test_main` ↔ `_test_main` mapping for API callers) happens - * one layer up at the linker API boundary (link_c_name_intern - * in link.c); the on-disk shape stays byte-for-byte stable. */ - Sym sn = nlen ? pool_intern_slice(c->global, (Slice){ .s = nm, .len = nlen }) : 0; - - u8 type_field = (u8)(n_type & N_TYPE); - u8 ext = (u8)(n_type & N_EXT); - u8 pext = (u8)(n_type & N_PEXT); - - u16 bind = ext ? SB_GLOBAL : SB_LOCAL; - /* Weak DEFs (defined symbols) carry N_WEAK_DEF; weak REFs (undef - * `__attribute__((weak))` references) carry N_WEAK_REF. Either - * one collapses to SB_WEAK in the cfree model. */ - if (ext && (n_desc & (N_WEAK_DEF | N_WEAK_REF))) bind = SB_WEAK; - u8 vis = pext ? SV_HIDDEN : SV_DEFAULT; - - u16 kind; - ObjSecId sec_id = OBJ_SEC_NONE; - u64 value = 0; - u64 size = 0; - u64 cmnalign = 0; - - if (type_field == N_UNDF) { - if (ext && n_value != 0) { - /* Common: n_value is size, n_desc encodes log2(align) in - * GET_COMM_ALIGN bits. 
*/ - kind = SK_COMMON; - value = 0; - size = n_value; - u32 la = (u32)((n_desc >> 8) & 0xf); - cmnalign = 1u << la; - } else { - kind = SK_UNDEF; - } - } else if (type_field == N_ABS) { - kind = SK_ABS; - value = n_value; - } else if (type_field == N_SECT) { - if (n_sect == 0 || n_sect > nmsecs) { - kind = SK_NOTYPE; - } else { - sec_id = msecs[n_sect - 1].obj_sec; - /* Mach-O n_value for defined symbols is segment-relative addr; - * convert back to a section-local offset. */ - u64 base = msecs[n_sect - 1].addr; - value = (n_value >= base) ? (n_value - base) : 0; - kind = (msecs[n_sect - 1].flags & S_ATTR_PURE_INSTRUCTIONS) ? SK_FUNC - : SK_OBJ; - } - } else { - kind = SK_NOTYPE; - } - - ObjSymId id = obj_symbol_ex(ob, sn, (SymBind)bind, (SymVis)vis, - (SymKind)kind, sec_id, value, size, cmnalign); - obj_sym_mark_referenced(ob, id); - /* n_desc carries Mach-O attribute bits beyond what bind/vis/kind - * model — N_NO_DEAD_STRIP, N_REF_TO_WEAK, N_ARM_THUMB_DEF, etc. - * Mask off the bits we already round-trip via bind (N_WEAK_DEF / - * N_WEAK_REF) and the alignment field for commons (which lives - * in cmnalign), then stash the remainder so emit_macho can OR it - * back in. */ - u16 desc_pass = n_desc; - desc_pass &= (u16)~(N_WEAK_DEF | N_WEAK_REF); - if (kind == SK_COMMON) desc_pass &= 0x00ff; /* drop align field */ - if (desc_pass) obj_symbol_set_flags(ob, id, desc_pass); - sym_macho_to_obj[i] = id; - } - - /* ---- pass 4: parse per-section relocations into ObjBuilder relocs. - * Mach-O encodes addends out-of-band as a leading - * ARM64_RELOC_ADDEND followed by the real reloc; the - * reader collapses the pair on the way in. */ - /* Lazily-populated section-start local symbols, for clang-emitted - * non-extern (section-relative) relocations. See the r_extern==0 - * branch below for the encoding. */ - ObjSymId* sec_start_sym = - arena_zarray(c->scratch, ObjSymId, nmsecs ? 
nmsecs : 1); - for (u32 i = 0; i < nmsecs; ++i) sec_start_sym[i] = OBJ_SYM_NONE; - for (u32 i = 0; i < nmsecs; ++i) { - MSecRec* m = &msecs[i]; - if (!m->nreloc) continue; - if (m->reloff + (u64)m->nreloc * MACHO_RELOC_SIZE > len) - compiler_panic(c, no_loc(), - "read_macho: relocation table out of range"); - const u8* rp = data + m->reloff; - i64 pending_addend = 0; - int have_pending = 0; - int pending_subtractor = 0; - u32 pending_subtractor_offset = 0; - u32 pending_subtractor_length = 0; - for (u32 j = 0; j < m->nreloc; ++j) { - u32 r_address = rd_u32_le(rp + j * MACHO_RELOC_SIZE); - u32 packed = rd_u32_le(rp + j * MACHO_RELOC_SIZE + 4); - u32 r_symbolnum = packed & 0x00ffffffu; - u32 r_pcrel = (packed >> 24) & 1u; - u32 r_length = (packed >> 25) & 3u; - u32 r_extern = (packed >> 27) & 1u; - u32 r_type = (packed >> 28) & 0xfu; - - if (r_type == ARM64_RELOC_ADDEND) { - /* Sign-extend 24-bit addend. */ - i32 ad = (i32)(r_symbolnum & 0x00ffffffu); - if (ad & 0x00800000) ad |= ~0x00ffffff; - pending_addend = (i64)ad; - have_pending = 1; - continue; - } - - u32 kind; - if (r_type == ARM64_RELOC_SUBTRACTOR) { - kind = (r_length == 3) ? R_RV_SUB64 - : (r_length == 2) ? R_RV_SUB32 - : (r_length == 1) ? R_RV_SUB16 - : R_RV_SUB8; - } else { - kind = macho->reloc_from(r_type); - } - if (kind == (u32)-1) - compiler_panic(c, no_loc(), - "read_macho: unsupported reloc type %u", r_type); - - /* Refine kind by (r_pcrel, r_length) when the type field alone - * is ambiguous. ARM64_RELOC_UNSIGNED collapses R_ABS64/R_ABS32 - * and PC-relative variants. */ - if (r_type == ARM64_RELOC_UNSIGNED) { - if (pending_subtractor && pending_subtractor_offset == r_address && - pending_subtractor_length == r_length) { - kind = (r_length == 3) ? R_RV_ADD64 - : (r_length == 2) ? R_RV_ADD32 - : (r_length == 1) ? R_RV_ADD16 - : R_RV_ADD8; - pending_subtractor = 0; - } else if (r_pcrel) { - kind = (r_length == 3) ? R_PC64 : R_PC32; - } else { - kind = (r_length == 3) ? 
R_ABS64 : R_ABS32; - } - } else if (r_type == ARM64_RELOC_BRANCH26) { - kind = R_AARCH64_CALL26; - } else if (r_type == ARM64_RELOC_PAGEOFF12) { - /* PAGEOFF12 is access-size-agnostic in Mach-O; the linker - * applier needs to scale the immediate by the load/store size - * (or apply it raw for ADD). Inspect the patched instruction - * at r_address to pick the right RelocKind so the applier in - * link_reloc.c shifts the lo12 correctly. */ - if (m->fileoff + r_address + 4u > len) - compiler_panic(c, no_loc(), - "read_macho: PAGEOFF12 r_address %u out of range", - r_address); - u32 ins = rd_u32_le(data + m->fileoff + r_address); - /* ADD (immediate): bits 30:24 = 0010001 (W=10001 / X=10010001). - * Mask 0x7f800000 isolates sf=0/1 + the 0010001 pattern; values - * 0x11000000 (32-bit) and 0x91000000 (64-bit) — match the latter - * via the same 0x7f mask leaving bit 31 free. */ - if ((ins & 0x7f800000u) == 0x11000000u) { - kind = R_AARCH64_ADD_ABS_LO12_NC; - } else if ((ins & 0x3b000000u) == 0x39000000u) { - /* LDR/STR (immediate unsigned offset). Bits 29:27=111, bit 26=V - * (0=integer, 1=SIMD/FP), bits 25:24=01. size in [31:30] plus - * opc bit 23 for the SIMD 128-bit case (size=00, opc=11). */ - u32 sz = (ins >> 30) & 3u; - u32 v_bit = (ins >> 26) & 1u; - u32 opc1 = (ins >> 23) & 1u; - if (v_bit && sz == 0 && opc1) { - kind = R_AARCH64_LDST128_ABS_LO12_NC; - } else { - kind = (sz == 0) ? R_AARCH64_LDST8_ABS_LO12_NC - : (sz == 1) ? R_AARCH64_LDST16_ABS_LO12_NC - : (sz == 2) ? R_AARCH64_LDST32_ABS_LO12_NC - : R_AARCH64_LDST64_ABS_LO12_NC; - } - } - /* else: leave as the default R_AARCH64_ADD_ABS_LO12_NC. 
*/ - } - - ObjSymId target = OBJ_SYM_NONE; - i64 inplace_addend_override = 0; - int use_inplace_addend = 0; - if (r_extern) { - if (r_symbolnum < nsyms) target = sym_macho_to_obj[r_symbolnum]; - if (!have_pending && r_type == ARM64_RELOC_UNSIGNED) { - u32 rsz = 1u << r_length; - if ((u64)m->fileoff + r_address + rsz > len) - compiler_panic(c, no_loc(), - "read_macho: extern unsigned reloc r_address out " - "of range"); - const u8* pv = data + m->fileoff + r_address; - u64 inplace; - if (r_length == 3) inplace = rd_u64_le(pv); - else if (r_length == 2) inplace = (u64)rd_u32_le(pv); - else if (r_length == 1) inplace = (u64)rd_u16_le(pv); - else inplace = (u64)pv[0]; - inplace_addend_override = (i64)inplace; - use_inplace_addend = 1; - } - } else { - /* Section-relative reloc — clang emits these for compact unwind, - * EH frame, and DWARF debug info. r_symbolnum is the 1-based - * section index; the in-place value at r_address is the absolute - * .o virtual address of the referent. Synthesize a local - * symbol pointing to the target section's start (lazily, once - * per section) and re-express the reloc as - * target = sec_start_sym, addend = inplace - section.addr. */ - if (r_symbolnum == 0 || r_symbolnum > nmsecs) - compiler_panic(c, no_loc(), - "read_macho: section-relative reloc references " - "invalid section index %u", - r_symbolnum); - u32 sec_idx = r_symbolnum - 1u; - MSecRec* tm = &msecs[sec_idx]; - if (sec_start_sym[sec_idx] == OBJ_SYM_NONE) { - /* Build ".Lcfree.macho_secstart.<sec_idx>" without snprintf - * (the freestanding build doesn't pull in stdio). 
*/ - static const char prefix[] = ".Lcfree.macho_secstart."; - char nmbuf[sizeof(prefix) + 10]; - u32 nlen = (u32)(sizeof(prefix) - 1); - memcpy(nmbuf, prefix, nlen); - char dec[10]; - u32 dn = 0; - u32 v = sec_idx; - do { - dec[dn++] = (char)('0' + (v % 10u)); - v /= 10u; - } while (v); - for (u32 k = 0; k < dn; ++k) nmbuf[nlen + k] = dec[dn - 1 - k]; - nlen += dn; - Sym sn = pool_intern_slice(c->global, (Slice){ .s = nmbuf, .len = nlen }); - u16 sk = (tm->flags & S_ATTR_PURE_INSTRUCTIONS) ? SK_FUNC : SK_OBJ; - sec_start_sym[sec_idx] = obj_symbol(ob, sn, SB_LOCAL, (SymKind)sk, - tm->obj_sec, 0, 0); - } - target = sec_start_sym[sec_idx]; - u32 rsz = 1u << r_length; - if ((u64)m->fileoff + r_address + rsz > len) - compiler_panic(c, no_loc(), - "read_macho: non-extern reloc r_address out of range"); - u64 inplace; - const u8* pv = data + m->fileoff + r_address; - if (r_length == 3) inplace = rd_u64_le(pv); - else if (r_length == 2) inplace = (u64)rd_u32_le(pv); - else if (r_length == 1) inplace = (u64)rd_u16_le(pv); - else inplace = (u64)pv[0]; - inplace_addend_override = (i64)inplace - (i64)tm->addr; - use_inplace_addend = 1; - } - - i64 addend = have_pending ? pending_addend - : (use_inplace_addend ? inplace_addend_override - : 0); - int has_explicit = have_pending || use_inplace_addend || addend != 0; - have_pending = 0; - pending_addend = 0; - - obj_reloc_ex(ob, m->obj_sec, r_address, (RelocKind)kind, target, - addend, has_explicit, 0); - if (r_type == ARM64_RELOC_SUBTRACTOR) { - pending_subtractor = 1; - pending_subtractor_offset = r_address; - pending_subtractor_length = r_length; - } - } - } - - obj_finalize(ob); - return ob; -} - -/* ---- read_macho_dso ---- - * - * MH_DYLIB reader. Walks load commands once to find LC_ID_DYLIB - * (install-name) and LC_SYMTAB (symbol table + string table), then - * emits one defined ObjSym per externally-visible nlist entry. 
- * - * Like read_elf_dso, the produced ObjBuilder carries no sections / - * relocations / groups — only symbol definitions in OBJ_SEC_NONE. The - * consumer's resolve_undefs sees these as defined globals and marks the - * matching consumer-side undef as `imported`. The dylib's own undefs - * (its imports of other dylibs) are filtered: they don't satisfy any - * undef in the consumer. */ - -ObjBuilder* read_macho_dso(Compiler* c, const char* name, const u8* data, - size_t len, Sym* install_name_out) { - (void)name; - if (install_name_out) *install_name_out = 0; - if (len < MACHO_HDR64_SIZE) - compiler_panic(c, no_loc(), "read_macho_dso: input shorter than header"); - - u32 magic = rd_u32_le(data + 0); - if (magic != MH_MAGIC_64) - compiler_panic(c, no_loc(), "read_macho_dso: bad magic 0x%x", magic); - - u32 cputype = rd_u32_le(data + 4); - u32 filetype = rd_u32_le(data + 12); - u32 ncmds = rd_u32_le(data + 16); - u32 sizeofcmds = rd_u32_le(data + 20); - - if (!arch_lookup_macho_cputype(cputype)) - compiler_panic(c, no_loc(), "read_macho_dso: unsupported cputype 0x%x", - cputype); - if (filetype != MH_DYLIB && filetype != MH_BUNDLE) - compiler_panic(c, no_loc(), - "read_macho_dso: not MH_DYLIB/MH_BUNDLE (filetype=%u)", - filetype); - if ((u64)MACHO_HDR64_SIZE + sizeofcmds > len) - compiler_panic(c, no_loc(), "read_macho_dso: load commands exceed file"); - - u32 symoff = 0, nsyms = 0, stroff = 0, strsize = 0; - Sym install_name = 0; - - u64 pos = MACHO_HDR64_SIZE; - u64 end = pos + sizeofcmds; - for (u32 ci = 0; ci < ncmds && pos + 8 <= end; ++ci) { - u32 cmd = rd_u32_le(data + pos); - u32 cmdsize = rd_u32_le(data + pos + 4); - if (cmdsize < 8 || pos + cmdsize > end) - compiler_panic(c, no_loc(), "read_macho_dso: malformed load command"); - if (cmd == LC_ID_DYLIB) { - /* dylib_command: cmd, cmdsize, name(lc_str: 4-byte offset within - * the cmd), timestamp, current_version, compat_version. 
*/ - if (cmdsize < 24) goto next; - u32 nm_off = rd_u32_le(data + pos + 8); - if (nm_off >= cmdsize) goto next; - const char* p = (const char*)(data + pos + nm_off); - u32 maxlen = cmdsize - nm_off; - u32 nlen = 0; - while (nlen < maxlen && p[nlen]) ++nlen; - if (nlen) install_name = pool_intern_slice(c->global, (Slice){ .s = p, .len = nlen }); - } else if (cmd == LC_SYMTAB) { - symoff = rd_u32_le(data + pos + 8); - nsyms = rd_u32_le(data + pos + 12); - stroff = rd_u32_le(data + pos + 16); - strsize = rd_u32_le(data + pos + 20); - } - next: - pos += cmdsize; - } - if (install_name_out) *install_name_out = install_name; - - if (stroff + (u64)strsize > len) - compiler_panic(c, no_loc(), "read_macho_dso: string table out of range"); - if (symoff + (u64)nsyms * MACHO_NLIST64_SIZE > len) - compiler_panic(c, no_loc(), "read_macho_dso: symbol table out of range"); - - ObjBuilder* ob = obj_new(c); - if (!ob) compiler_panic(c, no_loc(), "read_macho_dso: obj_new failed"); - - const u8* strtab = data + stroff; - const u8* sbase = data + symoff; - for (u32 i = 0; i < nsyms; ++i) { - const u8* p = sbase + (u64)i * MACHO_NLIST64_SIZE; - u32 strx = rd_u32_le(p + 0); - u8 n_type = p[4]; - u16 n_desc = rd_u16_le(p + 6); - - u8 type_field = (u8)(n_type & N_TYPE); - u8 ext = (u8)(n_type & N_EXT); - /* Skip non-external (locals) and undef refs (the dylib's own imports). */ - if (!ext) continue; - if (type_field == N_UNDF) continue; - /* N_INDR / N_PBUD / N_STAB: skip — not interesting for static link. */ - if (n_type & N_STAB) continue; - - if (strx >= strsize) continue; - const char* nm = (const char*)(strtab + strx); - u32 nlen = 0; - while (strx + nlen < strsize && nm[nlen]) ++nlen; - if (!nlen) continue; - Sym sn = pool_intern_slice(c->global, (Slice){ .s = nm, .len = nlen }); - - SymBind bind = (n_desc & (N_WEAK_DEF | N_WEAK_REF)) ? SB_WEAK : SB_GLOBAL; - SymKind kind = SK_NOTYPE; - /* Mach-O dylib nlist doesn't carry STT_FUNC / STT_OBJECT cleanly — - * default to NOTYPE. 
The consuming linker uses dso_export_is_func - * to peek at this for ELF; for Mach-O the `imported` decision flows - * through synthetic __got / __stubs regardless of kind. */ - { - ObjSymId did = - obj_symbol_ex(ob, sn, bind, SV_DEFAULT, kind, OBJ_SEC_NONE, 0, 0, 0); - obj_sym_mark_referenced(ob, did); - } - } - - obj_finalize(ob); - return ob; -} diff --git a/src/obj/macho_reloc_aarch64.c b/src/obj/macho_reloc_aarch64.c @@ -1,113 +0,0 @@ -/* RelocKind <-> arm64 Mach-O reloc-type mapping. Mirror of - * elf_reloc_aarch64.c for Mach-O. - * - * Mach-O relocations carry three independent fields that the cfree - * RelocKind enum collapses into a single value: r_type (the 4-bit - * ARM64_RELOC_* code), r_pcrel, and r_length. The translator therefore - * exposes three accessors — the writer (macho_emit.c) consults all of - * them per Reloc, and the reader (macho_read.c) inverts via - * macho_aarch64_reloc_from which keys on (r_type, r_pcrel, r_length). */ - -#include "core/util.h" -#include "obj/macho.h" - -u32 macho_aarch64_reloc_to(u32 kind /* RelocKind */) { - switch (kind) { - case R_NONE: - return (u32)-1; - case R_ABS64: - case R_ABS32: - return ARM64_RELOC_UNSIGNED; - case R_REL64: - case R_REL32: - case R_PC64: - case R_PC32: - /* PC-relative absolute pointer-difference; encoded as - * UNSIGNED with r_pcrel=1, length=3/2. 
*/ - return ARM64_RELOC_UNSIGNED; - case R_AARCH64_JUMP26: - case R_AARCH64_CALL26: - return ARM64_RELOC_BRANCH26; - case R_AARCH64_ADR_PREL_PG_HI21: - case R_AARCH64_ADR_PREL_PG_HI21_NC: - return ARM64_RELOC_PAGE21; - case R_AARCH64_ADD_ABS_LO12_NC: - case R_AARCH64_LDST8_ABS_LO12_NC: - case R_AARCH64_LDST16_ABS_LO12_NC: - case R_AARCH64_LDST32_ABS_LO12_NC: - case R_AARCH64_LDST64_ABS_LO12_NC: - case R_AARCH64_LDST128_ABS_LO12_NC: - return ARM64_RELOC_PAGEOFF12; - case R_AARCH64_ADR_GOT_PAGE: - return ARM64_RELOC_GOT_LOAD_PAGE21; - case R_AARCH64_LD64_GOT_LO12_NC: - return ARM64_RELOC_GOT_LOAD_PAGEOFF12; - case R_AARCH64_TLVP_LOAD_PAGE21: - return ARM64_RELOC_TLVP_LOAD_PAGE21; - case R_AARCH64_TLVP_LOAD_PAGEOFF12: - return ARM64_RELOC_TLVP_LOAD_PAGEOFF12; - default: - return (u32)-1; - } -} - -u32 macho_aarch64_reloc_pcrel(u32 kind /* RelocKind */) { - switch (kind) { - case R_REL64: - case R_REL32: - case R_PC64: - case R_PC32: - case R_AARCH64_JUMP26: - case R_AARCH64_CALL26: - case R_AARCH64_ADR_PREL_PG_HI21: - case R_AARCH64_ADR_PREL_PG_HI21_NC: - case R_AARCH64_ADR_GOT_PAGE: - case R_AARCH64_TLVP_LOAD_PAGE21: - return 1; - default: - return 0; - } -} - -u32 macho_aarch64_reloc_length(u32 kind /* RelocKind */) { - /* log2 of the patch width in bytes: 0=byte, 1=hword, 2=word, 3=quad. - * AArch64 instructions are 4 bytes and Mach-O encodes any 32-bit fixup - * (BRANCH26, PAGE21, PAGEOFF12, ...) with length=2. */ - switch (kind) { - case R_ABS64: - case R_REL64: - case R_PC64: - return 3; - default: - return 2; - } -} - -u32 macho_aarch64_reloc_from(u32 macho_type) { - /* The (r_type, r_pcrel, r_length) tuple disambiguates several kinds - * collapsed by macho_aarch64_reloc_to. The reader inspects pcrel and - * length itself when it matters; this function only maps the type - * field, returning the most common AArch64 instance for each. Reader - * callers refine via the pcrel/length companion if they need to - * distinguish R_ABS64 vs R_PC64 (both UNSIGNED). 
*/ - switch (macho_type) { - case ARM64_RELOC_UNSIGNED: - return R_ABS64; - case ARM64_RELOC_BRANCH26: - return R_AARCH64_CALL26; - case ARM64_RELOC_PAGE21: - return R_AARCH64_ADR_PREL_PG_HI21; - case ARM64_RELOC_PAGEOFF12: - return R_AARCH64_ADD_ABS_LO12_NC; - case ARM64_RELOC_GOT_LOAD_PAGE21: - return R_AARCH64_ADR_GOT_PAGE; - case ARM64_RELOC_GOT_LOAD_PAGEOFF12: - return R_AARCH64_LD64_GOT_LO12_NC; - case ARM64_RELOC_TLVP_LOAD_PAGE21: - return R_AARCH64_TLVP_LOAD_PAGE21; - case ARM64_RELOC_TLVP_LOAD_PAGEOFF12: - return R_AARCH64_TLVP_LOAD_PAGEOFF12; - default: - return (u32)-1; - } -} diff --git a/src/obj/macho_reloc_x86_64.c b/src/obj/macho_reloc_x86_64.c @@ -1,84 +0,0 @@ -#include "core/util.h" -#include "obj/macho.h" - -u32 macho_x86_64_reloc_to(u32 kind /* RelocKind */) { - switch (kind) { - case R_NONE: - return (u32)-1; - case R_ABS64: - case R_ABS32: - return X86_64_RELOC_UNSIGNED; - case R_PC32: - case R_REL32: - case R_PC64: - case R_REL64: - case R_X64_PC8: - return X86_64_RELOC_SIGNED; - case R_PLT32: - case R_X64_PLT32: - return X86_64_RELOC_BRANCH; - case R_X64_GOTPCRELX: - case R_X64_REX_GOTPCRELX: - return X86_64_RELOC_GOT_LOAD; - case R_X64_GOTPCREL: - return X86_64_RELOC_GOT; - case R_X64_TPOFF32: - return X86_64_RELOC_TLV; - default: - return (u32)-1; - } -} - -u32 macho_x86_64_reloc_pcrel(u32 kind /* RelocKind */) { - switch (kind) { - case R_PC32: - case R_REL32: - case R_PC64: - case R_REL64: - case R_X64_PC8: - case R_PLT32: - case R_X64_PLT32: - case R_X64_GOTPCREL: - case R_X64_GOTPCRELX: - case R_X64_REX_GOTPCRELX: - case R_X64_TPOFF32: - return 1; - default: - return 0; - } -} - -u32 macho_x86_64_reloc_length(u32 kind /* RelocKind */) { - switch (kind) { - case R_ABS64: - case R_PC64: - case R_REL64: - return 3; - case R_X64_PC8: - return 0; - default: - return 2; - } -} - -u32 macho_x86_64_reloc_from(u32 macho_type) { - switch (macho_type) { - case X86_64_RELOC_UNSIGNED: - return R_ABS64; - case X86_64_RELOC_SIGNED: - case 
X86_64_RELOC_SIGNED_1: - case X86_64_RELOC_SIGNED_2: - case X86_64_RELOC_SIGNED_4: - return R_PC32; - case X86_64_RELOC_BRANCH: - return R_X64_PLT32; - case X86_64_RELOC_GOT_LOAD: - return R_X64_REX_GOTPCRELX; - case X86_64_RELOC_GOT: - return R_X64_GOTPCREL; - case X86_64_RELOC_TLV: - return R_X64_TPOFF32; - default: - return (u32)-1; - } -} diff --git a/src/obj/registry.c b/src/obj/registry.c @@ -1,15 +1,22 @@ -#include "obj/format.h" - #include <cfree/config.h> #include <string.h> #include "core/slice.h" +#include "obj/coff/coff.h" +#include "obj/elf/elf.h" +#include "obj/format.h" +#include "obj/macho/macho.h" #include "obj/obj.h" void link_emit_elf(LinkImage*, Writer*); void link_emit_macho(LinkImage*, Writer*); void link_emit_coff(LinkImage*, Writer*); +#if CFREE_OBJ_ELF_ENABLED +void layout_dyn(Linker*, LinkImage*); +void link_dyn_state_free(LinkImage*); +#endif + #if CFREE_OBJ_COFF_ENABLED int coff_classify_obj_input(Compiler*, ObjBuilder*, Sym* soname_out); Sym coff_archive_hint(Compiler*, const char* archive_name); @@ -18,6 +25,186 @@ ObjFormatArchiveAction coff_archive_member(Compiler*, ObjBuilder** out); #endif +#if CFREE_ARCH_AA64_ENABLED +void aa64_emit_macho_stub(u8* dst, u64 stub_vaddr, u64 got_slot_vaddr); +void aa64_emit_coff_iat_stub(u8* dst, u64 stub_vaddr, u64 iat_slot_vaddr); +#endif +#if CFREE_ARCH_X64_ENABLED +void x64_emit_coff_iat_stub(u8* dst, u64 stub_vaddr, u64 iat_slot_vaddr); +#endif + +#if CFREE_OBJ_ELF_ENABLED +static const ObjElfArchOps obj_elf_arch_ops[] = { +#if CFREE_ARCH_AA64_ENABLED + { + .arch = CFREE_ARCH_ARM_64, + .e_machine = EM_AARCH64, + .e_flags = 0, + .default_musl_interp = "/lib/ld-musl-aarch64.so.1", + .r_relative = ELF_R_AARCH64_RELATIVE, + .r_glob_dat = ELF_R_AARCH64_GLOB_DAT, + .r_jump_slot = ELF_R_AARCH64_JUMP_SLOT, + .reloc_to = elf_aarch64_reloc_to, + .reloc_from = elf_aarch64_reloc_from, + }, +#endif +#if CFREE_ARCH_X64_ENABLED + { + .arch = CFREE_ARCH_X86_64, + .e_machine = EM_X86_64, + .e_flags = 0, + 
.default_musl_interp = "/lib/ld-musl-x86_64.so.1", + .r_relative = ELF_R_X86_64_RELATIVE, + .r_glob_dat = ELF_R_X86_64_GLOB_DAT, + .r_jump_slot = ELF_R_X86_64_JUMP_SLOT, + .reloc_to = elf_x86_64_reloc_to, + .reloc_from = elf_x86_64_reloc_from, + }, +#endif +#if CFREE_ARCH_RV64_ENABLED + { + .arch = CFREE_ARCH_RV64, + .e_machine = EM_RISCV, + .e_flags = EF_RISCV_RVC | EF_RISCV_FLOAT_ABI_DOUBLE, + .default_musl_interp = "/lib/ld-musl-riscv64.so.1", + .r_relative = ELF_R_RISCV_RELATIVE, + .r_glob_dat = ELF_R_RISCV_64, + .r_jump_slot = ELF_R_RISCV_JUMP_SLOT, + .reloc_to = elf_riscv64_reloc_to, + .reloc_from = elf_riscv64_reloc_from, + }, +#endif +#if !CFREE_ARCH_AA64_ENABLED && !CFREE_ARCH_X64_ENABLED && \ + !CFREE_ARCH_RV64_ENABLED + {.arch = CFREE_ARCH_WASM}, +#endif +}; + +static const ObjElfArchOps* obj_elf_arch(CfreeArchKind arch) { + u32 i; + for (i = 0; i < (u32)(sizeof obj_elf_arch_ops / sizeof obj_elf_arch_ops[0]); + ++i) { + if (obj_elf_arch_ops[i].arch == arch) return &obj_elf_arch_ops[i]; + } + return NULL; +} + +static const ObjElfArchOps* obj_elf_machine(u32 e_machine) { + u32 i; + for (i = 0; i < (u32)(sizeof obj_elf_arch_ops / sizeof obj_elf_arch_ops[0]); + ++i) { + if (obj_elf_arch_ops[i].e_machine && + obj_elf_arch_ops[i].e_machine == e_machine) + return &obj_elf_arch_ops[i]; + } + return NULL; +} +#endif + +#if CFREE_OBJ_MACHO_ENABLED +static const ObjMachoArchOps obj_macho_arch_ops[] = { +#if CFREE_ARCH_AA64_ENABLED + { + .arch = CFREE_ARCH_ARM_64, + .cputype = CPU_TYPE_ARM64, + .cpusubtype = CPU_SUBTYPE_ARM64_ALL, + .stub_size = 12u, + .emit_stub = aa64_emit_macho_stub, + .reloc_to = macho_aarch64_reloc_to, + .reloc_pcrel = macho_aarch64_reloc_pcrel, + .reloc_length = macho_aarch64_reloc_length, + .reloc_from = macho_aarch64_reloc_from, + }, +#endif +#if CFREE_ARCH_X64_ENABLED + { + .arch = CFREE_ARCH_X86_64, + .cputype = CPU_TYPE_X86_64, + .cpusubtype = CPU_SUBTYPE_X86_64_ALL, + .stub_size = 0u, + .emit_stub = NULL, + .reloc_to = 
macho_x86_64_reloc_to, + .reloc_pcrel = macho_x86_64_reloc_pcrel, + .reloc_length = macho_x86_64_reloc_length, + .reloc_from = macho_x86_64_reloc_from, + }, +#endif +#if !CFREE_ARCH_AA64_ENABLED && !CFREE_ARCH_X64_ENABLED + {.arch = CFREE_ARCH_WASM}, +#endif +}; + +static const ObjMachoArchOps* obj_macho_arch(CfreeArchKind arch) { + u32 i; + for (i = 0; + i < (u32)(sizeof obj_macho_arch_ops / sizeof obj_macho_arch_ops[0]); + ++i) { + if (obj_macho_arch_ops[i].arch == arch) return &obj_macho_arch_ops[i]; + } + return NULL; +} + +static const ObjMachoArchOps* obj_macho_cputype(u32 cputype) { + u32 i; + for (i = 0; + i < (u32)(sizeof obj_macho_arch_ops / sizeof obj_macho_arch_ops[0]); + ++i) { + if (obj_macho_arch_ops[i].cputype && + obj_macho_arch_ops[i].cputype == cputype) + return &obj_macho_arch_ops[i]; + } + return NULL; +} +#endif + +#if CFREE_OBJ_COFF_ENABLED +static const ObjCoffArchOps obj_coff_arch_ops[] = { +#if CFREE_ARCH_AA64_ENABLED + { + .arch = CFREE_ARCH_ARM_64, + .machine = IMAGE_FILE_MACHINE_ARM64, + .stub_size = 12u, + .emit_iat_stub = aa64_emit_coff_iat_stub, + .reloc_to = coff_aarch64_reloc_to, + .reloc_from = coff_aarch64_reloc_from, + }, +#endif +#if CFREE_ARCH_X64_ENABLED + { + .arch = CFREE_ARCH_X86_64, + .machine = IMAGE_FILE_MACHINE_AMD64, + .stub_size = 6u, + .emit_iat_stub = x64_emit_coff_iat_stub, + .reloc_to = coff_x86_64_reloc_to, + .reloc_from = coff_x86_64_reloc_from, + }, +#endif +#if !CFREE_ARCH_AA64_ENABLED && !CFREE_ARCH_X64_ENABLED + {.arch = CFREE_ARCH_WASM}, +#endif +}; + +static const ObjCoffArchOps* obj_coff_arch(CfreeArchKind arch) { + u32 i; + for (i = 0; i < (u32)(sizeof obj_coff_arch_ops / sizeof obj_coff_arch_ops[0]); + ++i) { + if (obj_coff_arch_ops[i].arch == arch) return &obj_coff_arch_ops[i]; + } + return NULL; +} + +static const ObjCoffArchOps* obj_coff_machine(u16 machine) { + u32 i; + if (machine == 0xA641u) machine = IMAGE_FILE_MACHINE_ARM64; + for (i = 0; i < (u32)(sizeof obj_coff_arch_ops / sizeof 
obj_coff_arch_ops[0]); + ++i) { + if (obj_coff_arch_ops[i].machine && obj_coff_arch_ops[i].machine == machine) + return &obj_coff_arch_ops[i]; + } + return NULL; +} +#endif + static const ObjFormatImpl obj_format_impl_wasm = { .kind = CFREE_OBJ_WASM, .bin_fmt = CFREE_BIN_WASM, @@ -41,6 +228,10 @@ static const ObjFormatImpl obj_format_impl_elf = { .read = read_elf, .read_dso = read_elf_dso, .link_emit = link_emit_elf, + .layout_dyn = layout_dyn, + .free_dyn = link_dyn_state_free, + .elf_arch = obj_elf_arch, + .elf_machine = obj_elf_machine, }; #endif @@ -55,6 +246,8 @@ static const ObjFormatImpl obj_format_impl_macho = { .read = read_macho, .read_dso = read_macho_dso, .link_emit = link_emit_macho, + .macho_arch = obj_macho_arch, + .macho_cputype = obj_macho_cputype, }; #endif @@ -69,6 +262,8 @@ static const ObjFormatImpl obj_format_impl_coff = { .read = read_coff, .read_dso = read_coff_dso, .link_emit = link_emit_coff, + .coff_arch = obj_coff_arch, + .coff_machine = obj_coff_machine, .classify_obj_input = coff_classify_obj_input, .archive_hint = coff_archive_hint, .archive_member = coff_archive_member, diff --git a/test/coff/README.md b/test/coff/README.md @@ -12,7 +12,7 @@ Hand-built `ObjBuilder` → `emit_coff` → bytes → `read_coff` → second 1. Structural equivalence between the original `ObjBuilder` and the readback (sections, symbols, relocations, groups all preserved modulo synthesized SECTION symbols and section-definition aux - records — the asymmetry that `src/obj/coff_read.c` documents). + records — the asymmetry that `src/obj/coff/read.c` documents). 2. Byte stability across `emit_coff(read_coff(emit_coff(ob)))` — the second emit must produce the exact bytes of the first. diff --git a/test/coff/cfree-roundtrip-coff.c b/test/coff/cfree-roundtrip-coff.c @@ -63,15 +63,14 @@ static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0}; static int g_failures; static const char* g_test_name = "?"; -#define EXPECT(cond, ...) 
\ - do { \ - if (!(cond)) { \ - fprintf(stderr, "FAIL [%s] %s:%d: ", g_test_name, __FILE__, \ - __LINE__); \ - fprintf(stderr, __VA_ARGS__); \ - fputc('\n', stderr); \ - g_failures++; \ - } \ +#define EXPECT(cond, ...) \ + do { \ + if (!(cond)) { \ + fprintf(stderr, "FAIL [%s] %s:%d: ", g_test_name, __FILE__, __LINE__); \ + fprintf(stderr, __VA_ARGS__); \ + fputc('\n', stderr); \ + g_failures++; \ + } \ } while (0) /* ---- target builders ---------------------------------------------- */ @@ -655,8 +654,7 @@ static void test_data_with_reloc_rel32_x64(void) { * SK_FUNC + section_id == 0 emits Type=function but the reader collapses * to SK_UNDEF on readback (no "undef function" kind in cfree's model), * which breaks byte stability. See CORPUS.md §10. */ - ObjSymId helper = - obj_symbol(ob, hn, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0); + ObjSymId helper = obj_symbol(ob, hn, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0); obj_reloc(ob, sec, 1, R_PC32, helper, 0); obj_finalize(ob); @@ -678,8 +676,8 @@ static void verify_aa64_branch26(const ObjBuilder* ob, Pool* p) { const Reloc* r = obj_reloc_at(ob, i); if (r->removed) continue; if (r->section_id != text_id) continue; - EXPECT(r->kind == R_AARCH64_CALL26, - "branch26 reloc kind=%u (want %u)", r->kind, R_AARCH64_CALL26); + EXPECT(r->kind == R_AARCH64_CALL26, "branch26 reloc kind=%u (want %u)", + r->kind, R_AARCH64_CALL26); ++seen; } EXPECT(seen == 1, "branch26 reloc count=%d (want 1)", seen); @@ -709,8 +707,7 @@ static void test_aa64_branch26(void) { static const uint8_t bytes[8] = {0, 0, 0, 0x94, 0xc0, 0x03, 0x5f, 0xd6}; obj_write(ob, sec, bytes, sizeof bytes); /* See reloc_rel32_x64 note on SK_UNDEF for undef symbols. 
*/ - ObjSymId callee = - obj_symbol(ob, cn, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0); + ObjSymId callee = obj_symbol(ob, cn, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0); obj_reloc(ob, sec, 0, R_AARCH64_CALL26, callee, 0); obj_finalize(ob); @@ -768,8 +765,7 @@ static void test_aa64_pagebase_pageoffset(void) { obj_write(ob, tsec, txt, sizeof txt); static const uint8_t str[6] = "hello"; obj_write(ob, rsec, str, sizeof str); - ObjSymId kStr = - obj_symbol(ob, kn, SB_LOCAL, SK_OBJ, rsec, 0, sizeof str); + ObjSymId kStr = obj_symbol(ob, kn, SB_LOCAL, SK_OBJ, rsec, 0, sizeof str); obj_reloc(ob, tsec, 0, R_AARCH64_ADR_PREL_PG_HI21, kStr, 0); obj_reloc(ob, tsec, 4, R_AARCH64_ADD_ABS_LO12_NC, kStr, 0); obj_finalize(ob); @@ -953,10 +949,8 @@ static void verify_comdat_group(const ObjBuilder* ob, Pool* p) { const Section* dsec = find_section_named(ob, p, ".data$x"); EXPECT(tsec != NULL, ".text$x missing"); EXPECT(dsec != NULL, ".data$x missing"); - if (tsec) - EXPECT((tsec->flags & SF_GROUP) != 0, ".text$x missing SF_GROUP"); - if (dsec) - EXPECT((dsec->flags & SF_GROUP) != 0, ".data$x missing SF_GROUP"); + if (tsec) EXPECT((tsec->flags & SF_GROUP) != 0, ".text$x missing SF_GROUP"); + if (dsec) EXPECT((dsec->flags & SF_GROUP) != 0, ".data$x missing SF_GROUP"); /* COFF encodes COMDAT per-section (each member section carries its * own section-definition aux with the selection rule); the wire @@ -975,8 +969,7 @@ static void verify_comdat_group(const ObjBuilder* ob, Pool* p) { } obj_groupiter_free(it); EXPECT(seen == 2, "expected 2 groups after COMDAT round-trip, got %d", seen); - EXPECT(total_member_sections == 2, - "total COMDAT member sections=%u (want 2)", + EXPECT(total_member_sections == 2, "total COMDAT member sections=%u (want 2)", total_member_sections); } @@ -1000,16 +993,16 @@ static void test_comdat_group(void) { /* Short section names (<= 8 bytes) — section names that overflow into * the strtab don't round-trip COMDAT detection because the section * symbol's name is 
truncated on emit but the reader compares the - * resolved long name. See CORPUS.md §10 / src/obj/coff_read.c + * resolved long name. See CORPUS.md §10 / src/obj/coff/read.c * is_section_sym logic. */ Sym tn = pool_intern_slice(p, SLICE_LIT(".text$x")); Sym dn = pool_intern_slice(p, SLICE_LIT(".data$x")); Sym sign = pool_intern_slice(p, SLICE_LIT("inline_fn")); - ObjSecId tsec = obj_section(ob, tn, SEC_TEXT, - SF_ALLOC | SF_EXEC | SF_GROUP, 16); - ObjSecId dsec = obj_section(ob, dn, SEC_DATA, - SF_ALLOC | SF_WRITE | SF_GROUP, 8); + ObjSecId tsec = + obj_section(ob, tn, SEC_TEXT, SF_ALLOC | SF_EXEC | SF_GROUP, 16); + ObjSecId dsec = + obj_section(ob, dn, SEC_DATA, SF_ALLOC | SF_WRITE | SF_GROUP, 8); obj_write(ob, tsec, TEXT_X64, sizeof TEXT_X64); static const uint8_t z8[8] = {0}; obj_write(ob, dsec, z8, sizeof z8); @@ -1112,18 +1105,19 @@ static void test_section_symbol_synthesis(void) { ObjBuilder* ob = obj_new(c); Pool* p = c->global; - ObjSecId text = obj_section(ob, pool_intern_slice(p, SLICE_LIT(".text")), SEC_TEXT, - SF_ALLOC | SF_EXEC, 16); + ObjSecId text = obj_section(ob, pool_intern_slice(p, SLICE_LIT(".text")), + SEC_TEXT, SF_ALLOC | SF_EXEC, 16); obj_write(ob, text, TEXT_X64, sizeof TEXT_X64); - ObjSecId data = obj_section(ob, pool_intern_slice(p, SLICE_LIT(".data")), SEC_DATA, - SF_ALLOC | SF_WRITE, 8); + ObjSecId data = obj_section(ob, pool_intern_slice(p, SLICE_LIT(".data")), + SEC_DATA, SF_ALLOC | SF_WRITE, 8); static const uint8_t z8[8] = {0}; obj_write(ob, data, z8, sizeof z8); - ObjSecId rdata = obj_section(ob, pool_intern_slice(p, SLICE_LIT(".rdata")), SEC_RODATA, - SF_ALLOC, 8); + ObjSecId rdata = obj_section(ob, pool_intern_slice(p, SLICE_LIT(".rdata")), + SEC_RODATA, SF_ALLOC, 8); obj_write(ob, rdata, "hi\0", 3); - ObjSecId bss = obj_section_ex(ob, pool_intern_slice(p, SLICE_LIT(".bss")), SEC_BSS, - SSEM_NOBITS, SF_ALLOC | SF_WRITE, 8, 0, 0, 0); + ObjSecId bss = + obj_section_ex(ob, pool_intern_slice(p, SLICE_LIT(".bss")), SEC_BSS, + 
SSEM_NOBITS, SF_ALLOC | SF_WRITE, 8, 0, 0, 0); obj_reserve_bss(ob, bss, 16, 8); obj_finalize(ob); @@ -1141,8 +1135,8 @@ static void verify_tls_section(const ObjBuilder* ob, Pool* p) { const Section* s = find_section_named(ob, p, ".tls$"); EXPECT(s != NULL, ".tls$ not present"); if (s) { - EXPECT((s->flags & SF_TLS) != 0, - ".tls$ missing SF_TLS (flags=0x%x)", s->flags); + EXPECT((s->flags & SF_TLS) != 0, ".tls$ missing SF_TLS (flags=0x%x)", + s->flags); } } @@ -1288,15 +1282,19 @@ static void test_short_import_amd64(void) { const uint32_t kSymLen = (uint32_t)(sizeof kSym - 1); const uint32_t kDllLen = (uint32_t)(sizeof kDll - 1); const uint32_t kDataLen = sizeof kSym + sizeof kDll; /* 12 + 13 = 25 */ - const size_t kTotal = 20 + kDataLen; /* 45 */ + const size_t kTotal = 20 + kDataLen; /* 45 */ uint8_t buf[64]; EXPECT(kTotal <= sizeof buf, "buf too small"); memset(buf, 0, kTotal); /* Header. */ - buf[0] = 0x00; buf[1] = 0x00; /* Sig1 = 0 */ - buf[2] = 0xFF; buf[3] = 0xFF; /* Sig2 = 0xFFFF */ - buf[4] = 0x00; buf[5] = 0x00; /* Version = 0 */ - buf[6] = 0x64; buf[7] = 0x86; /* Machine = AMD64 (0x8664) */ + buf[0] = 0x00; + buf[1] = 0x00; /* Sig1 = 0 */ + buf[2] = 0xFF; + buf[3] = 0xFF; /* Sig2 = 0xFFFF */ + buf[4] = 0x00; + buf[5] = 0x00; /* Version = 0 */ + buf[6] = 0x64; + buf[7] = 0x86; /* Machine = AMD64 (0x8664) */ /* TimeDateStamp = 0 (bytes 8..11 already 0). */ buf[12] = (uint8_t)(kDataLen & 0xFF); buf[13] = (uint8_t)((kDataLen >> 8) & 0xFF); @@ -1304,7 +1302,8 @@ static void test_short_import_amd64(void) { buf[15] = (uint8_t)((kDataLen >> 24) & 0xFF); /* OrdinalOrHint = 0 (16..17). */ /* TypeFlags = Type=CODE(0) | NameType=NAME(1)<<2 = 0x0004. */ - buf[18] = 0x04; buf[19] = 0x00; + buf[18] = 0x04; + buf[19] = 0x00; /* Body: symbol name NUL DLL name NUL. 
*/ memcpy(buf + 20, kSym, sizeof kSym); memcpy(buf + 20 + sizeof kSym, kDll, sizeof kDll); @@ -1335,8 +1334,7 @@ static void test_short_import_amd64(void) { const ObjSym* s = obj_symbol_get(ob, imp_id); EXPECT(s->bind == SB_GLOBAL, "__imp_ bind=%u (want SB_GLOBAL)", s->bind); EXPECT(s->section_id == OBJ_SEC_NONE, - "__imp_ section_id=%u (want OBJ_SEC_NONE)", - (unsigned)s->section_id); + "__imp_ section_id=%u (want OBJ_SEC_NONE)", (unsigned)s->section_id); } Sym dll = 0; @@ -1344,7 +1342,8 @@ static void test_short_import_amd64(void) { EXPECT(got, "obj_get_coff_import_dll returned 0 (annotation missing)"); if (got) EXPECT(sym_eq_str(p, dll, kDll), "DLL name mismatch"); - (void)kSymLen; (void)kDllLen; + (void)kSymLen; + (void)kDllLen; obj_free(ob); cfree_compiler_free((CfreeCompiler*)c); } @@ -1400,4 +1399,3 @@ int main(void) { fprintf(stderr, "OK %zu tests\n", NTESTS); return 0; } - diff --git a/test/coff/pe-dso-forwarder.c b/test/coff/pe-dso-forwarder.c @@ -19,7 +19,7 @@ #include "core/core.h" #include "core/pool.h" -#include "obj/coff.h" +#include "obj/coff/coff.h" #include "obj/obj.h" /* ---- env vtables --------------------------------------------------- */ @@ -54,14 +54,14 @@ static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc, static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0}; static int g_failures; -#define EXPECT(cond, ...) \ - do { \ - if (!(cond)) { \ - fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \ - fprintf(stderr, __VA_ARGS__); \ - fputc('\n', stderr); \ - g_failures++; \ - } \ +#define EXPECT(cond, ...) 
\ + do { \ + if (!(cond)) { \ + fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \ + fprintf(stderr, __VA_ARGS__); \ + fputc('\n', stderr); \ + g_failures++; \ + } \ } while (0) /* ---- compiler ----------------------------------------------------- */ @@ -119,33 +119,33 @@ static void wr_u32(uint8_t* p, uint32_t v) { * including the forwarder target string so the reader classifies * "OTHERDLL.OtherSym" EAT entries as forwarders. */ -#define E_LFANEW 0x40u -#define FH_OFF (E_LFANEW + 4u) -#define OH_OFF (FH_OFF + COFF_FILE_HEADER_SIZE) -#define SH_OFF (OH_OFF + COFF_OPT_HDR64_SIZE) -#define RAW_OFF 0x170u -#define SEC_VA 0x1000u -#define SEC_RAW_SZ 0x200u -#define FILE_SIZE (RAW_OFF + SEC_RAW_SZ) +#define E_LFANEW 0x40u +#define FH_OFF (E_LFANEW + 4u) +#define OH_OFF (FH_OFF + COFF_FILE_HEADER_SIZE) +#define SH_OFF (OH_OFF + COFF_OPT_HDR64_SIZE) +#define RAW_OFF 0x170u +#define SEC_VA 0x1000u +#define SEC_RAW_SZ 0x200u +#define FILE_SIZE (RAW_OFF + SEC_RAW_SZ) /* In-section offsets (relative to RAW_OFF / RVA = SEC_VA + off). 
*/ -#define EXP_DIR_OFF 0u -#define EAT_OFF (EXP_DIR_OFF + COFF_EXPORT_DIR_SIZE) /* +40 */ -#define EAT_COUNT 2u -#define ENT_OFF (EAT_OFF + EAT_COUNT * 4u) /* +48 */ -#define ENT_COUNT 2u -#define ORD_OFF (ENT_OFF + ENT_COUNT * 4u) /* +56 */ -#define DLLNAME_OFF (ORD_OFF + ENT_COUNT * 2u) /* +60 */ - -static const char kDllName[] = "TestDll.dll"; -static const char kDirect[] = "DirectFn"; +#define EXP_DIR_OFF 0u +#define EAT_OFF (EXP_DIR_OFF + COFF_EXPORT_DIR_SIZE) /* +40 */ +#define EAT_COUNT 2u +#define ENT_OFF (EAT_OFF + EAT_COUNT * 4u) /* +48 */ +#define ENT_COUNT 2u +#define ORD_OFF (ENT_OFF + ENT_COUNT * 4u) /* +56 */ +#define DLLNAME_OFF (ORD_OFF + ENT_COUNT * 2u) /* +60 */ + +static const char kDllName[] = "TestDll.dll"; +static const char kDirect[] = "DirectFn"; static const char kForwarded[] = "ForwardedFn"; static const char kForwardTarget[] = "OTHERDLL.OtherSym"; -#define DIRECT_NAME_OFF (DLLNAME_OFF + (uint32_t)sizeof kDllName) +#define DIRECT_NAME_OFF (DLLNAME_OFF + (uint32_t)sizeof kDllName) #define FORWARDED_NAME_OFF (DIRECT_NAME_OFF + (uint32_t)sizeof kDirect) -#define FORWARD_TGT_OFF (FORWARDED_NAME_OFF + (uint32_t)sizeof kForwarded) -#define EXP_DIR_END (FORWARD_TGT_OFF + (uint32_t)sizeof kForwardTarget) +#define FORWARD_TGT_OFF (FORWARDED_NAME_OFF + (uint32_t)sizeof kForwarded) +#define EXP_DIR_END (FORWARD_TGT_OFF + (uint32_t)sizeof kForwardTarget) /* Some RVA outside the export directory range — interpreted as a * direct export pointing into the (notional) code section. */ @@ -163,10 +163,10 @@ static void build_dso(uint8_t* buf) { /* IMAGE_FILE_HEADER. 
*/ wr_u16(buf + FH_OFF + 0, IMAGE_FILE_MACHINE_AMD64); - wr_u16(buf + FH_OFF + 2, 1); /* NumberOfSections */ - wr_u32(buf + FH_OFF + 4, 0); /* TimeDateStamp */ - wr_u32(buf + FH_OFF + 8, 0); /* PointerToSymbolTable */ - wr_u32(buf + FH_OFF + 12, 0); /* NumberOfSymbols */ + wr_u16(buf + FH_OFF + 2, 1); /* NumberOfSections */ + wr_u32(buf + FH_OFF + 4, 0); /* TimeDateStamp */ + wr_u32(buf + FH_OFF + 8, 0); /* PointerToSymbolTable */ + wr_u32(buf + FH_OFF + 12, 0); /* NumberOfSymbols */ wr_u16(buf + FH_OFF + 16, COFF_OPT_HDR64_SIZE); wr_u16(buf + FH_OFF + 18, IMAGE_FILE_DLL); @@ -174,8 +174,8 @@ static void build_dso(uint8_t* buf) { * matter: Magic, and the export DataDirectory at index 0. */ wr_u16(buf + OH_OFF + 0, IMAGE_NT_OPTIONAL_HDR64_MAGIC); /* Data directories live at the tail of the optional header. */ - uint32_t dd_off = OH_OFF + COFF_OPT_HDR64_SIZE - - COFF_NUM_DATA_DIRECTORIES * COFF_DATA_DIRECTORY_SIZE; + uint32_t dd_off = OH_OFF + COFF_OPT_HDR64_SIZE - + COFF_NUM_DATA_DIRECTORIES * COFF_DATA_DIRECTORY_SIZE; uint32_t exp_rva = SEC_VA + EXP_DIR_OFF; uint32_t exp_size = EXP_DIR_END; wr_u32(buf + dd_off + IMAGE_DIRECTORY_ENTRY_EXPORT * 8u + 0, exp_rva); @@ -183,33 +183,33 @@ static void build_dso(uint8_t* buf) { /* One section header: ".edata". 
*/ memcpy(buf + SH_OFF + 0, ".edata\0\0", 8); - wr_u32(buf + SH_OFF + 8, exp_size); /* VirtualSize */ - wr_u32(buf + SH_OFF + 12, SEC_VA); /* VirtualAddress */ - wr_u32(buf + SH_OFF + 16, SEC_RAW_SZ); /* SizeOfRawData */ - wr_u32(buf + SH_OFF + 20, RAW_OFF); /* PointerToRawData */ - wr_u32(buf + SH_OFF + 24, 0); /* PtrToRelocations */ - wr_u32(buf + SH_OFF + 28, 0); /* PtrToLinenumbers */ - wr_u16(buf + SH_OFF + 32, 0); /* NumberOfRelocations */ - wr_u16(buf + SH_OFF + 34, 0); /* NumberOfLinenumbers */ - wr_u32(buf + SH_OFF + 36, 0x40000040u); /* Characteristics: - INITIALIZED_DATA | - MEM_READ */ + wr_u32(buf + SH_OFF + 8, exp_size); /* VirtualSize */ + wr_u32(buf + SH_OFF + 12, SEC_VA); /* VirtualAddress */ + wr_u32(buf + SH_OFF + 16, SEC_RAW_SZ); /* SizeOfRawData */ + wr_u32(buf + SH_OFF + 20, RAW_OFF); /* PointerToRawData */ + wr_u32(buf + SH_OFF + 24, 0); /* PtrToRelocations */ + wr_u32(buf + SH_OFF + 28, 0); /* PtrToLinenumbers */ + wr_u16(buf + SH_OFF + 32, 0); /* NumberOfRelocations */ + wr_u16(buf + SH_OFF + 34, 0); /* NumberOfLinenumbers */ + wr_u32(buf + SH_OFF + 36, 0x40000040u); /* Characteristics: + INITIALIZED_DATA | + MEM_READ */ /* Section raw data — written via RAW_OFF + off. */ uint8_t* sec = buf + RAW_OFF; /* Export Directory header. 
*/ - wr_u32(sec + EXP_DIR_OFF + 0, 0); /* Characteristics */ - wr_u32(sec + EXP_DIR_OFF + 4, 0); /* TimeDateStamp */ - wr_u16(sec + EXP_DIR_OFF + 8, 0); /* MajorVersion */ - wr_u16(sec + EXP_DIR_OFF + 10, 0); /* MinorVersion */ - wr_u32(sec + EXP_DIR_OFF + 12, SEC_VA + DLLNAME_OFF); /* Name */ - wr_u32(sec + EXP_DIR_OFF + 16, 1); /* Base */ - wr_u32(sec + EXP_DIR_OFF + 20, EAT_COUNT); /* NumberOfFunctions */ - wr_u32(sec + EXP_DIR_OFF + 24, ENT_COUNT); /* NumberOfNames */ - wr_u32(sec + EXP_DIR_OFF + 28, SEC_VA + EAT_OFF); /* AddressOfFunctions */ - wr_u32(sec + EXP_DIR_OFF + 32, SEC_VA + ENT_OFF); /* AddressOfNames */ - wr_u32(sec + EXP_DIR_OFF + 36, SEC_VA + ORD_OFF); /* AddressOfNameOrds */ + wr_u32(sec + EXP_DIR_OFF + 0, 0); /* Characteristics */ + wr_u32(sec + EXP_DIR_OFF + 4, 0); /* TimeDateStamp */ + wr_u16(sec + EXP_DIR_OFF + 8, 0); /* MajorVersion */ + wr_u16(sec + EXP_DIR_OFF + 10, 0); /* MinorVersion */ + wr_u32(sec + EXP_DIR_OFF + 12, SEC_VA + DLLNAME_OFF); /* Name */ + wr_u32(sec + EXP_DIR_OFF + 16, 1); /* Base */ + wr_u32(sec + EXP_DIR_OFF + 20, EAT_COUNT); /* NumberOfFunctions */ + wr_u32(sec + EXP_DIR_OFF + 24, ENT_COUNT); /* NumberOfNames */ + wr_u32(sec + EXP_DIR_OFF + 28, SEC_VA + EAT_OFF); /* AddressOfFunctions */ + wr_u32(sec + EXP_DIR_OFF + 32, SEC_VA + ENT_OFF); /* AddressOfNames */ + wr_u32(sec + EXP_DIR_OFF + 36, SEC_VA + ORD_OFF); /* AddressOfNameOrds */ /* EAT: index 0 = direct (outside export-dir range); * index 1 = forwarder (inside export-dir range, pointing at diff --git a/test/elf/unit/smoke.c b/test/elf/unit/smoke.c @@ -9,7 +9,7 @@ * * and checks that the readback produces the same shape (modulo * synthesized STT_SECTION symbols and section ordering — the equivalence - * the read_elf comment in src/obj/elf_read.c documents). + * the read_elf comment in src/obj/elf/read.c documents). * * Exit 0 = pass; non-zero = fail (with a one-line stderr explanation). 
*/ diff --git a/test/emu/rv64_extras_test.c b/test/emu/rv64_extras_test.c @@ -27,7 +27,7 @@ #include "core/core.h" #include "emu/emu.h" #include "emu/rv64_ops.h" -#include "obj/elf.h" +#include "obj/elf/elf.h" /* Loader side-channel — declared in elf_load.c. */ int emu_load_elf_attach(EmuCPUState*, const EmuLoadedImage*); @@ -191,8 +191,10 @@ static void decode_rvc(void) { EmuInst insts[8]; unsigned char buf[16]; u32 n; - buf[0] = 0x15; buf[1] = 0x45; /* C.LI a0, 5 */ - buf[2] = 0x05; buf[3] = 0x05; /* C.ADDI a0, 1 */ + buf[0] = 0x15; + buf[1] = 0x45; /* C.LI a0, 5 */ + buf[2] = 0x05; + buf[3] = 0x05; /* C.ADDI a0, 1 */ ((u32*)(buf + 4))[0] = rv_ecall(); n = emu_decode_block(CFREE_EMU_ARCH_RISCV64, buf, 0x10000, insts, 8); EXPECT(n >= 3u, "RVC decode block returned %u insts", n); @@ -201,8 +203,8 @@ static void decode_rvc(void) { "RVC c.li -> addi a0, x0, 5 (got op=%u rd=%u imm=%lld)", (unsigned)insts[0].op, (unsigned)insts[0].operands[0], (long long)(i64)insts[0].operands[3]); - EXPECT(insts[0].guest_bytes == 2u, - "RVC insn must advance PC by 2, got %u", insts[0].guest_bytes); + EXPECT(insts[0].guest_bytes == 2u, "RVC insn must advance PC by 2, got %u", + insts[0].guest_bytes); EXPECT(insts[1].op == RV64_OP_ADDI && (u32)insts[1].operands[0] == 10u && (i64)insts[1].operands[3] == 1, "RVC c.addi -> addi a0, a0, 1"); @@ -237,27 +239,31 @@ static unsigned char* build_fp_elf(size_t* out_len) { /* Instruction stream: 7 insns = 28 bytes. 
*/ u32 prog[16]; size_t prog_n = 0; - prog[prog_n++] = rv_addi(10, 0, 42); /* a0 = 42 */ - prog[prog_n++] = enc_fcvt_s_w(0, 10); /* ft0 = (float)a0 */ - prog[prog_n++] = enc_fmv_x_w(11, 0); /* a1 = bits(ft0) */ - prog[prog_n++] = enc_csrrs(12, 0x003, 0); /* a2 = fcsr */ - prog[prog_n++] = rv_addi(10, 0, 0); /* a0 = 0 (exit code) */ - prog[prog_n++] = rv_addi(17, 0, 94); /* a7 = SYS_exit_group */ - prog[prog_n++] = rv_ecall(); /* ecall */ + prog[prog_n++] = rv_addi(10, 0, 42); /* a0 = 42 */ + prog[prog_n++] = enc_fcvt_s_w(0, 10); /* ft0 = (float)a0 */ + prog[prog_n++] = enc_fmv_x_w(11, 0); /* a1 = bits(ft0) */ + prog[prog_n++] = enc_csrrs(12, 0x003, 0); /* a2 = fcsr */ + prog[prog_n++] = rv_addi(10, 0, 0); /* a0 = 0 (exit code) */ + prog[prog_n++] = rv_addi(17, 0, 94); /* a7 = SYS_exit_group */ + prog[prog_n++] = rv_ecall(); /* ecall */ size_t prog_bytes = prog_n * 4u; size_t total = TEXT_OFF + prog_bytes; unsigned char* b = (unsigned char*)calloc(1, total); if (!b) return NULL; - b[EI_MAG0] = ELFMAG0; b[EI_MAG1] = ELFMAG1; - b[EI_MAG2] = ELFMAG2; b[EI_MAG3] = ELFMAG3; + b[EI_MAG0] = ELFMAG0; + b[EI_MAG1] = ELFMAG1; + b[EI_MAG2] = ELFMAG2; + b[EI_MAG3] = ELFMAG3; b[EI_CLASS] = ELFCLASS64; b[EI_DATA] = ELFDATA2LSB; b[EI_VERSION] = EV_CURRENT; /* e_type=ET_EXEC, e_machine=EM_RISCV, e_entry, e_phoff, ... */ - unsigned* p32; unsigned long long* p64; + unsigned* p32; + unsigned long long* p64; /* Use the same put helpers idiom from smoke_test: open-code them. 
*/ - b[16] = ET_EXEC; b[17] = 0; + b[16] = ET_EXEC; + b[17] = 0; b[18] = (unsigned char)EM_RISCV; b[19] = (unsigned char)(EM_RISCV >> 8); b[20] = EV_CURRENT; @@ -265,19 +271,25 @@ static unsigned char* build_fp_elf(size_t* out_len) { unsigned long long ent = BASE_VA + TEXT_OFF; for (int i = 0; i < 8; ++i) b[24 + i] = (unsigned char)(ent >> (8 * i)); /* e_phoff = 64 */ - b[32] = 64; for (int i = 1; i < 8; ++i) b[32 + i] = 0; + b[32] = 64; + for (int i = 1; i < 8; ++i) b[32 + i] = 0; /* e_ehsize=64, e_phentsize=56, e_phnum=1 */ - b[52] = ELF64_EHDR_SIZE; b[53] = 0; - b[54] = ELF64_PHDR_SIZE; b[55] = 0; - b[56] = 1; b[57] = 0; + b[52] = ELF64_EHDR_SIZE; + b[53] = 0; + b[54] = ELF64_PHDR_SIZE; + b[55] = 0; + b[56] = 1; + b[57] = 0; /* PT_LOAD covering [0, total) at VA BASE_VA. */ b[64] = PT_LOAD; /* p_type lo */ b[64 + 4] = (unsigned char)(PF_R | PF_X); /* p_offset = 0; p_vaddr = BASE_VA; p_paddr = BASE_VA; p_filesz = total; * p_memsz = total; p_align = PAGE. */ - for (int i = 0; i < 8; ++i) b[64 + 16 + i] = (unsigned char)(BASE_VA >> (8 * i)); - for (int i = 0; i < 8; ++i) b[64 + 24 + i] = (unsigned char)(BASE_VA >> (8 * i)); + for (int i = 0; i < 8; ++i) + b[64 + 16 + i] = (unsigned char)(BASE_VA >> (8 * i)); + for (int i = 0; i < 8; ++i) + b[64 + 24 + i] = (unsigned char)(BASE_VA >> (8 * i)); unsigned long long tot = total; for (int i = 0; i < 8; ++i) b[64 + 32 + i] = (unsigned char)(tot >> (8 * i)); for (int i = 0; i < 8; ++i) b[64 + 40 + i] = (unsigned char)(tot >> (8 * i)); @@ -286,7 +298,8 @@ static unsigned char* build_fp_elf(size_t* out_len) { /* Copy the program bytes at file offset TEXT_OFF. 
*/ memcpy(b + TEXT_OFF, prog, prog_bytes); - (void)p32; (void)p64; + (void)p32; + (void)p64; *out_len = total; return b; } @@ -307,10 +320,14 @@ static void fp_csr_interp(void) { if (!elf) return; memset(&img, 0, sizeof img); - int rc = emu_load_elf(cc, CFREE_EMU_ARCH_RISCV64, elf, elf_len, NULL, NULL, - &img); + int rc = + emu_load_elf(cc, CFREE_EMU_ARCH_RISCV64, elf, elf_len, NULL, NULL, &img); EXPECT(rc == 0, "emu_load_elf rc=%d", rc); - if (rc != 0) { free(elf); cfree_compiler_free(c); return; } + if (rc != 0) { + free(elf); + cfree_compiler_free(c); + return; + } cpu = emu_cpu_new(cc, CFREE_EMU_ARCH_RISCV64, img.entry_pc, img.initial_sp); EXPECT(cpu != NULL, "cpu_new"); @@ -320,9 +337,15 @@ static void fp_csr_interp(void) { for (steps = 0; steps < 64u; ++steps) { u64 pc = emu_cpu_pc(cpu); unsigned char* p = emu_cpu_va_to_host_pub(cpu, pc, 4); - if (!p) { EXPECT(0, "PC OOB"); break; } + if (!p) { + EXPECT(0, "PC OOB"); + break; + } n = emu_decode_block(CFREE_EMU_ARCH_RISCV64, p, pc, insts, 16); - if (n == 0) { EXPECT(0, "decode 0"); break; } + if (n == 0) { + EXPECT(0, "decode 0"); + break; + } emu_cpu_interp_block(cpu, insts, n); if (emu_cpu_trap_reason(cpu) != EMU_TRAP_NONE) break; } @@ -334,8 +357,7 @@ static void fp_csr_interp(void) { EXPECT(emu_cpu_xreg(cpu, 11) == 0x42280000ull, "a1 should hold bits of (float)42 = 0x42280000, got 0x%llx", (unsigned long long)emu_cpu_xreg(cpu, 11)); - EXPECT(emu_cpu_xreg(cpu, 12) == 0, - "a2 fcsr starts at 0, got 0x%llx", + EXPECT(emu_cpu_xreg(cpu, 12) == 0, "a2 fcsr starts at 0, got 0x%llx", (unsigned long long)emu_cpu_xreg(cpu, 12)); emu_cpu_free(cpu); @@ -404,12 +426,15 @@ static unsigned char* build_minimal_interp_elf(size_t* out_len) { size_t total = TEXT_OFF + 16; unsigned char* b = (unsigned char*)calloc(1, total); if (!b) return NULL; - b[EI_MAG0] = ELFMAG0; b[EI_MAG1] = ELFMAG1; - b[EI_MAG2] = ELFMAG2; b[EI_MAG3] = ELFMAG3; + b[EI_MAG0] = ELFMAG0; + b[EI_MAG1] = ELFMAG1; + b[EI_MAG2] = ELFMAG2; + b[EI_MAG3] = 
ELFMAG3; b[EI_CLASS] = ELFCLASS64; b[EI_DATA] = ELFDATA2LSB; b[EI_VERSION] = EV_CURRENT; - b[16] = ET_DYN; b[17] = 0; + b[16] = ET_DYN; + b[17] = 0; b[18] = (unsigned char)EM_RISCV; b[19] = (unsigned char)(EM_RISCV >> 8); b[20] = EV_CURRENT; @@ -457,12 +482,15 @@ static unsigned char* build_program_with_interp(size_t* out_len) { size_t total = interp_off + sizeof(interp_path) + 0x100; unsigned char* b = (unsigned char*)calloc(1, total); if (!b) return NULL; - b[EI_MAG0] = ELFMAG0; b[EI_MAG1] = ELFMAG1; - b[EI_MAG2] = ELFMAG2; b[EI_MAG3] = ELFMAG3; + b[EI_MAG0] = ELFMAG0; + b[EI_MAG1] = ELFMAG1; + b[EI_MAG2] = ELFMAG2; + b[EI_MAG3] = ELFMAG3; b[EI_CLASS] = ELFCLASS64; b[EI_DATA] = ELFDATA2LSB; b[EI_VERSION] = EV_CURRENT; - b[16] = ET_EXEC; b[17] = 0; + b[16] = ET_EXEC; + b[17] = 0; b[18] = (unsigned char)EM_RISCV; b[19] = (unsigned char)(EM_RISCV >> 8); b[20] = EV_CURRENT; @@ -475,8 +503,10 @@ static unsigned char* build_program_with_interp(size_t* out_len) { /* PT_LOAD covering [0, total) at VA BASE_VA. 
*/ b[64] = PT_LOAD; b[64 + 4] = (unsigned char)(PF_R | PF_X); - for (int i = 0; i < 8; ++i) b[64 + 16 + i] = (unsigned char)(BASE_VA >> (8 * i)); - for (int i = 0; i < 8; ++i) b[64 + 24 + i] = (unsigned char)(BASE_VA >> (8 * i)); + for (int i = 0; i < 8; ++i) + b[64 + 16 + i] = (unsigned char)(BASE_VA >> (8 * i)); + for (int i = 0; i < 8; ++i) + b[64 + 24 + i] = (unsigned char)(BASE_VA >> (8 * i)); unsigned long long tot = total; for (int i = 0; i < 8; ++i) b[64 + 32 + i] = (unsigned char)(tot >> (8 * i)); for (int i = 0; i < 8; ++i) b[64 + 40 + i] = (unsigned char)(tot >> (8 * i)); @@ -488,8 +518,10 @@ static unsigned char* build_program_with_interp(size_t* out_len) { unsigned long long ioff = interp_off; for (int i = 0; i < 8; ++i) b[ph2 + 8 + i] = (unsigned char)(ioff >> (8 * i)); unsigned long long ilen = sizeof(interp_path); - for (int i = 0; i < 8; ++i) b[ph2 + 32 + i] = (unsigned char)(ilen >> (8 * i)); - for (int i = 0; i < 8; ++i) b[ph2 + 40 + i] = (unsigned char)(ilen >> (8 * i)); + for (int i = 0; i < 8; ++i) + b[ph2 + 32 + i] = (unsigned char)(ilen >> (8 * i)); + for (int i = 0; i < 8; ++i) + b[ph2 + 40 + i] = (unsigned char)(ilen >> (8 * i)); /* Program text: exit(42). */ u32 i0 = rv_addi(10, 0, 42); u32 i1 = rv_addi(17, 0, 94); @@ -511,7 +543,10 @@ static void pt_interp_handoff(void) { unsigned char* prog = build_program_with_interp(&prog_len); EXPECT(interp && prog, "buffer alloc"); if (!interp || !prog) { - free(interp); free(prog); cfree_compiler_free(c); return; + free(interp); + free(prog); + cfree_compiler_free(c); + return; } /* Stage the interpreter bytes; loader consumes them on the next @@ -524,7 +559,10 @@ static void pt_interp_handoff(void) { &img); EXPECT(rc == 0, "emu_load_elf with PT_INTERP rc=%d", rc); if (rc != 0) { - free(interp); free(prog); cfree_compiler_free(c); return; + free(interp); + free(prog); + cfree_compiler_free(c); + return; } /* entry_pc should be the interpreter's entry (which we placed past * the program). 
The program's BASE_VA is 0x40000, so the interpreter @@ -552,8 +590,7 @@ static void pt_interp_handoff(void) { EXPECT(emu_cpu_trap_reason(cpu) == EMU_TRAP_EXIT, "interp exited via EMU_TRAP_EXIT"); EXPECT(emu_cpu_exit_code(cpu) == 99, - "interp exit code 99 (= a0 at exit), got %d", - emu_cpu_exit_code(cpu)); + "interp exit code 99 (= a0 at exit), got %d", emu_cpu_exit_code(cpu)); emu_cpu_free(cpu); emu_unload_image(cc, &img); diff --git a/test/emu/rv64_smoke_test.c b/test/emu/rv64_smoke_test.c @@ -31,7 +31,7 @@ #include "core/core.h" #include "emu/emu.h" #include "emu/rv64_ops.h" -#include "obj/elf.h" +#include "obj/elf/elf.h" /* The loader exposes emu_load_elf_attach via a forward decl since the * locked include/cfree/emu.h does not expose it. cpu.c exports the @@ -155,29 +155,29 @@ static unsigned char* build_minimal_elf(size_t* out_len) { b[EI_DATA] = ELFDATA2LSB; b[EI_VERSION] = EV_CURRENT; b[EI_OSABI] = ELFOSABI_NONE; - put16(b, 16, ET_EXEC); /* e_type */ - put16(b, 18, EM_RISCV); /* e_machine */ - put32(b, 20, EV_CURRENT); /* e_version */ - put64(b, 24, BASE_VA + TEXT_OFF); /* e_entry */ - put64(b, 32, 64); /* e_phoff */ - put64(b, 40, 0); /* e_shoff (none) */ - put32(b, 48, 0); /* e_flags */ - put16(b, 52, ELF64_EHDR_SIZE); /* e_ehsize */ - put16(b, 54, ELF64_PHDR_SIZE); /* e_phentsize */ - put16(b, 56, 1); /* e_phnum */ - put16(b, 58, 0); /* e_shentsize */ - put16(b, 60, 0); /* e_shnum */ - put16(b, 62, 0); /* e_shstrndx */ + put16(b, 16, ET_EXEC); /* e_type */ + put16(b, 18, EM_RISCV); /* e_machine */ + put32(b, 20, EV_CURRENT); /* e_version */ + put64(b, 24, BASE_VA + TEXT_OFF); /* e_entry */ + put64(b, 32, 64); /* e_phoff */ + put64(b, 40, 0); /* e_shoff (none) */ + put32(b, 48, 0); /* e_flags */ + put16(b, 52, ELF64_EHDR_SIZE); /* e_ehsize */ + put16(b, 54, ELF64_PHDR_SIZE); /* e_phentsize */ + put16(b, 56, 1); /* e_phnum */ + put16(b, 58, 0); /* e_shentsize */ + put16(b, 60, 0); /* e_shnum */ + put16(b, 62, 0); /* e_shstrndx */ /* PT_LOAD phdr — 56 
bytes at offset 64. */ - put32(b, 64 + 0, PT_LOAD); /* p_type */ - put32(b, 64 + 4, PF_R | PF_X); /* p_flags */ - put64(b, 64 + 8, 0); /* p_offset */ - put64(b, 64 + 16, BASE_VA); /* p_vaddr */ - put64(b, 64 + 24, BASE_VA); /* p_paddr */ - put64(b, 64 + 32, total); /* p_filesz */ - put64(b, 64 + 40, total); /* p_memsz */ - put64(b, 64 + 48, PAGE); /* p_align */ + put32(b, 64 + 0, PT_LOAD); /* p_type */ + put32(b, 64 + 4, PF_R | PF_X); /* p_flags */ + put64(b, 64 + 8, 0); /* p_offset */ + put64(b, 64 + 16, BASE_VA); /* p_vaddr */ + put64(b, 64 + 24, BASE_VA); /* p_paddr */ + put64(b, 64 + 32, total); /* p_filesz */ + put64(b, 64 + 40, total); /* p_memsz */ + put64(b, 64 + 48, PAGE); /* p_align */ /* .text: addi a0,zero,42 ; addi a7,zero,94 ; ecall */ put32(b, TEXT_OFF + 0, rv_addi(RV_A0, RV_ZERO, 42)); @@ -208,8 +208,8 @@ static void decoder_smoke(void) { EXPECT((i64)insts[0].operands[3] == 42, "imm should be 42"); EXPECT(insts[1].op == RV64_OP_ADDI, "second insn must be ADDI"); EXPECT((i64)insts[1].operands[3] == 94, "imm should be 94"); - EXPECT(insts[2].op == RV64_OP_ECALL, - "third insn must be ECALL, got %u", insts[2].op); + EXPECT(insts[2].op == RV64_OP_ECALL, "third insn must be ECALL, got %u", + insts[2].op); EXPECT(insts[2].flags & RV64_INST_FLAG_TERMINATOR, "ECALL must be marked terminator"); /* The block stops at ECALL; the ADD at offset 12 should not have @@ -274,8 +274,7 @@ static void interp_smoke(void) { if (emu_cpu_trap_reason(cpu) != EMU_TRAP_NONE) break; } EXPECT(emu_cpu_trap_reason(cpu) == EMU_TRAP_EXIT, - "expected EMU_TRAP_EXIT, got %u", - (unsigned)emu_cpu_trap_reason(cpu)); + "expected EMU_TRAP_EXIT, got %u", (unsigned)emu_cpu_trap_reason(cpu)); exit_code = emu_cpu_exit_code(cpu); EXPECT(exit_code == 42, "exit_code should be 42, got %d", exit_code);