commit d0785fcb8283dadb957a8afb00dd246b4f59e461
parent b56c3ece7c3472325f2596146c84be3b61cf2079
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 25 May 2026 03:45:32 -0700
Refactor object formats into gated directories
Diffstat:
66 files changed, 12893 insertions(+), 12751 deletions(-)
diff --git a/Makefile b/Makefile
@@ -33,13 +33,20 @@ LIB_SRCS_NONARCH = $(shell find src -name '*.c' \
-not -path 'src/arch/aa64/*' \
-not -path 'src/arch/x64/*' \
-not -path 'src/arch/rv64/*' \
- -not -path 'src/arch/c_target/*')
+ -not -path 'src/arch/c_target/*' \
+ -not -path 'src/obj/elf/*' \
+ -not -path 'src/obj/macho/*' \
+ -not -path 'src/obj/coff/*')
LIB_SRCS_ARCH_AA64 = $(shell find src/arch/aa64 -name '*.c' 2>/dev/null)
LIB_SRCS_ARCH_X64 = $(shell find src/arch/x64 -name '*.c' 2>/dev/null)
LIB_SRCS_ARCH_RV64 = $(shell find src/arch/rv64 -name '*.c' 2>/dev/null)
LIB_SRCS_ARCH_C_TARGET = $(shell find src/arch/c_target -name '*.c' 2>/dev/null)
+LIB_SRCS_OBJ_ELF = $(shell find src/obj/elf -name '*.c' 2>/dev/null)
+LIB_SRCS_OBJ_MACHO = $(shell find src/obj/macho -name '*.c' 2>/dev/null)
+LIB_SRCS_OBJ_COFF = $(shell find src/obj/coff -name '*.c' 2>/dev/null)
+
LIB_SRCS = $(LIB_SRCS_NONARCH)
ifeq ($(CFREE_ARCH_AA64_ENABLED),1)
LIB_SRCS += $(LIB_SRCS_ARCH_AA64)
@@ -53,6 +60,15 @@ endif
ifeq ($(CFREE_ARCH_C_TARGET_ENABLED),1)
LIB_SRCS += $(LIB_SRCS_ARCH_C_TARGET)
endif
+ifeq ($(CFREE_OBJ_ELF_ENABLED),1)
+LIB_SRCS += $(LIB_SRCS_OBJ_ELF)
+endif
+ifeq ($(CFREE_OBJ_MACHO_ENABLED),1)
+LIB_SRCS += $(LIB_SRCS_OBJ_MACHO)
+endif
+ifeq ($(CFREE_OBJ_COFF_ENABLED),1)
+LIB_SRCS += $(LIB_SRCS_OBJ_COFF)
+endif
# Per-frontend source sets. Each is gated by its CFREE_LANG_*_ENABLED flag
# from mk/config.mk so the matching `#if` in src/api/lang_registry.c and
diff --git a/doc/REGISTRY.md b/doc/REGISTRY.md
@@ -33,59 +33,49 @@ Hand-edited today; a future `configure` script can generate it.
#define CFREE_CONFIG_H
/* Backend architectures. */
-#define CFREE_ARCH_AA64 1
-#define CFREE_ARCH_X64 1
-#define CFREE_ARCH_RV64 1
-#define CFREE_ARCH_C_TARGET 0
+#define CFREE_ARCH_AA64_ENABLED 1
+#define CFREE_ARCH_X64_ENABLED 1
+#define CFREE_ARCH_RV64_ENABLED 1
+#define CFREE_ARCH_C_TARGET_ENABLED 1
-/* Object/image formats. Each gates emit + read + link-image paths and
- * the matching arch-side reloc tables. */
-#define CFREE_OBJ_ELF 1
-#define CFREE_OBJ_MACHO 1
-#define CFREE_OBJ_COFF 1
+/* Object/image formats. */
+#define CFREE_OBJ_ELF_ENABLED 1
+#define CFREE_OBJ_MACHO_ENABLED 1
+#define CFREE_OBJ_COFF_ENABLED 1
/* Language frontends. CFREE_LANG_ASM is unconditional: the assembler
* lives inside libcfree as part of the codegen substrate. */
-#define CFREE_LANG_C_ENABLED 1
-#define CFREE_LANG_TOY_ENABLED 1
-#define CFREE_LANG_WASM_ENABLED 1
+#define CFREE_LANG_CPP_ENABLED 1
+#define CFREE_LANG_C_ENABLED 1
+#define CFREE_LANG_TOY_ENABLED 1
+#define CFREE_LANG_WASM_ENABLED 1
#endif
```
-A `config.c` adds `_Static_assert` checks that at least one arch and at
-least one obj format are enabled.
+`src/core/config_assert.c` adds `_Static_assert` checks that at least
+one arch and at least one obj format are enabled.
## Axis 1: Backend architectures
-**Status: vtable and registry already exist; only gating is new.**
+**Status: vtable, registry, and source gating are done.**
-- **Vtable**: `ArchImpl` (`src/arch/arch.h:1098`). Carries sub-pointers
- for the format-specific reloc tables (`elf`, `macho`, `coff`),
- `dwarf`/`dbg` hooks, the `link` arch descriptor, the per-OS
- `abi_vtable` dispatcher, and the codegen/assembler/disassembler
- factories.
+- **Vtable**: `ArchImpl` (`src/arch/arch.h`). Carries `dwarf`/`dbg`
+ hooks, the `link` arch descriptor, the per-OS `abi_vtable`
+ dispatcher, and the codegen/assembler/disassembler factories.
- **Registry**: `src/arch/registry.c` already holds a static
- `arch_impls[]` array and exposes `arch_lookup`,
- `arch_lookup_elf_machine`, `arch_lookup_macho_cputype`,
- `arch_lookup_coff_machine`.
-- **Change**: wrap each entry (and its `extern const ArchImpl`
- declaration) with `#if CFREE_ARCH_<NAME>`. The `Makefile` switches
- from globbed arch sources to per-arch object groups gated by the same
- flags, so disabled archs are neither compiled nor linked.
-
-No new vtable or registry code; this axis is the cheapest of the four.
+ `arch_impls[]` array and exposes `arch_lookup`.
+- **Build**: each `src/arch/<name>/` source group is gated by the
+ matching `CFREE_ARCH_*_ENABLED` flag.
## Axis 2: Object/image formats
-**Status: vtable and registry exist; source gating remains.**
+**Status: vtable, registry, format-arch ops, and directory-based source
+gating are done.**
-`emit_elf` / `emit_macho` / `emit_coff` are still implemented as
-format-specific functions (`src/obj/{elf,macho,coff}_emit.c`), as are
-the read paths (`*_read.c`), DSO readers (`*_read_dso.c`), and
-link-image emitters (`src/link/link_{elf,macho,coff}.c`). Generic call
-sites now reach them through `ObjFormatImpl` in `src/obj/format.h` and
-`src/obj/registry.c`.
+`emit_elf` / `emit_macho` / `emit_coff` and their read/link paths live
+under `src/obj/{elf,macho,coff}/`. Generic call sites reach them through
+`ObjFormatImpl` in `src/obj/format.h` and `src/obj/registry.c`.
**Vtable** (`src/obj/format.h`):
@@ -108,6 +98,16 @@ typedef struct ObjFormatImpl {
/* Link-image emit (executable / shared object). */
void (*link_emit)(LinkImage*, Writer*);
+ void (*layout_dyn)(Linker*, LinkImage*);
+ void (*free_dyn)(LinkImage*);
+
+ /* Format-owned arch mappings and relocation wire encoders. */
+ const ObjElfArchOps* (*elf_arch)(CfreeArchKind);
+ const ObjElfArchOps* (*elf_machine)(u32 e_machine);
+ const ObjMachoArchOps* (*macho_arch)(CfreeArchKind);
+ const ObjMachoArchOps* (*macho_cputype)(u32 cputype);
+ const ObjCoffArchOps* (*coff_arch)(CfreeArchKind);
+ const ObjCoffArchOps* (*coff_machine)(u16 machine);
/* Optional format-specific linker input policy. */
int (*classify_obj_input)(Compiler*, ObjBuilder*, Sym* soname_out);
@@ -142,13 +142,14 @@ static const ObjFormatImpl* const obj_format_impls[] = {
**Call-site changes**: the switch in `src/link/link.c` and the obj
emit/read entry points have collapsed to
`obj_format_lookup(target.obj_format)->fn(...)`. COFF short-import and
-long-form import-archive handling now live behind object-format hooks,
-so linker input ingestion stays generic over the object format.
+long-form import-archive handling, ELF dynamic layout, and
+(arch × format) relocation wire metadata now live behind object-format
+hooks, so the linker and object entry points stay generic over the
+object format implementation.
-The arch-side format reloc tables (`ArchElfOps`, `ArchMachoOps`,
-`ArchCoffOps` on `ArchImpl`) stay where they are — they're the
-(arch × format) intersection and naturally drop out when either side
-isn't compiled in.
+**Build**: `Makefile` excludes `src/obj/elf/`, `src/obj/macho/`, and
+`src/obj/coff/` from the shared source glob, then adds each directory
+back only when the matching `CFREE_OBJ_*_ENABLED` flag is set to `1`.
## Axis 3: ABIs (derived)
@@ -209,27 +210,24 @@ isn't compiled in.
## Summary
-| Axis | Vtable | Registry | Net new work |
-|-------------|-------------------------------------------------|----------------------------------------------|-----------------------------------------------------------|
-| Arch | `ArchImpl` (exists) | `src/arch/registry.c` (exists) | `#if CFREE_ARCH_*` gates |
-| Obj format | `ObjFormatImpl` (new) | `src/obj/registry.c` (new) | Extract `emit_*` / `read_*` / `link_emit_*` behind vtable |
-| ABI | `ABIVtable` (exists) | per-arch `abi_dispatch` (exists) | Gate per-OS dispatch entries by obj-format flag |
-| Frontend | `CfreeFrontendVTable` (exists) | `src/api/lang_registry.c` (new) | Per-frontend vtable extern + folded into `libcfree.a` |
+| Axis | Vtable | Registry | Remaining work |
+|------------|-----------------------|---------------------------|-------------------------------------------------|
+| Arch | `ArchImpl` | `src/arch/registry.c` | none for registry/source gating |
+| Obj format | `ObjFormatImpl` | `src/obj/registry.c` | move small policy checks into vtable as needed |
+| ABI | `ABIVtable` | per-arch `abi_dispatch` | gate per-OS dispatch entries by obj-format flag |
+| Frontend | `CfreeFrontendVTable` | `src/api/lang_registry.c` | none for registry/source gating |
## Implementation order
-1. Land `include/cfree/config.h` with all flags set to `1` (no behavior
- change). Add `_Static_assert` minimums in `config.c`.
-2. Gate arch registry entries and arch Makefile sources. Verify with a
- build that flips one arch off.
-3. Extract `ObjFormatImpl` and the obj registry. Replace the switches
- in `link.c` and the obj entry points. Verify a build with one obj
- format off.
-4. Gate per-ABI sources and per-OS dispatch entries.
-5. Add `src/api/lang_registry.c`, expose `cfree_<lang>_frontend_vtable`
- externs, fold `lang/<name>/*.c` into `libcfree.a` gated by
- `CFREE_LANG_<NAME>_ENABLED`, and drop host-side registration calls
- from `driver/env.c`.
+1. Done: land `include/cfree/config.h` and `mk/config.mk` with all flags
+ enabled by default.
+2. Done: gate arch registry entries and arch Makefile sources.
+3. Done: extract `ObjFormatImpl`, move format code under
+ `src/obj/{elf,macho,coff}/`, and gate those directories.
+4. Remaining: gate per-ABI sources and per-OS dispatch entries.
+5. Done: add `src/api/lang_registry.c`, expose
+ `cfree_<lang>_frontend_vtable` externs, and fold frontends into
+ `libcfree.a`.
Each step is independently testable and leaves the build green with the
default all-on configuration.
diff --git a/src/api/stubs.c b/src/api/stubs.c
@@ -11,7 +11,7 @@ static _Noreturn void unimplemented(Compiler* c, const char* what) {
}
/* WASM emit/read remain stubs until those writers/readers land.
- * COFF emit/read are implemented in src/obj/coff_emit.c and coff_read.c. */
+ * COFF emit/read are implemented under src/obj/coff/. */
void emit_wasm(Compiler* c, ObjBuilder* o, Writer* w) {
(void)o;
diff --git a/src/arch/aa64/arch.c b/src/arch/aa64/arch.c
@@ -8,9 +8,6 @@
#include "arch/aa64/regs.h"
#include "core/bytes.h"
#include "link/link_arch.h"
-#include "obj/coff.h"
-#include "obj/elf.h"
-#include "obj/macho.h"
#include "obj/obj.h"
extern const LinkArchDesc link_arch_aa64;
@@ -45,33 +42,11 @@ static void aa64_wr_u64_target(Compiler* c, u8* p, u64 v) {
}
}
-static const ArchElfOps aa64_elf_ops = {
- .e_machine = EM_AARCH64,
- .e_flags = 0,
- .reloc_to = elf_aarch64_reloc_to,
- .reloc_from = elf_aarch64_reloc_from,
-};
-
-static const ArchMachoOps aa64_macho_ops = {
- .cputype = CPU_TYPE_ARM64,
- .cpusubtype = CPU_SUBTYPE_ARM64_ALL,
- .reloc_to = macho_aarch64_reloc_to,
- .reloc_pcrel = macho_aarch64_reloc_pcrel,
- .reloc_length = macho_aarch64_reloc_length,
- .reloc_from = macho_aarch64_reloc_from,
-};
-
static const ArchDwarfOps aa64_dwarf_ops = {
.min_inst_len = 4u,
.max_ops_per_inst = 1u,
};
-static const ArchCoffOps aa64_coff_ops = {
- .machine = IMAGE_FILE_MACHINE_ARM64,
- .reloc_to = coff_aarch64_reloc_to,
- .reloc_from = coff_aarch64_reloc_from,
-};
-
static int aa64_apply_label_fixup(Compiler* c, const ArchLabelFixup* fx) {
const Section* s;
u8 cur[4];
@@ -110,8 +85,8 @@ static int aa64_apply_label_fixup(Compiler* c, const ArchLabelFixup* fx) {
u32 imm21 = (u32)(fx->disp & 0x1fffffu);
u32 immlo = imm21 & 0x3u;
u32 immhi = (imm21 >> 2) & 0x7ffffu;
- word = (word & ~((0x3u << 29) | (0x7ffffu << 5))) |
- (immlo << 29) | (immhi << 5);
+ word = (word & ~((0x3u << 29) | (0x7ffffu << 5))) | (immlo << 29) |
+ (immhi << 5);
break;
}
case R_AARCH64_INTRA_LABEL_ADDR: {
@@ -169,7 +144,8 @@ static const CfreePredefinedMacro aa64_predefined_macros[] = {
{CFREE_SLICE_LIT("__LP64__"), CFREE_SLICE_LIT("1")},
{CFREE_SLICE_LIT("__ORDER_LITTLE_ENDIAN__"), CFREE_SLICE_LIT("1234")},
{CFREE_SLICE_LIT("__ORDER_BIG_ENDIAN__"), CFREE_SLICE_LIT("4321")},
- {CFREE_SLICE_LIT("__BYTE_ORDER__"), CFREE_SLICE_LIT("__ORDER_LITTLE_ENDIAN__")},
+ {CFREE_SLICE_LIT("__BYTE_ORDER__"),
+ CFREE_SLICE_LIT("__ORDER_LITTLE_ENDIAN__")},
{CFREE_SLICE_LIT("__LITTLE_ENDIAN__"), CFREE_SLICE_LIT("1")},
};
@@ -183,9 +159,6 @@ const ArchImpl arch_impl_aa64 = {
.disasm_new = aa64_disasm_new,
.apply_label_fixup = aa64_apply_label_fixup,
.link = &link_arch_aa64,
- .elf = &aa64_elf_ops,
- .macho = &aa64_macho_ops,
- .coff = &aa64_coff_ops,
.dwarf = &aa64_dwarf_ops,
.dbg = &aa64_dbg_ops,
.predefined_macros = aa64_predefined_macros,
diff --git a/src/arch/aa64/link.c b/src/arch/aa64/link.c
@@ -15,8 +15,6 @@
#include "core/bytes.h"
#include "core/core.h"
#include "link/link_arch.h"
-#include "obj/elf.h"
-#include "obj/macho.h"
#include "obj/obj.h"
/* Fixed register assignments mandated by the AArch64 PLT ABI. */
@@ -145,8 +143,7 @@ static u32 aa64_emit_iplt_stub(u8* dst, u64 stub_vaddr, u64 slot_vaddr,
* sees an unperturbed x30 / argument registers. Page+offset are
* baked from the post-shift IAT slot vaddr; no apply-time reloc
* needed because both ends move together under image-base shift. */
-static void aa64_emit_coff_iat_stub(u8* dst, u64 stub_vaddr,
- u64 iat_slot_vaddr) {
+void aa64_emit_coff_iat_stub(u8* dst, u64 stub_vaddr, u64 iat_slot_vaddr) {
u32 immlo, immhi;
aa64_adrp_imm_halves(stub_vaddr, iat_slot_vaddr, &immlo, &immhi);
u32 lo12 = (u32)(iat_slot_vaddr & AA64_PAGE_MASK);
@@ -160,7 +157,7 @@ static void aa64_emit_coff_iat_stub(u8* dst, u64 stub_vaddr,
wr_u32_le(dst + 8, aa64_br(AA64_PLT_SCRATCH_X16));
}
-static void aa64_emit_macho_stub(u8* out, u64 stub_vaddr, u64 got_slot_vaddr) {
+void aa64_emit_macho_stub(u8* out, u64 stub_vaddr, u64 got_slot_vaddr) {
i64 page_s = ((i64)got_slot_vaddr) & ~(i64)0xfff;
i64 page_p = ((i64)stub_vaddr) & ~(i64)0xfff;
i64 imm21 = (page_s - page_p) >> 12;
@@ -205,16 +202,6 @@ static int aa64_is_direct_page_reloc(RelocKind kind) {
}
const LinkArchDesc link_arch_aa64 = {
- .e_machine = EM_AARCH64,
- .default_musl_interp = "/lib/ld-musl-aarch64.so.1",
-
- .elf_r_relative = ELF_R_AARCH64_RELATIVE,
- .elf_r_glob_dat = ELF_R_AARCH64_GLOB_DAT,
- .elf_r_jump_slot = ELF_R_AARCH64_JUMP_SLOT,
-
- .macho_cputype = CPU_TYPE_ARM64,
- .macho_cpusubtype = CPU_SUBTYPE_ARM64_ALL,
-
.plt0_size = AA64_PLT0_SIZE,
.plt_entry_size = AA64_PLT_ENTRY_SIZE,
.iplt_stub_size = AA64_IPLT_STUB_SIZE,
@@ -222,11 +209,6 @@ const LinkArchDesc link_arch_aa64 = {
.emit_plt0 = aa64_emit_plt0,
.emit_plt_entry = aa64_emit_plt_entry,
.emit_iplt_stub = aa64_emit_iplt_stub,
- .macho_stub_size = AA64_IPLT_STUB_SIZE,
- .emit_macho_stub = aa64_emit_macho_stub,
-
- .coff_stub_size = AA64_IPLT_STUB_SIZE,
- .emit_coff_iat_stub = aa64_emit_coff_iat_stub,
.is_branch_reloc = aa64_is_branch_reloc,
.is_got_load_reloc = aa64_is_got_load_reloc,
diff --git a/src/arch/arch.h b/src/arch/arch.h
@@ -256,9 +256,9 @@ typedef struct AliasRoot {
typedef struct MemAccess {
CfreeCgTypeId type; /* codegen object type accessed */
- u32 size; /* ABI byte size of this access */
- u32 align; /* known byte alignment; 0 means unknown */
- u16 flags; /* MemFlag */
+ u32 size; /* ABI byte size of this access */
+ u32 align; /* known byte alignment; 0 means unknown */
+ u16 flags; /* MemFlag */
u16 addr_space;
AliasRoot alias;
} MemAccess;
@@ -501,22 +501,22 @@ typedef struct CGScopeDesc {
u8 pad[3];
Label break_label; /* explicit target for break; LABEL_NONE => target creates
one */
- Label continue_label; /* explicit target for continue; LABEL_NONE for
- non-loops */
- Operand cond; /* SCOPE_IF condition; ignored otherwise */
+ Label continue_label; /* explicit target for continue; LABEL_NONE for
+ non-loops */
+ Operand cond; /* SCOPE_IF condition; ignored otherwise */
CfreeCgTypeId result_type; /* reserved for structured expression results */
} CGScopeDesc;
typedef enum AsmDir { ASM_IN, ASM_OUT, ASM_INOUT } AsmDir;
typedef struct AsmConstraint {
- const char* str; /* GCC-style: "r", "=&r", "+m", "i", "0" ... */
- Sym name; /* GCC `[name]` symbolic operand; 0 if absent */
+ const char* str; /* GCC-style: "r", "=&r", "+m", "i", "0" ... */
+ Sym name; /* GCC `[name]` symbolic operand; 0 if absent */
CfreeCgTypeId type; /* codegen type of the bound expression (output lvalue or
input rvalue). Drives RegClass + width for the
binder. NULL only for hand-built test constraints
(binder falls back to a 64-bit int default). */
- u8 dir; /* AsmDir */
+ u8 dir; /* AsmDir */
u8 pad[3];
} AsmConstraint;
@@ -633,13 +633,13 @@ typedef struct CGSwitchCase {
} CGSwitchCase;
typedef struct CGSwitchDesc {
- Operand selector; /* OPK_REG or OPK_IMM */
+ Operand selector; /* OPK_REG or OPK_IMM */
CfreeCgTypeId selector_type;
- Label default_label; /* LABEL_NONE means "fall through past the switch" */
+ Label default_label; /* LABEL_NONE means "fall through past the switch" */
const CGSwitchCase* cases;
u32 ncases;
- u8 hint; /* CfreeCgSwitchHint */
- u8 opt_level; /* 0/1/2; reads policy in cg_lower_switch_default */
+ u8 hint; /* CfreeCgSwitchHint */
+ u8 opt_level; /* 0/1/2; reads policy in cg_lower_switch_default */
u8 pad[2];
} CGSwitchDesc;
@@ -869,11 +869,11 @@ struct CGTarget {
* value stack through load_const into OPK_REG. cg and opt's machinize/emit
* both rely on this contract to pass small constants through without
* burning a value-stack register on materialization. */
- void (*binop)(CGTarget*, BinOp, Operand dst /*REG*/,
- Operand a /*REG|IMM*/, Operand b /*REG|IMM*/);
+ void (*binop)(CGTarget*, BinOp, Operand dst /*REG*/, Operand a /*REG|IMM*/,
+ Operand b /*REG|IMM*/);
void (*unop)(CGTarget*, UnOp, Operand dst /*REG*/, Operand a /*REG|IMM*/);
- void (*cmp)(CGTarget*, CmpOp, Operand dst /*REG*/,
- Operand a /*REG|IMM*/, Operand b /*REG|IMM*/); /* materialize 0/1 */
+ void (*cmp)(CGTarget*, CmpOp, Operand dst /*REG*/, Operand a /*REG|IMM*/,
+ Operand b /*REG|IMM*/); /* materialize 0/1 */
void (*convert)(CGTarget*, ConvKind, Operand dst, Operand src);
/* ---- calls / return ----
@@ -1047,28 +1047,6 @@ struct ArchDisasm {
typedef struct LinkArchDesc LinkArchDesc;
typedef struct ABIVtable ABIVtable;
-typedef struct ArchElfOps {
- u32 e_machine;
- u32 e_flags;
- u32 (*reloc_to)(u32 kind);
- u32 (*reloc_from)(u32 wire_type);
-} ArchElfOps;
-
-typedef struct ArchMachoOps {
- u32 cputype;
- u32 cpusubtype;
- u32 (*reloc_to)(u32 kind);
- u32 (*reloc_pcrel)(u32 kind);
- u32 (*reloc_length)(u32 kind);
- u32 (*reloc_from)(u32 wire_type);
-} ArchMachoOps;
-
-typedef struct ArchCoffOps {
- u16 machine; /* IMAGE_FILE_MACHINE_* */
- u32 (*reloc_to)(u32 kind);
- u32 (*reloc_from)(u32 wire_type);
-} ArchCoffOps;
-
typedef struct ArchDwarfOps {
/* DWARF .debug_line minimum instruction length and maximum operations per
* instruction. Fixed-width ISAs normally use their instruction width; x86_64
@@ -1097,9 +1075,8 @@ typedef struct ArchDbgOps {
CfreeStatus (*decode_insn)(const u8* bytes, u32 len, u64 pc,
ArchDbgInsn* out);
CfreeStatus (*build_displaced_shim)(const ArchDbgInsn* insn,
- void* scratch_write,
- u64 scratch_runtime, u32 scratch_cap,
- u32* sentinel_off,
+ void* scratch_write, u64 scratch_runtime,
+ u32 scratch_cap, u32* sentinel_off,
u64* fallthrough_pc);
int (*is_call)(const ArchDbgInsn* insn);
} ArchDbgOps;
@@ -1134,9 +1111,6 @@ typedef struct ArchImpl {
int (*apply_label_fixup)(Compiler*, const ArchLabelFixup*);
const LinkArchDesc* link;
- const ArchElfOps* elf;
- const ArchMachoOps* macho;
- const ArchCoffOps* coff;
const ArchDwarfOps* dwarf;
const ArchDbgOps* dbg;
@@ -1161,9 +1135,6 @@ typedef struct ArchImpl {
const ArchImpl* arch_lookup(CfreeArchKind);
const ArchImpl* arch_for_compiler(const Compiler*);
-const ArchImpl* arch_lookup_elf_machine(u32 e_machine);
-const ArchImpl* arch_lookup_macho_cputype(u32 cputype);
-const ArchImpl* arch_lookup_coff_machine(u16 machine);
/* Pick the right CGBackend for a session given the compiler's target arch
* and the per-emit CodeOptions. Returns &arch_for_compiler(c)->backend for
diff --git a/src/arch/registry.c b/src/arch/registry.c
@@ -3,8 +3,8 @@
* This file is the *only* place in the codebase that checks
* CFREE_ARCH_*_ENABLED. Everything downstream operates on the registry's
* outputs — `const CGBackend*` for session-level code emission, and
- * `const ArchImpl*` for arch-specific metadata (ELF/Mach-O/COFF reloc
- * tables, ABI selection, DWARF, debugger hooks, register file, etc.).
+ * `const ArchImpl*` for arch-specific metadata (ABI selection, DWARF,
+ * debugger hooks, register file, etc.).
*
* Conceptually:
* - A CGBackend is "something that can build a CGTarget from a Compiler +
@@ -32,8 +32,8 @@ extern const CGBackend cg_backend_c_target;
#endif
/* Arch-metadata roster. The arch_lookup_* helpers iterate this list when a
- * caller needs ELF/Mach-O/COFF/ABI/etc. metadata — answers only come from
- * backends that have an ArchImpl, so c_target is intentionally absent.
+ * caller needs ABI/debug/etc. metadata — answers only come from backends
+ * that have an ArchImpl, so c_target is intentionally absent.
* cg_backend_for_session() picks the CGBackend (which is &impl->backend or
* &cg_backend_c_target) without consulting this list. */
static const ArchImpl* const arch_impls[] = {
@@ -64,35 +64,6 @@ const ArchImpl* arch_for_compiler(const Compiler* c) {
return arch_lookup(c->target.arch);
}
-const ArchImpl* arch_lookup_elf_machine(u32 e_machine) {
- for (u32 i = 0; i < arch_impls_count(); ++i) {
- const ArchImpl* impl = arch_impls[i];
- if (impl->elf && impl->elf->e_machine == e_machine) return impl;
- }
- return NULL;
-}
-
-const ArchImpl* arch_lookup_macho_cputype(u32 cputype) {
- for (u32 i = 0; i < arch_impls_count(); ++i) {
- const ArchImpl* impl = arch_impls[i];
- if (impl->macho && impl->macho->cputype == cputype) return impl;
- }
- return NULL;
-}
-
-const ArchImpl* arch_lookup_coff_machine(u16 machine) {
- /* IMAGE_FILE_MACHINE_ARM64EC (0xA641) aliases to AArch64 — the
- * instruction encoding is identical; only the ABI differs, and the
- * linker treats both as a single image's worth of code on Windows
- * targets. */
- if (machine == 0xA641u) machine = 0xAA64u;
- for (u32 i = 0; i < arch_impls_count(); ++i) {
- const ArchImpl* impl = arch_impls[i];
- if (impl->coff && impl->coff->machine == machine) return impl;
- }
- return NULL;
-}
-
const CGBackend* cg_backend_for_session(const Compiler* c,
const CfreeCodeOptions* opts) {
if (opts && opts->emit_c_source) {
diff --git a/src/arch/rv64/arch.c b/src/arch/rv64/arch.c
@@ -7,7 +7,6 @@
#include "arch/rv64/rv64.h"
#include "core/bytes.h"
#include "link/link_arch.h"
-#include "obj/elf.h"
#include "obj/obj.h"
extern const LinkArchDesc link_arch_rv64;
@@ -19,13 +18,6 @@ static const ABIVtable* rv64_abi_vtable(Compiler* c, CfreeOSKind os) {
return &rv64_vtable;
}
-static const ArchElfOps rv64_elf_ops = {
- .e_machine = EM_RISCV,
- .e_flags = EF_RISCV_RVC | EF_RISCV_FLOAT_ABI_DOUBLE,
- .reloc_to = elf_riscv64_reloc_to,
- .reloc_from = elf_riscv64_reloc_from,
-};
-
static const ArchDwarfOps rv64_dwarf_ops = {
.min_inst_len = 4u,
.max_ops_per_inst = 1u,
@@ -134,7 +126,8 @@ static const CfreePredefinedMacro rv64_predefined_macros[] = {
{CFREE_SLICE_LIT("_LP64"), CFREE_SLICE_LIT("1")},
{CFREE_SLICE_LIT("__ORDER_LITTLE_ENDIAN__"), CFREE_SLICE_LIT("1234")},
{CFREE_SLICE_LIT("__ORDER_BIG_ENDIAN__"), CFREE_SLICE_LIT("4321")},
- {CFREE_SLICE_LIT("__BYTE_ORDER__"), CFREE_SLICE_LIT("__ORDER_LITTLE_ENDIAN__")},
+ {CFREE_SLICE_LIT("__BYTE_ORDER__"),
+ CFREE_SLICE_LIT("__ORDER_LITTLE_ENDIAN__")},
{CFREE_SLICE_LIT("__LITTLE_ENDIAN__"), CFREE_SLICE_LIT("1")},
};
@@ -160,8 +153,6 @@ const ArchImpl arch_impl_rv64 = {
.disasm_new = rv64_disasm_new,
.apply_label_fixup = rv64_apply_label_fixup,
.link = &link_arch_rv64,
- .elf = &rv64_elf_ops,
- .macho = NULL,
.dwarf = &rv64_dwarf_ops,
.dbg = &rv64_dbg_ops,
.predefined_macros = rv64_predefined_macros,
diff --git a/src/arch/rv64/link.c b/src/arch/rv64/link.c
@@ -9,7 +9,6 @@
#include "core/bytes.h"
#include "core/core.h"
#include "link/link_arch.h"
-#include "obj/elf.h"
/* PLT0 is 8 canonical NOPs (32 bytes); each PLT entry and IPLT stub is
* 4 instructions (16 bytes) / 3 instructions (12 bytes) respectively.
@@ -77,13 +76,6 @@ static u32 rv64_emit_iplt_stub(u8* dst, u64 stub_vaddr, u64 slot_vaddr,
}
const LinkArchDesc link_arch_rv64 = {
- .e_machine = EM_RISCV,
- .default_musl_interp = "/lib/ld-musl-riscv64.so.1",
- /* RISC-V psABI has no dedicated GLOB_DAT — GOT-slot data imports
- * use the generic absolute-64 reloc instead. */
- .elf_r_relative = ELF_R_RISCV_RELATIVE,
- .elf_r_glob_dat = ELF_R_RISCV_64,
- .elf_r_jump_slot = ELF_R_RISCV_JUMP_SLOT,
.plt0_size = RV64_PLT0_SIZE,
.plt_entry_size = RV64_PLT_ENTRY_SIZE,
.iplt_stub_size = RV64_IPLT_STUB_SIZE,
diff --git a/src/arch/x64/arch.c b/src/arch/x64/arch.c
@@ -7,9 +7,6 @@
#include "arch/x64/x64.h"
#include "core/bytes.h"
#include "link/link_arch.h"
-#include "obj/coff.h"
-#include "obj/elf.h"
-#include "obj/macho.h"
#include "obj/obj.h"
extern const LinkArchDesc link_arch_x64;
@@ -27,33 +24,11 @@ static const ABIVtable* x64_abi_vtable(Compiler* c, CfreeOSKind os) {
}
}
-static const ArchElfOps x64_elf_ops = {
- .e_machine = EM_X86_64,
- .e_flags = 0,
- .reloc_to = elf_x86_64_reloc_to,
- .reloc_from = elf_x86_64_reloc_from,
-};
-
-static const ArchMachoOps x64_macho_ops = {
- .cputype = CPU_TYPE_X86_64,
- .cpusubtype = CPU_SUBTYPE_X86_64_ALL,
- .reloc_to = macho_x86_64_reloc_to,
- .reloc_pcrel = macho_x86_64_reloc_pcrel,
- .reloc_length = macho_x86_64_reloc_length,
- .reloc_from = macho_x86_64_reloc_from,
-};
-
static const ArchDwarfOps x64_dwarf_ops = {
.min_inst_len = 1u,
.max_ops_per_inst = 1u,
};
-static const ArchCoffOps x64_coff_ops = {
- .machine = IMAGE_FILE_MACHINE_AMD64,
- .reloc_to = coff_x86_64_reloc_to,
- .reloc_from = coff_x86_64_reloc_from,
-};
-
static int x64_apply_label_fixup(Compiler* c, const ArchLabelFixup* fx) {
(void)c;
if (!fx || fx->kind != R_PC32 || fx->width != 4) return 1;
@@ -73,7 +48,8 @@ static const CfreePredefinedMacro x64_predefined_macros[] = {
{CFREE_SLICE_LIT("__LP64__"), CFREE_SLICE_LIT("1")},
{CFREE_SLICE_LIT("__ORDER_LITTLE_ENDIAN__"), CFREE_SLICE_LIT("1234")},
{CFREE_SLICE_LIT("__ORDER_BIG_ENDIAN__"), CFREE_SLICE_LIT("4321")},
- {CFREE_SLICE_LIT("__BYTE_ORDER__"), CFREE_SLICE_LIT("__ORDER_LITTLE_ENDIAN__")},
+ {CFREE_SLICE_LIT("__BYTE_ORDER__"),
+ CFREE_SLICE_LIT("__ORDER_LITTLE_ENDIAN__")},
{CFREE_SLICE_LIT("__LITTLE_ENDIAN__"), CFREE_SLICE_LIT("1")},
};
@@ -108,9 +84,6 @@ const ArchImpl arch_impl_x64 = {
.disasm_new = x64_disasm_new,
.apply_label_fixup = x64_apply_label_fixup,
.link = &link_arch_x64,
- .elf = &x64_elf_ops,
- .macho = &x64_macho_ops,
- .coff = &x64_coff_ops,
.dwarf = &x64_dwarf_ops,
.dbg = &x64_dbg_ops,
.predefined_macros = x64_predefined_macros,
diff --git a/src/arch/x64/link.c b/src/arch/x64/link.c
@@ -7,12 +7,10 @@
* descriptor switchover is a pure refactor. All raw byte values come
* from named constants / inline writers in arch/x64/isa.h. */
-#include "link/link_arch.h"
-
#include "arch/x64/isa.h"
#include "core/bytes.h"
#include "core/core.h"
-#include "obj/elf.h"
+#include "link/link_arch.h"
/* PLT0 layout under DF_1_NOW: never executed (loader pre-binds every
* slot via .rela.plt before user code runs), so we just emit 32 bytes
@@ -36,8 +34,7 @@ static void x64_emit_plt_entry(u8* dst, u64 entry_vaddr, u64 slot_vaddr) {
i64 disp = (i64)slot_vaddr - (i64)(entry_vaddr + X64_JMP_RIPREL_SIZE);
i32 disp32 = (i32)(u32)((u64)disp & 0xffffffffu);
x64_write_jmp_riprel(dst, disp32);
- x64_write_nop_pad(dst + X64_JMP_RIPREL_SIZE,
- 16u - X64_JMP_RIPREL_SIZE);
+ x64_write_nop_pad(dst + X64_JMP_RIPREL_SIZE, 16u - X64_JMP_RIPREL_SIZE);
}
/* IPLT (ifunc) trampoline stub (12 B):
@@ -77,33 +74,22 @@ static int x64_is_got_load_reloc(RelocKind kind) {
* head, minus the trailing NOP pad — Win64 calls don't need a stub
* aligned to a fixed entry stride because there's no PLT0 to share
* the address space with. */
-static void x64_emit_coff_iat_stub(u8* dst, u64 stub_vaddr,
- u64 iat_slot_vaddr) {
+void x64_emit_coff_iat_stub(u8* dst, u64 stub_vaddr, u64 iat_slot_vaddr) {
i64 disp = (i64)iat_slot_vaddr - (i64)(stub_vaddr + X64_JMP_RIPREL_SIZE);
i32 disp32 = (i32)(u32)((u64)disp & 0xffffffffu);
x64_write_jmp_riprel(dst, disp32);
}
const LinkArchDesc link_arch_x64 = {
- .e_machine = EM_X86_64,
- .default_musl_interp = "/lib/ld-musl-x86_64.so.1",
-
- .elf_r_relative = ELF_R_X86_64_RELATIVE,
- .elf_r_glob_dat = ELF_R_X86_64_GLOB_DAT,
- .elf_r_jump_slot = ELF_R_X86_64_JUMP_SLOT,
-
- .plt0_size = 32u,
- .plt_entry_size = 16u,
- .iplt_stub_size = 12u,
-
- .emit_plt0 = x64_emit_plt0,
- .emit_plt_entry = x64_emit_plt_entry,
- .emit_iplt_stub = x64_emit_iplt_stub,
+ .plt0_size = 32u,
+ .plt_entry_size = 16u,
+ .iplt_stub_size = 12u,
- .is_branch_reloc = x64_is_branch_reloc,
- .is_got_load_reloc = x64_is_got_load_reloc,
- .needs_jit_call_stub = x64_is_branch_reloc,
+ .emit_plt0 = x64_emit_plt0,
+ .emit_plt_entry = x64_emit_plt_entry,
+ .emit_iplt_stub = x64_emit_iplt_stub,
- .coff_stub_size = X64_JMP_RIPREL_SIZE,
- .emit_coff_iat_stub = x64_emit_coff_iat_stub,
+ .is_branch_reloc = x64_is_branch_reloc,
+ .is_got_load_reloc = x64_is_got_load_reloc,
+ .needs_jit_call_stub = x64_is_branch_reloc,
};
diff --git a/src/emu/elf_load.c b/src/emu/elf_load.c
@@ -29,7 +29,7 @@
#include "core/slice.h"
#include "emu/emu.h"
#include "emu/rv64_ops.h"
-#include "obj/elf.h"
+#include "obj/elf/elf.h"
/* ---- Layout knobs ---- */
/* Stack size — large enough for typical libc init in the smoke tests
@@ -50,9 +50,7 @@ static u16 rd16(const u8* p) { return (u16)p[0] | ((u16)p[1] << 8); }
static u32 rd32(const u8* p) {
return (u32)p[0] | ((u32)p[1] << 8) | ((u32)p[2] << 16) | ((u32)p[3] << 24);
}
-static u64 rd64(const u8* p) {
- return (u64)rd32(p) | ((u64)rd32(p + 4) << 32);
-}
+static u64 rd64(const u8* p) { return (u64)rd32(p) | ((u64)rd32(p + 4) << 32); }
static void wr64(u8* p, u64 v) {
u32 i;
@@ -238,7 +236,10 @@ int emu_load_elf(Compiler* c, CfreeEmuArch arch, const u8* bytes, size_t len,
for (i = 0; i < e_phnum; ++i) {
const u8* ph = bytes + e_phoff + (u64)i * e_phentsize;
u32 p_type = rd32(ph + 0);
- if (p_type == PT_INTERP) { have_interp = 1; break; }
+ if (p_type == PT_INTERP) {
+ have_interp = 1;
+ break;
+ }
}
if (have_interp) {
if (!g_pending_interp.bytes || g_pending_interp.len == 0) {
@@ -357,8 +358,7 @@ int emu_load_elf(Compiler* c, CfreeEmuArch arch, const u8* bytes, size_t len,
if (envc > 0) {
envp_addrs = (u64*)heap->alloc(heap, sizeof(u64) * (size_t)envc, 8u);
if (!envp_addrs) {
- if (argv_addrs)
- heap->free(heap, argv_addrs, sizeof(u64) * (size_t)argc);
+ if (argv_addrs) heap->free(heap, argv_addrs, sizeof(u64) * (size_t)argc);
heap->free(heap, guest_base, (size_t)guest_size);
return 14;
}
@@ -392,10 +392,10 @@ int emu_load_elf(Compiler* c, CfreeEmuArch arch, const u8* bytes, size_t len,
/* Table size: argc(8) + (argc+1)*8 + (envc+1)*8 + auxv (6 pairs *
* 16). Place the table so that final sp is 16-byte aligned. */
- u64 table_bytes = 8u /* argc */
- + (u64)(argc + 1) * 8u /* argv + NULL */
- + (u64)(envc + 1) * 8u /* envp + NULL */
- + 6u * 16u; /* auxv pairs incl. AT_NULL */
+ u64 table_bytes = 8u /* argc */
+ + (u64)(argc + 1) * 8u /* argv + NULL */
+ + (u64)(envc + 1) * 8u /* envp + NULL */
+ + 6u * 16u; /* auxv pairs incl. AT_NULL */
/* Round table_bytes up to 16 so sp lands aligned. */
u64 sp_table = (cursor - table_bytes) & ~(u64)0xfu;
sp = sp_table;
@@ -447,8 +447,8 @@ int emu_load_elf(Compiler* c, CfreeEmuArch arch, const u8* bytes, size_t len,
};
u32 aux_count = sizeof(aux) / sizeof(aux[0]);
/* If the table_bytes budget was undersized, recompute and shift sp. */
- u64 needed = 8u + (u64)(argc + 1) * 8u + (u64)(envc + 1) * 8u +
- (u64)aux_count * 16u;
+ u64 needed =
+ 8u + (u64)(argc + 1) * 8u + (u64)(envc + 1) * 8u + (u64)aux_count * 16u;
if (needed > table_bytes) {
/* Re-place table_bytes := needed, re-align sp_table. */
sp_table = (cursor - needed) & ~(u64)0xfu;
diff --git a/src/link/link.c b/src/link/link.c
@@ -80,8 +80,8 @@ Linker* link_new(Compiler* c) {
* LC_MAIN names main directly (dyld owns the C runtime startup),
* so the on-disk symbol is `_main` (the mangled form of `main`).
* Format choice lives in obj_format_default_entry_name. */
- l->entry_name =
- pool_intern_slice(c->global, slice_from_cstr(obj_format_default_entry_name(c)));
+ l->entry_name = pool_intern_slice(
+ c->global, slice_from_cstr(obj_format_default_entry_name(c)));
/* Match the rest of libcfree's lifetime story: the new'd Linker is
* registered for cleanup in case a panic fires before link_free. */
l->deferred = compiler_defer(c, linker_cleanup, l);
@@ -132,20 +132,19 @@ LinkInputId link_add_obj_bytes(Linker* l, const char* name, const u8* data,
fmt = cfree_detect_fmt(data, len);
impl = obj_format_lookup_bin(fmt);
if (!impl || !impl->read)
- compiler_panic(l->c, no_loc(),
- "link_add_obj_bytes: unsupported object format "
- "(fmt=%u) for '%.*s'",
- (u32)fmt,
- SLICE_ARG(name ? slice_from_cstr(name)
- : SLICE_LIT("(unnamed)")));
+ compiler_panic(
+ l->c, no_loc(),
+ "link_add_obj_bytes: unsupported object format "
+ "(fmt=%u) for '%.*s'",
+ (u32)fmt,
+ SLICE_ARG(name ? slice_from_cstr(name) : SLICE_LIT("(unnamed)")));
reader_name = impl->read_name;
ob = impl->read(l->c, name, data, len);
if (!ob)
- compiler_panic(l->c, no_loc(),
- "link_add_obj_bytes: %.*s returned NULL for '%.*s'",
- SLICE_ARG(slice_from_cstr(reader_name)),
- SLICE_ARG(name ? slice_from_cstr(name)
- : SLICE_LIT("(unnamed)")));
+ compiler_panic(
+ l->c, no_loc(), "link_add_obj_bytes: %.*s returned NULL for '%.*s'",
+ SLICE_ARG(slice_from_cstr(reader_name)),
+ SLICE_ARG(name ? slice_from_cstr(name) : SLICE_LIT("(unnamed)")));
in = inputs_push(l, &id);
in->order = l->next_input_order++;
in->obj = ob; /* re-uses the ObjBuilder slot for ownership */
@@ -174,20 +173,19 @@ LinkInputId link_add_dso_bytes(Linker* l, const char* name, const u8* data,
const char* reader_name;
if (!l || !data || !len) return LINK_INPUT_NONE;
if (!obj_format_dso_reader_for_bytes(data, len, &fmt, &reader))
- compiler_panic(l->c, no_loc(),
- "link_add_dso_bytes: unsupported DSO format "
- "(fmt=%u) for '%.*s'",
- (u32)fmt,
- SLICE_ARG(name ? slice_from_cstr(name)
- : SLICE_LIT("(unnamed)")));
+ compiler_panic(
+ l->c, no_loc(),
+ "link_add_dso_bytes: unsupported DSO format "
+ "(fmt=%u) for '%.*s'",
+ (u32)fmt,
+ SLICE_ARG(name ? slice_from_cstr(name) : SLICE_LIT("(unnamed)")));
reader_name = reader.name;
ob = reader.read(l->c, name, data, len, &soname);
if (!ob)
- compiler_panic(l->c, no_loc(),
- "link_add_dso_bytes: %.*s returned NULL for '%.*s'",
- SLICE_ARG(slice_from_cstr(reader_name)),
- SLICE_ARG(name ? slice_from_cstr(name)
- : SLICE_LIT("(unnamed)")));
+ compiler_panic(
+ l->c, no_loc(), "link_add_dso_bytes: %.*s returned NULL for '%.*s'",
+ SLICE_ARG(slice_from_cstr(reader_name)),
+ SLICE_ARG(name ? slice_from_cstr(name) : SLICE_LIT("(unnamed)")));
in = inputs_push(l, &id);
in->kind = LINK_INPUT_DSO_BYTES;
in->order = l->next_input_order++;
@@ -228,11 +226,13 @@ LinkInputId link_add_archive_bytes(Linker* l, const char* name, const u8* data,
in_arc.data = data;
in_arc.len = len;
- if (cfree_ar_iter_new(cfree_compiler_context(l->c), &in_arc, &it) != CFREE_OK || !it)
- compiler_panic(l->c, no_loc(),
- "link_add_archive_bytes: '%.*s' is not a valid ar archive",
- SLICE_ARG(name ? slice_from_cstr(name)
- : SLICE_LIT("(unnamed)")));
+ if (cfree_ar_iter_new(cfree_compiler_context(l->c), &in_arc, &it) !=
+ CFREE_OK ||
+ !it)
+ compiler_panic(
+ l->c, no_loc(),
+ "link_add_archive_bytes: '%.*s' is not a valid ar archive",
+ SLICE_ARG(name ? slice_from_cstr(name) : SLICE_LIT("(unnamed)")));
/* Two-pass: count members so we allocate the member array exactly
* once. The linker_release path frees by nmembers, so we need
@@ -264,12 +264,14 @@ LinkInputId link_add_archive_bytes(Linker* l, const char* name, const u8* data,
* for us, so every member returned here is a real object file.
* Format is detected per-member so a single archive could in
* principle hold mixed formats (in practice it never does). */
- if (cfree_ar_iter_new(cfree_compiler_context(l->c), &in_arc, &it) != CFREE_OK || !it)
- compiler_panic(l->c, no_loc(),
- "link_add_archive_bytes: ar_iter_init failed on '%.*s' "
- "second pass",
- SLICE_ARG(name ? slice_from_cstr(name)
- : SLICE_LIT("(unnamed)")));
+ if (cfree_ar_iter_new(cfree_compiler_context(l->c), &in_arc, &it) !=
+ CFREE_OK ||
+ !it)
+ compiler_panic(
+ l->c, no_loc(),
+ "link_add_archive_bytes: ar_iter_init failed on '%.*s' "
+ "second pass",
+ SLICE_ARG(name ? slice_from_cstr(name) : SLICE_LIT("(unnamed)")));
n = 0;
while (cfree_ar_iter_next(it, &mem) == CFREE_ITER_ITEM && n < ar->nmembers) {
ObjBuilder* ob = NULL;
@@ -295,23 +297,21 @@ LinkInputId link_add_archive_bytes(Linker* l, const char* name, const u8* data,
}
}
if (!member_impl || !member_impl->read)
- compiler_panic(l->c, no_loc(),
- "link_add_archive_bytes: unsupported member "
- "format (fmt=%u) for '%.*s' in archive '%.*s'",
- (u32)mfmt,
- SLICE_ARG(mem.name.len ? mem.name
- : SLICE_LIT("(unnamed)")),
- SLICE_ARG(name ? slice_from_cstr(name)
- : SLICE_LIT("(unnamed)")));
+ compiler_panic(
+ l->c, no_loc(),
+ "link_add_archive_bytes: unsupported member "
+ "format (fmt=%u) for '%.*s' in archive '%.*s'",
+ (u32)mfmt,
+ SLICE_ARG(mem.name.len ? mem.name : SLICE_LIT("(unnamed)")),
+ SLICE_ARG(name ? slice_from_cstr(name) : SLICE_LIT("(unnamed)")));
ob = member_impl->read(l->c, mem.name.s, mem.data, mem.size);
if (!ob)
- compiler_panic(l->c, no_loc(),
- "link_add_archive_bytes: object read failed for "
- "member '%.*s' of archive '%.*s'",
- SLICE_ARG(mem.name.len ? mem.name
- : SLICE_LIT("(unnamed)")),
- SLICE_ARG(name ? slice_from_cstr(name)
- : SLICE_LIT("(unnamed)")));
+ compiler_panic(
+ l->c, no_loc(),
+ "link_add_archive_bytes: object read failed for "
+ "member '%.*s' of archive '%.*s'",
+ SLICE_ARG(mem.name.len ? mem.name : SLICE_LIT("(unnamed)")),
+ SLICE_ARG(name ? slice_from_cstr(name) : SLICE_LIT("(unnamed)")));
ar->members[n].name =
mem.name.len ? pool_intern_slice(l->c->global, mem.name) : 0;
ar->members[n].obj = ob;
@@ -546,7 +546,10 @@ static void link_image_release(LinkImage* img) {
sizeof(*img->dbg_objs_owned) * img->dbg_objs_n);
}
symhash_fini(&img->globals);
- if (img->dyn) link_dyn_state_free(img);
+ if (img->dyn) {
+ const ObjFormatImpl* fmt = obj_format_lookup(img->c->target.obj);
+ if (fmt && fmt->free_dyn) fmt->free_dyn(img);
+ }
img->heap->free(img->heap, img, sizeof(*img));
}
diff --git a/src/link/link_arch.h b/src/link/link_arch.h
@@ -32,28 +32,6 @@ typedef struct LinkArchIPltReloc {
} LinkArchIPltReloc;
typedef struct LinkArchDesc {
- /* ---- ELF identity ---- */
- u32 e_machine; /* EM_AARCH64 / EM_X86_64 / EM_RISCV */
-
- /* Default PT_INTERP (canonical musl loader for this arch). Drivers
- * should override via link_set_interp_path; the default fires only
- * when the caller leaves it unset and -static isn't in effect. */
- const char* default_musl_interp;
-
- /* ---- Dynamic-reloc type numbers (ELF) ----
- * Used by .rela.dyn / .rela.plt emission. Reloc-type numbers are
- * arch-specific: aarch64 starts at 1024, x86_64 in the low single
- * digits, RISC-V uses its own encoding (and maps GLOB_DAT onto
- * R_RISCV_64 since the psABI has no dedicated GLOB_DAT). */
- u32 elf_r_relative;
- u32 elf_r_glob_dat;
- u32 elf_r_jump_slot;
-
- /* ---- Mach-O identity ----
- * Zero means the target has no Mach-O executable writer yet. */
- u32 macho_cputype;
- u32 macho_cpusubtype;
-
/* ---- PLT geometry ----
* All three arches today use a 32-byte PLT0 + 16-byte per-import
* entry, but exposing the sizes keeps the linker free of magic
@@ -97,24 +75,6 @@ typedef struct LinkArchDesc {
u32 (*emit_iplt_stub)(u8* dst, u64 stub_vaddr, u64 slot_vaddr,
LinkArchIPltReloc out[2]);
- /* Mach-O stubs. Used only when macho_cputype is non-zero. */
- u32 macho_stub_size;
- void (*emit_macho_stub)(u8* dst, u64 stub_vaddr, u64 got_slot_vaddr);
-
- /* PE/COFF IAT stub. Used when target.obj == CFREE_OBJ_COFF and a
- * relocation targets an imported function. The stub performs an
- * indirect jump through the IAT slot:
- * x64: ff 25 disp32 ; jmp [rip + disp_to_iat_slot] (6 B)
- * aa64: adrp/ldr/br x16 ; load IAT slot, branch to it (12 B)
- *
- * The 32-bit displacement on x64 and the page-relative pair on
- * aa64 are baked into the stub bytes directly (no apply-time
- * relocations needed), so callers do not enqueue extra
- * LinkRelocApply records — see how emit_iplt_stub returns 0 for
- * arches that can encode the displacement inline. */
- u32 coff_stub_size;
- void (*emit_coff_iat_stub)(u8* dst, u64 stub_vaddr, u64 iat_slot_vaddr);
-
/* Relocation classification used by format-specific linker passes. */
int (*is_branch_reloc)(RelocKind);
int (*is_got_load_reloc)(RelocKind);
diff --git a/src/link/link_coff.c b/src/link/link_coff.c
@@ -1,1748 +0,0 @@
-/* link_emit_coff: write a PE32+ MH_EXECUTABLE-style image to the
- * caller-provided Writer.
- *
- * Phase 3.1 deliverable per doc/WINDOWS.md: skeleton + base-reloc
- * handling for the four standard PE sections. Import-table synthesis
- * (.idata / IAT) lands in Phase 3.2; per-arch IAT stub bytes in 3.3;
- * TLS directory in 3.5; debug directory in 3.6 — those code paths
- * panic loudly here so the strict-by-default posture surfaces them.
- *
- * File layout (in write order):
- *
- * [DOS stub IMAGE_DOS_HEADER] -- 64 bytes; e_lfanew=0x40
- * [PE signature "PE\0\0"] -- 4 bytes
- * [IMAGE_FILE_HEADER] -- 20 bytes
- * [IMAGE_OPTIONAL_HEADER64] -- 240 bytes (PE32+)
- * [IMAGE_SECTION_HEADER * nsec] -- 40 bytes each
- * [pad to FileAlignment]
- * [.text bytes, padded to FileAlignment]
- * [.rdata bytes, padded to FileAlignment]
- * [.data bytes, padded to FileAlignment]
- * [.reloc bytes, padded to FileAlignment]
- *
- * .bss is uninitialized — it has a section header (with VirtualSize)
- * but no file bytes and PointerToRawData=0.
- *
- * RVAs follow SectionAlignment (0x1000); FileAlignment is 0x200; the
- * first section starts at RVA 0x1000 (right after the headers map).
- * ImageBase is the Win64 convention 0x140000000.
- *
- * Reloc strategy. The link layout pass has already placed every kept
- * input section into img->sections / img->segments under the ELF/Mach-O
- * coordinate system (image-relative vaddrs, often packed by permission
- * bucket). COFF wants a different packing — the four standard
- * sections at SectionAlignment-aligned RVAs — so this writer re-derives
- * per-input-section vaddrs from scratch and shifts each LinkSection /
- * symbol / LinkRelocApply by its section's per-section delta before
- * applying relocations. link_emit_macho takes the same tack for its
- * __DATA_CONST splits; the ELF writer leaves vaddrs alone because the
- * link layout already matches ELF's PT_LOAD shape. */
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "arch/arch.h"
-#include "core/core.h"
-#include "core/heap.h"
-#include "core/pool.h"
-#include "core/slice.h"
-#include "core/util.h"
-#include "core/vec.h"
-#include "link/link.h"
-#include "link/link_arch.h"
-#include "link/link_internal.h"
-#include "obj/coff.h"
-
-/* ---- .idata layout constants ----
- *
- * Per doc/WINDOWS.md §3.2: the .idata content is a concatenation of an
- * IMAGE_IMPORT_DESCRIPTOR table (NULL-terminated), one ILT per DLL
- * (each NULL-terminated u64 array), one IAT per DLL (same shape),
- * a hint/name table, and a DLL-name string pool. Each block is
- * pointer-sized aligned within the section. AArch64 import thunks use
- * PAGEOFFSET_12L for 64-bit ILT/IAT slots, so those sub-blocks must be
- * 8-byte aligned. */
-#define PE_IDATA_BLOCK_ALIGN 8u
-/* Hint field on IMAGE_IMPORT_BY_NAME records. cfree never has a real
- * hint (the OS loader doesn't need one to do the bsearch on the DLL's
- * export name table), so 0 is the canonical "no hint" value. */
-#define PE_IMPORT_HINT_NONE 0u
-
-static SrcLoc no_loc(void) {
- SrcLoc l = {0, 0, 0};
- return l;
-}
-
-/* ---- PE/Win64 layout constants ----
- *
- * Centralised here so the wire-format numbers in this TU stay named
- * (and the magic-numbers rule in CLAUDE.md is honoured). Values match
- * the PE/COFF spec + Win64 conventions; mingw-w64's ld defaults agree. */
-#define PE_IMAGE_BASE LINK_PE_IMAGE_BASE
-#define PE_SECTION_ALIGNMENT 0x1000u
-#define PE_FILE_ALIGNMENT 0x200u
-#define PE_FIRST_SECTION_RVA 0x1000u
-#define PE_DOS_E_LFANEW 0x40u
-#define PE_NUM_DATA_DIRS COFF_NUM_DATA_DIRECTORIES
-#define PE_OPT_HDR_SIZE COFF_OPT_HDR64_SIZE
-#define PE_LINKER_MAJOR 0u
-#define PE_LINKER_MINOR 1u
-#define PE_OS_MAJOR 6u /* Windows Vista+ — mingw default */
-#define PE_OS_MINOR 0u
-#define PE_SUBSYS_MAJOR 6u
-#define PE_SUBSYS_MINOR 0u
-#define PE_STACK_RESERVE 0x100000ULL
-#define PE_STACK_COMMIT 0x1000ULL
-#define PE_HEAP_RESERVE 0x100000ULL
-#define PE_HEAP_COMMIT 0x1000ULL
-#define PE_DLL_CHARS \
- (IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA | \
- IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE | \
- IMAGE_DLLCHARACTERISTICS_NX_COMPAT | \
- IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE)
-
-/* PE32+ DOS-stub-to-PE-signature offsets (manual, since we marshal
- * field-by-field rather than memcpy'ing the packed struct). */
-#define PE_DOS_HDR_SIZE COFF_DOS_HEADER_SIZE
-#define PE_SIG_SIZE 4u
-#define PE_FILE_HDR_SIZE COFF_FILE_HEADER_SIZE
-#define PE_SECTION_HDR_SIZE COFF_SECTION_HEADER_SIZE
-
-/* Standard PE output buckets, plus .idata (import directory) and
- * .reloc — both synthesised here rather than copied from input
- * sections. Order matters: it's the on-image RVA order. */
-typedef enum CoffBucket {
- COFF_BUCKET_TEXT = 0,
- COFF_BUCKET_RDATA = 1,
- COFF_BUCKET_IDATA = 2,
- COFF_BUCKET_DATA = 3,
- COFF_BUCKET_TLS = 4,
- COFF_BUCKET_BSS = 5,
- COFF_BUCKET_RELOC = 6,
- COFF_NBUCKETS = 7,
-} CoffBucket;
-
-/* IMAGE_TLS_DIRECTORY64 wire size: u64*4 + u32*2 = 40 bytes. */
-#define COFF_TLS_DIRECTORY64_SIZE 40u
-/* Byte offsets of the four u64 VA fields within IMAGE_TLS_DIRECTORY64
- * — they need base relocations so ASLR can fix them up. */
-#define COFF_TLSDIR_OFF_START_ADDR 0u
-#define COFF_TLSDIR_OFF_END_ADDR 8u
-#define COFF_TLSDIR_OFF_INDEX_ADDR 16u
-#define COFF_TLSDIR_OFF_CALLBACKS 24u
-
-typedef struct CoffSection {
- const char* name; /* short ASCII; <= 8 bytes including NUL pad */
- u32 characteristics;
- u8* bytes; /* NULL for .bss / .reloc-before-build */
- u32 size; /* VirtualSize (real bytes; for .bss, mem size) */
- u32 size_raw; /* SizeOfRawData (file size, FileAlignment-padded) */
- u32 rva; /* VirtualAddress in image */
- u32 file_offset; /* PointerToRawData; 0 for .bss */
- u8 in_image; /* 1 if this bucket is emitted as a section */
- u8 has_file_bytes; /* 0 for .bss */
- u8 pad[2];
-} CoffSection;
-
-/* ---- byte writer helpers ---- */
-
-static void coff_write_zeroes(Writer* w, u64 n) {
- static const u8 zeroes[256] = {0};
- while (n) {
- u64 step = n > sizeof(zeroes) ? sizeof(zeroes) : n;
- cfree_writer_write(w, zeroes, (size_t)step);
- n -= step;
- }
-}
-
-/* Return the COFF bucket for a kept LinkSection. SF_TLS sections route
- * into the dedicated .tls bucket so SECREL relocations from TLS access
- * code resolve against the merged TLS image, not against .data.
- * Everything else partitions on SF_EXEC / SF_WRITE plus the SSEM_NOBITS
- * bit for .bss. */
-static CoffBucket coff_bucket_for(const LinkSection* ls) {
- if (ls->flags & SF_EXEC) return COFF_BUCKET_TEXT;
- if (ls->flags & SF_TLS) return COFF_BUCKET_TLS;
- if (ls->sem == SSEM_NOBITS) return COFF_BUCKET_BSS;
- if (ls->flags & SF_WRITE) return COFF_BUCKET_DATA;
- return COFF_BUCKET_RDATA;
-}
-
-/* True for relocation kinds that need an entry in .reloc so the OS
- * loader can patch the site after ASLR picks a runtime ImageBase.
- * PC-relative fixups don't need base-relocs — the displacement is
- * load-invariant. */
-static int coff_reloc_needs_base_reloc(RelocKind k) {
- return k == R_ABS64 || k == R_ABS32;
-}
-
-/* Look up the LinkSection whose [vaddr, vaddr+size] range covers the
- * given image-relative address `v`, or return NULL. Used to attribute
- * symbol vaddrs to a containing section so we can apply per-section
- * vaddr deltas after re-laying out for PE. */
-static const LinkSection* coff_section_at(const LinkImage* img, u64 v) {
- u32 i;
- for (i = 0; i < img->nsections; ++i) {
- const LinkSection* ls = &img->sections[i];
- if (v >= ls->vaddr && v <= ls->vaddr + ls->size) return ls;
- }
- return NULL;
-}
-
-/* Per-input-section delta map. Indexed by `LinkSection.id - 1`.
- * Populated by coff_build_buckets. Consumed by every subsequent pass
- * that needs to translate input-coordinate offsets (the world that
- * img->sections / img->relocs live in) into PE-coordinate ones (where
- * the writer plants bytes). delta is stored explicitly so callers
- * avoid recomputing (new_rva + bucket.rva - old_vaddr) for every
- * LinkRelocApply whose link_section_id points at the section. */
-typedef struct CoffSecMap {
- u32 new_rva; /* image-relative RVA after PE relayout */
- u32 new_file_off; /* file offset of the patched byte */
- i64 delta; /* new_rva - old_vaddr */
- u8 bucket;
- u8 pad[3];
-} CoffSecMap;
-
-/* TLS directory placement state. Populated when at least one SF_TLS
- * section survives dead-strip; consumed by the optional-header writer,
- * the .reloc builder (base-relocs for the four absolute VA fields),
- * and the .rdata emit pass that writes the final 40-byte record. */
-typedef struct CoffTlsLayout {
- int present; /* 1 iff at least one TLS section was kept */
- u32 dir_rdata_off; /* byte offset of the IMAGE_TLS_DIRECTORY64 within .rdata */
- u32 tls_size; /* size of the merged .tls bucket */
- LinkSymId tls_index_sym; /* resolved _tls_index LinkSymbol */
- LinkSymId callbacks_sym; /* __xl_a when mingw's TLS callbacks are linked */
- u64 callbacks_addend; /* mingw points past the leading NULL sentinel */
-} CoffTlsLayout;
-
-static LinkSymId coff_find_sym(LinkImage* img, const char* name) {
- Sym sym = pool_intern_slice(img->c->global, slice_from_cstr(name));
- u32 n = LinkSyms_count(&img->syms);
- u32 i;
- for (i = 0; i < n; ++i) {
- const LinkSymbol* s = LinkSyms_at(&img->syms, i);
- if (s->name == sym) return (LinkSymId)(i + 1);
- }
- return LINK_SYM_NONE;
-}
-
-/* Locate _tls_index by name in the resolved symbol table. mingw's
- * libmingwex defines it (as part of tlsmcrt); without a CRT the link
- * fails here with a clear message rather than producing a TLS
- * directory pointing at a stale address. */
-static LinkSymId coff_find_tls_index_sym(LinkImage* img) {
- return coff_find_sym(img, "_tls_index");
-}
-
-static const LinkSection* coff_symbol_section(const LinkImage* img,
- const LinkSymbol* s) {
- if (s->name) {
- Slice nm_s = pool_slice(img->c->global, s->name);
- const char* nm = nm_s.s;
- size_t n = nm_s.len;
- const char* sec_name = NULL;
- if (nm && n == 6 && memcmp(nm, "__xd_a", 6) == 0)
- sec_name = ".CRT$XDA";
- else if (nm && n == 6 && memcmp(nm, "__xd_z", 6) == 0)
- sec_name = ".CRT$XDZ";
- else if (nm && n == 6 && memcmp(nm, "__xl_a", 6) == 0)
- sec_name = ".CRT$XLA";
- else if (nm && n == 6 && memcmp(nm, "__xl_c", 6) == 0)
- sec_name = ".CRT$XLC";
- else if (nm && n == 6 && memcmp(nm, "__xl_d", 6) == 0)
- sec_name = ".CRT$XLD";
- else if (nm && n == 6 && memcmp(nm, "__xl_z", 6) == 0)
- sec_name = ".CRT$XLZ";
- if (sec_name) {
- u32 i;
- for (i = 0; i < img->nsections; ++i) {
- const LinkSection* ls = &img->sections[i];
- if (ls->name &&
- slice_eq_cstr(pool_slice(img->c->global, ls->name), sec_name))
- return ls;
- }
- }
- }
- if (s->section_id != LINK_SEC_NONE && s->section_id <= img->nsections)
- return &img->sections[s->section_id - 1];
- return coff_section_at(img, s->vaddr);
-}
-
-static u64 coff_symbol_final_va(const LinkImage* img,
- const CoffSection out[COFF_NBUCKETS],
- const CoffSecMap* map,
- LinkSymId id,
- const char* what) {
- const LinkSymbol* s = LinkSyms_at(&img->syms, id - 1);
- if (!s->defined || s->kind == SK_ABS) {
- compiler_panic(img->c, no_loc(),
- "link_emit_coff: `%.*s` is not a defined section-bound "
- "symbol",
- SLICE_ARG(slice_from_cstr(what)));
- }
- const LinkSection* sec = coff_symbol_section(img, s);
- if (!sec) {
- compiler_panic(img->c, no_loc(),
- "link_emit_coff: `%.*s` has no containing section",
- SLICE_ARG(slice_from_cstr(what)));
- }
- u8 b = map[sec->id - 1].bucket;
- return PE_IMAGE_BASE + (u64)out[b].rva +
- (u64)map[sec->id - 1].new_rva + (s->vaddr - sec->vaddr);
-}
-
-/* Reserve 40 bytes at the tail of the .rdata bucket for the
- * IMAGE_TLS_DIRECTORY64 record. Records the offset for later emit and
- * grows the bucket if needed. The bytes start zeroed; coff_emit_tls_dir
- * fills them in once final RVAs are known. */
-static void coff_plan_tls_layout(LinkImage* img,
- CoffSection out[COFF_NBUCKETS],
- u32* rdata_cap, CoffTlsLayout* tls) {
- memset(tls, 0, sizeof(*tls));
- if (out[COFF_BUCKET_TLS].size == 0) return;
- tls->present = 1;
- tls->tls_size = out[COFF_BUCKET_TLS].size;
- tls->tls_index_sym = coff_find_tls_index_sym(img);
- if (tls->tls_index_sym == LINK_SYM_NONE) {
- compiler_panic(img->c, no_loc(),
- "link_emit_coff: .tls section requires `_tls_index` "
- "(provided by mingw libmingwex / tlsmcrt.o) — none of "
- "the linked inputs define it");
- }
- /* IMAGE_TLS_DIRECTORY64 needs 8-byte alignment for its u64 fields;
- * round the .rdata size up before reserving the 40-byte record. */
- tls->callbacks_sym = coff_find_sym(img, "__xl_a");
- if (tls->callbacks_sym != LINK_SYM_NONE) {
- tls->callbacks_addend = 8;
- } else {
- tls->callbacks_sym = coff_find_sym(img, "__xl_c");
- tls->callbacks_addend = 0;
- }
- u32 rdata_size = (u32)ALIGN_UP((u64)out[COFF_BUCKET_RDATA].size, 8ull);
- u32 need = rdata_size + COFF_TLS_DIRECTORY64_SIZE;
- if (need > *rdata_cap) {
- (void)VEC_GROW(img->heap, out[COFF_BUCKET_RDATA].bytes, *rdata_cap, need);
- }
- /* Zero any padding bytes introduced by the alignment bump and the
- * directory slot itself. */
- if (rdata_size > out[COFF_BUCKET_RDATA].size) {
- memset(out[COFF_BUCKET_RDATA].bytes + out[COFF_BUCKET_RDATA].size, 0,
- rdata_size - out[COFF_BUCKET_RDATA].size);
- }
- memset(out[COFF_BUCKET_RDATA].bytes + rdata_size, 0,
- COFF_TLS_DIRECTORY64_SIZE);
- tls->dir_rdata_off = rdata_size;
- out[COFF_BUCKET_RDATA].size = need;
-}
-
-/* Write the IMAGE_TLS_DIRECTORY64 bytes once all bucket RVAs are
- * final. Each u64 VA field gets ImageBase + RVA; the base-reloc pass
- * will emit IMAGE_REL_BASED_DIR64 entries so ASLR keeps them valid. */
-static void coff_emit_tls_dir(const LinkImage* img,
- const CoffSection out[COFF_NBUCKETS],
- const CoffSecMap* map,
- const CoffTlsLayout* tls) {
- if (!tls->present) return;
- u64 tls_start = PE_IMAGE_BASE + (u64)out[COFF_BUCKET_TLS].rva;
- u64 tls_end = tls_start + (u64)tls->tls_size;
- u64 idx_vaddr =
- coff_symbol_final_va(img, out, map, tls->tls_index_sym, "_tls_index");
- const char* callbacks_name =
- tls->callbacks_addend ? "__xl_a" : "__xl_c";
- u64 callbacks_vaddr =
- tls->callbacks_sym
- ? coff_symbol_final_va(img, out, map, tls->callbacks_sym,
- callbacks_name) +
- tls->callbacks_addend
- : 0;
-
- u8* p = out[COFF_BUCKET_RDATA].bytes + tls->dir_rdata_off;
- wr_u64_le(p + COFF_TLSDIR_OFF_START_ADDR, tls_start);
- wr_u64_le(p + COFF_TLSDIR_OFF_END_ADDR, tls_end);
- wr_u64_le(p + COFF_TLSDIR_OFF_INDEX_ADDR, idx_vaddr);
- wr_u64_le(p + COFF_TLSDIR_OFF_CALLBACKS, callbacks_vaddr);
- wr_u32_le(p + 32, 0); /* SizeOfZeroFill */
- wr_u32_le(p + 36, 0); /* Characteristics */
-}
-
-static void coff_define_tls_used(LinkImage* img,
- const CoffSection out[COFF_NBUCKETS],
- const CoffTlsLayout* tls) {
- if (!tls->present) return;
- if (!img->linker) return;
- link_emit_boundary_sym(img->linker, img, "_tls_used",
- PE_IMAGE_BASE + (u64)out[COFF_BUCKET_RDATA].rva +
- (u64)tls->dir_rdata_off);
-}
-
-/* ---- import-table synthesis (Phase 3.2) ---------------------------
- *
- * Per doc/WINDOWS.md §3.2: every LinkSymbol with `imported = 1` gets
- * routed through an IAT slot synthesized in `.idata`. Function
- * imports additionally receive a small per-arch stub in `.text`
- * (`ff 25 disp32` on x64 / `adrp;ldr;br` on aa64) so a direct CALL26
- * or PC32 against the symbol lands on a stub that indirects through
- * the IAT. Data imports skip the stub — the symbol's final vaddr is
- * just the IAT slot vaddr, and code-gen emits a `mov rax, [slot]`
- * sequence the same way it would for any other GOT-style load.
- *
- * cfree's COFF code-gen uses direct symbol references; there is no
- * separate `__imp_<name>` LinkSymbol consulted at link time. The
- * IAT-slot rewrite happens entirely by overriding the imported
- * symbol's vaddr in apply_all_relocs. */
-
-typedef struct CoffImport {
- LinkSymId sym; /* canonical LinkSymId from img->syms */
- u32 dll_idx; /* index into CoffImportTable.dlls */
- u32 stub_off; /* offset in .text bucket (functions only) */
- u32 iat_off; /* offset in .idata IAT block */
- u32 ilt_off; /* offset in .idata ILT block */
- u32 hint_off; /* offset in .idata hint/name table */
- u8 is_func;
- u8 pad[3];
-} CoffImport;
-
-typedef struct CoffImportDll {
- Sym soname;
- u32 first; /* index of first import in CoffImportTable.imports */
- u32 count;
- u32 ilt_off; /* offset of this DLL's ILT block in .idata */
- u32 iat_off; /* offset of this DLL's IAT block in .idata */
- u32 name_off; /* offset of DLL name string in .idata */
-} CoffImportDll;
-
-typedef struct CoffImportTable {
- CoffImport* imports;
- u32 nimports;
- u32 imports_cap; /* heap-allocation size for cleanup */
- u32 nfunc_imports; /* subset of nimports that needs a .text stub */
- CoffImportDll* dlls;
- u32 ndlls;
- u32 dlls_cap; /* heap-allocation size for cleanup */
- /* Offsets within .idata of the five sub-blocks. Filled in by
- * coff_plan_idata_layout once nimports / ndlls is known. */
- u32 desc_off; /* always 0 — descriptors come first */
- u32 desc_size;
- u32 ilt_base;
- u32 ilt_total;
- u32 iat_base;
- u32 iat_total;
- u32 hint_base;
- u32 hint_total;
- u32 name_base;
- u32 name_total;
- u32 idata_size;
- /* Stub region in .text bucket. Stubs are appended after every
- * input .text section has been bucketed. stub_text_off is the
- * bucket-local offset of the first stub; per-import stub offsets
- * are stored in CoffImport.stub_off. */
- u32 stub_text_off;
- u32 stub_total;
-} CoffImportTable;
-
-/* Sort comparator: imports grouped by DLL slot, stable on input
- * order within a DLL (sort is stable enough via secondary key). */
-static int coff_import_cmp(const void* a, const void* b) {
- const CoffImport* ia = (const CoffImport*)a;
- const CoffImport* ib = (const CoffImport*)b;
- if (ia->dll_idx < ib->dll_idx) return -1;
- if (ia->dll_idx > ib->dll_idx) return 1;
- /* Secondary: LinkSymId so the order is reproducible. */
- if (ia->sym < ib->sym) return -1;
- if (ia->sym > ib->sym) return 1;
- return 0;
-}
-
-static const char* coff_import_lookup_name(Compiler* c, const LinkSymbol* s,
- size_t* nlen_out) {
- Slice nm_s = s->name ? pool_slice(c->global, s->name) : SLICE_NULL;
- const char* nm = nm_s.s;
- size_t nlen = nm_s.len;
- static const char kImpPrefix[] = "__imp_";
- const size_t kImpPrefixLen = sizeof(kImpPrefix) - 1u;
- if (nm && nlen > kImpPrefixLen &&
- memcmp(nm, kImpPrefix, kImpPrefixLen) == 0) {
- nm += kImpPrefixLen;
- nlen -= kImpPrefixLen;
- }
- if (nlen_out) *nlen_out = nlen;
- return nm;
-}
-
-/* True iff the import classifies as function-like. Mirrors the ELF
- * `sym_is_func_import` heuristic: if the canonical kind is known
- * we trust it, otherwise we default to function (which matches the
- * COFF code-gen contract — direct calls are by far the common case
- * and a data import wrongly stubbed would still fail loudly via the
- * IAT-routed call). */
-static int coff_import_is_func(Compiler* c, const LinkSymbol* s) {
- if (s->name) {
- Slice nm_s = pool_slice(c->global, s->name);
- const char* nm = nm_s.s;
- size_t nlen = nm_s.len;
- if (nm && nlen > 6u && memcmp(nm, "__imp_", 6u) == 0) return 0;
- }
- if (s->kind == SK_FUNC || s->kind == SK_IFUNC) return 1;
- if (s->kind == SK_OBJ) return 0;
- /* SK_UNDEF / SK_NOTYPE: assume function (the common case). */
- return 1;
-}
-
-/* Walk LinkSyms, collect imports, group by DLL soname. Returns 1 if
- * any imports were collected, 0 otherwise (caller skips the entire
- * .idata path). */
-static int coff_collect_imports(LinkImage* img, CoffImportTable* it) {
- Heap* heap = img->heap;
- Compiler* c = img->c;
- Linker* l = img->linker;
- u32 nsyms = LinkSyms_count(&img->syms);
- u32 imp_cap = 0;
- u32 dll_cap = 0;
- u32 i;
-
- memset(it, 0, sizeof(*it));
- if (!l) return 0;
- for (i = 0; i < nsyms; ++i) {
- LinkSymbol* s = LinkSyms_at(&img->syms, i);
- LinkInput* in;
- u32 dll_idx = (u32)-1;
- u32 d;
- if (!s->imported) continue;
- if (s->name == 0) continue;
- if (s->dso_input_id == LINK_INPUT_NONE) {
- compiler_panic(c, no_loc(),
- "link_emit_coff: imported symbol has no providing DSO");
- }
- /* img->globals only carries defined globals/weaks; imported undefs
- * never land there. Dedup by name: skip if any earlier slot
- * already collected this name. */
- {
- int dup = 0;
- for (u32 k = 0; k < it->nimports; ++k) {
- LinkSymbol* prev = LinkSyms_at(&img->syms, it->imports[k].sym - 1);
- if (prev->name == s->name) { dup = 1; break; }
- }
- if (dup) continue;
- }
- if (s->dso_input_id - 1u >= LinkInputs_count(&l->inputs)) {
- compiler_panic(c, no_loc(),
- "link_emit_coff: import dso_input_id out of range");
- }
- in = LinkInputs_at(&l->inputs, s->dso_input_id - 1u);
- if (in->soname == 0) {
- compiler_panic(c, no_loc(),
- "link_emit_coff: providing DSO has no soname; cannot "
- "emit IMAGE_IMPORT_DESCRIPTOR.Name");
- }
- /* Find-or-add the DLL slot. */
- for (d = 0; d < it->ndlls; ++d) {
- if (it->dlls[d].soname == in->soname) { dll_idx = d; break; }
- }
- if (dll_idx == (u32)-1) {
- if (VEC_GROW(heap, it->dlls, dll_cap, it->ndlls + 1u))
- compiler_panic(c, no_loc(), "link_emit_coff: oom on import dlls");
- dll_idx = it->ndlls++;
- memset(&it->dlls[dll_idx], 0, sizeof(it->dlls[dll_idx]));
- it->dlls[dll_idx].soname = in->soname;
- }
- if (VEC_GROW(heap, it->imports, imp_cap, it->nimports + 1u))
- compiler_panic(c, no_loc(), "link_emit_coff: oom on imports");
- memset(&it->imports[it->nimports], 0,
- sizeof(it->imports[it->nimports]));
- it->imports[it->nimports].sym = s->id;
- it->imports[it->nimports].dll_idx = dll_idx;
- it->imports[it->nimports].is_func = (u8)coff_import_is_func(c, s);
- if (it->imports[it->nimports].is_func) ++it->nfunc_imports;
- ++it->nimports;
- it->dlls[dll_idx].count++;
- }
- if (it->nimports == 0) return 0;
- /* Re-bucket the imports array so each DLL's run is contiguous. */
- qsort(it->imports, it->nimports, sizeof(*it->imports), coff_import_cmp);
- /* Fix up CoffImportDll.first now that imports[] is sorted. */
- {
- u32 cur = 0;
- for (u32 d = 0; d < it->ndlls; ++d) {
- it->dlls[d].first = cur;
- cur += it->dlls[d].count;
- }
- }
- it->imports_cap = imp_cap;
- it->dlls_cap = dll_cap;
- return 1;
-}
-
-static void coff_imports_free(LinkImage* img, CoffImportTable* it) {
- Heap* heap = img->heap;
- if (it->imports) {
- heap->free(heap, it->imports,
- (size_t)it->imports_cap * sizeof(*it->imports));
- }
- if (it->dlls) {
- heap->free(heap, it->dlls,
- (size_t)it->dlls_cap * sizeof(*it->dlls));
- }
-}
-
-/* Compute every per-block / per-import offset inside .idata and the
- * total .idata size in bytes. Also assigns per-import hint/name and
- * dll-name offsets so the descriptor table can reference them by RVA
- * later (RVAs need the bucket's final RVA, added in coff_emit_idata). */
-static void coff_plan_idata_layout(LinkImage* img, CoffImportTable* it) {
- Compiler* c = img->c;
- u32 off;
-
- /* Block 1: import descriptors (one per DLL + zero terminator). */
- it->desc_off = 0;
- it->desc_size = (it->ndlls + 1u) * COFF_IMPORT_DESCRIPTOR_SIZE;
- off = (u32)ALIGN_UP((u64)it->desc_size, (u64)PE_IDATA_BLOCK_ALIGN);
-
- /* Block 2: ILTs. Per DLL: count entries + 1 (terminator), 8 B each. */
- it->ilt_base = off;
- for (u32 d = 0; d < it->ndlls; ++d) {
- it->dlls[d].ilt_off = off;
- /* Per-import: assign ilt_off within this DLL's block. */
- for (u32 k = 0; k < it->dlls[d].count; ++k) {
- it->imports[it->dlls[d].first + k].ilt_off =
- off + k * (u32)COFF_THUNK_DATA64_SIZE;
- }
- off += (it->dlls[d].count + 1u) * (u32)COFF_THUNK_DATA64_SIZE;
- }
- it->ilt_total = off - it->ilt_base;
- off = (u32)ALIGN_UP((u64)off, (u64)PE_IDATA_BLOCK_ALIGN);
-
- /* Block 3: IATs (same shape as ILTs). */
- it->iat_base = off;
- for (u32 d = 0; d < it->ndlls; ++d) {
- it->dlls[d].iat_off = off;
- for (u32 k = 0; k < it->dlls[d].count; ++k) {
- it->imports[it->dlls[d].first + k].iat_off =
- off + k * (u32)COFF_THUNK_DATA64_SIZE;
- }
- off += (it->dlls[d].count + 1u) * (u32)COFF_THUNK_DATA64_SIZE;
- }
- it->iat_total = off - it->iat_base;
- off = (u32)ALIGN_UP((u64)off, (u64)PE_IDATA_BLOCK_ALIGN);
-
- /* Block 4: hint/name records. Each: u16 hint + NUL-term name +
- * 1-byte pad if the resulting size is odd (PE/COFF spec). */
- it->hint_base = off;
- for (u32 i = 0; i < it->nimports; ++i) {
- LinkSymbol* s = LinkSyms_at(&img->syms, it->imports[i].sym - 1);
- size_t nlen = 0;
- const char* nm = coff_import_lookup_name(c, s, &nlen);
- if (!nm || nlen == 0)
- compiler_panic(c, no_loc(),
- "link_emit_coff: imported symbol has empty name");
- it->imports[i].hint_off = off;
- /* hint (2 B) + name (nlen + 1) + optional pad to even. */
- u32 rec = 2u + (u32)nlen + 1u;
- if (rec & 1u) ++rec;
- off += rec;
- }
- it->hint_total = off - it->hint_base;
- off = (u32)ALIGN_UP((u64)off, (u64)PE_IDATA_BLOCK_ALIGN);
-
- /* Block 5: DLL name strings (NUL-terminated). */
- it->name_base = off;
- for (u32 d = 0; d < it->ndlls; ++d) {
- Slice nm_s = pool_slice(c->global, it->dlls[d].soname);
- const char* nm = nm_s.s;
- size_t nlen = nm_s.len;
- if (!nm || nlen == 0)
- compiler_panic(c, no_loc(),
- "link_emit_coff: providing DSO has empty soname");
- it->dlls[d].name_off = off;
- off += (u32)nlen + 1u;
- }
- it->name_total = off - it->name_base;
- it->idata_size = off;
-}
-
-/* Append the function-import stubs to the .text bucket. Each stub is
- * `coff_stub_size` bytes (arch-specific). Records each stub's bucket-
- * local offset on the matching CoffImport so the per-symbol stub vaddr
- * can be computed once the .text bucket's RVA is final. */
-static void coff_append_stubs(LinkImage* img, CoffImportTable* it,
- CoffSection* text_bucket,
- u32* text_bucket_cap) {
- Heap* heap = img->heap;
- Compiler* c = img->c;
- const LinkArchDesc* arch = link_arch_desc_for(c);
- u32 stub_size;
- u32 stub_align;
- u64 cur;
- if (!arch || arch->coff_stub_size == 0 || !arch->emit_coff_iat_stub) {
- compiler_panic(c, no_loc(),
- "link_emit_coff: arch has no COFF IAT stub emitter");
- }
- stub_size = arch->coff_stub_size;
- /* Stubs are pure code; aligning to instruction alignment is enough.
- * x64 wants byte-granular, aa64 wants 4 B; align to stub size as a
- * convenient upper bound. */
- stub_align = stub_size;
- cur = (u64)text_bucket->size;
- cur = ALIGN_UP(cur, (u64)stub_align);
- it->stub_text_off = (u32)cur;
- for (u32 i = 0; i < it->nimports; ++i) {
- if (!it->imports[i].is_func) continue;
- it->imports[i].stub_off = (u32)cur;
- cur += stub_size;
- }
- it->stub_total = (u32)cur - it->stub_text_off;
- if (it->stub_total == 0) return;
- /* Grow the .text bucket buffer to hold the new region. */
- u32 need = (u32)cur;
- if (need > *text_bucket_cap) {
- (void)VEC_GROW(heap, text_bucket->bytes, *text_bucket_cap, need);
- }
- /* Zero the alignment pad; stub bytes are written later by
- * coff_emit_stubs once vaddrs are known. */
- if ((u32)cur > text_bucket->size) {
- memset(text_bucket->bytes + text_bucket->size, 0,
- (size_t)((u32)cur - text_bucket->size));
- }
- text_bucket->size = (u32)cur;
-}
-
-/* Emit each function import's IAT stub into the .text bucket. Must
- * run after coff_assign_layout has fixed both .text's RVA and
- * .idata's RVA, since the stub bakes in the post-shift IAT slot
- * displacement. */
-static void coff_emit_stubs(LinkImage* img, const CoffImportTable* it,
- const CoffSection out[COFF_NBUCKETS]) {
- Compiler* c = img->c;
- const LinkArchDesc* arch = link_arch_desc_for(c);
- u64 img_base = PE_IMAGE_BASE;
- u32 text_rva = out[COFF_BUCKET_TEXT].rva;
- u32 idata_rva = out[COFF_BUCKET_IDATA].rva;
- if (!arch || !arch->emit_coff_iat_stub) {
- compiler_panic(c, no_loc(),
- "link_emit_coff: arch has no COFF IAT stub emitter");
- }
- for (u32 i = 0; i < it->nimports; ++i) {
- u64 stub_va, slot_va;
- if (!it->imports[i].is_func) continue;
- stub_va = img_base + (u64)text_rva + (u64)it->imports[i].stub_off;
- slot_va = img_base + (u64)idata_rva + (u64)it->imports[i].iat_off;
- arch->emit_coff_iat_stub(out[COFF_BUCKET_TEXT].bytes +
- it->imports[i].stub_off,
- stub_va, slot_va);
- }
-}
-
-/* Emit .idata content into the bucket buffer. Allocates the buffer
- * here (size is already known from coff_plan_idata_layout). */
-static void coff_emit_idata(LinkImage* img, const CoffImportTable* it,
- CoffSection out[COFF_NBUCKETS],
- u32* idata_bucket_cap) {
- Heap* heap = img->heap;
- Compiler* c = img->c;
- CoffSection* idata = &out[COFF_BUCKET_IDATA];
- u32 idata_rva = idata->rva;
- u8* buf;
- /* Allocate the bucket buffer (idata_size is already block-aligned). */
- buf = (u8*)heap->alloc(heap, it->idata_size, _Alignof(u64));
- if (!buf)
- compiler_panic(c, no_loc(), "link_emit_coff: oom on .idata buffer");
- memset(buf, 0, it->idata_size);
- idata->bytes = buf;
- idata->size = it->idata_size;
- *idata_bucket_cap = it->idata_size;
-
- /* Block 1: IMAGE_IMPORT_DESCRIPTOR table. */
- for (u32 d = 0; d < it->ndlls; ++d) {
- u8* p = buf + d * (u32)COFF_IMPORT_DESCRIPTOR_SIZE;
- u32 ilt_rva = idata_rva + it->dlls[d].ilt_off;
- u32 iat_rva = idata_rva + it->dlls[d].iat_off;
- u32 name_rva = idata_rva + it->dlls[d].name_off;
- wr_u32_le(p + 0, ilt_rva); /* OriginalFirstThunk */
- wr_u32_le(p + 4, 0u); /* TimeDateStamp */
- wr_u32_le(p + 8, 0u); /* ForwarderChain */
- wr_u32_le(p + 12, name_rva); /* Name */
- wr_u32_le(p + 16, iat_rva); /* FirstThunk */
- }
- /* Trailing zero descriptor already zero-filled by memset. */
-
- /* Blocks 2+3: ILT + IAT. Both initially point at the same hint/name
- * record for each import; the OS loader rewrites IAT entries at
- * load time. */
- for (u32 i = 0; i < it->nimports; ++i) {
- u64 hint_rva = (u64)idata_rva + (u64)it->imports[i].hint_off;
- wr_u64_le(buf + it->imports[i].ilt_off, hint_rva);
- wr_u64_le(buf + it->imports[i].iat_off, hint_rva);
- }
- /* Per-DLL ILT/IAT terminators are u64 0, already zero-filled. */
-
- /* Block 4: hint/name records. */
- for (u32 i = 0; i < it->nimports; ++i) {
- LinkSymbol* s = LinkSyms_at(&img->syms, it->imports[i].sym - 1);
- size_t nlen = 0;
- const char* nm = coff_import_lookup_name(c, s, &nlen);
- u8* p = buf + it->imports[i].hint_off;
- wr_u16_le(p, PE_IMPORT_HINT_NONE);
- memcpy(p + 2, nm, nlen);
- /* NUL terminator + optional pad already zero. */
- }
-
- /* Block 5: DLL name strings. */
- for (u32 d = 0; d < it->ndlls; ++d) {
- Slice nm_s = pool_slice(c->global, it->dlls[d].soname);
- const char* nm = nm_s.s;
- size_t nlen = nm_s.len;
- memcpy(buf + it->dlls[d].name_off, nm, nlen);
- /* NUL already zero. */
- }
-}
-
-/* Per-LinkSymId vaddr override table for imports. Indexed by
- * LinkSymId-1; 0 means "not an import". Built once after the .idata
- * bucket RVA is final. Consumed by coff_apply_all_relocs in lieu of
- * the symbol's own vaddr field (which is 0 for imports). */
-typedef struct CoffImportVaddr {
- u64* by_sym; /* size = nsyms; 0 entries mean "not imported" */
- u32 nsyms;
-} CoffImportVaddr;
-
-static void coff_import_vaddr_build(LinkImage* img, const CoffImportTable* it,
- const CoffSection out[COFF_NBUCKETS],
- CoffImportVaddr* iv) {
- Heap* heap = img->heap;
- u64 img_base = PE_IMAGE_BASE;
- u32 text_rva = out[COFF_BUCKET_TEXT].rva;
- u32 idata_rva = out[COFF_BUCKET_IDATA].rva;
- iv->nsyms = LinkSyms_count(&img->syms);
- iv->by_sym = (u64*)heap->alloc(heap, sizeof(u64) * (size_t)(iv->nsyms + 1u),
- _Alignof(u64));
- if (!iv->by_sym)
- compiler_panic(img->c, no_loc(),
- "link_emit_coff: oom on import vaddr table");
- memset(iv->by_sym, 0, sizeof(u64) * (size_t)(iv->nsyms + 1u));
- for (u32 i = 0; i < it->nimports; ++i) {
- LinkSymId sid = it->imports[i].sym;
- u64 va;
- if (it->imports[i].is_func) {
- va = img_base + (u64)text_rva + (u64)it->imports[i].stub_off;
- } else {
- va = img_base + (u64)idata_rva + (u64)it->imports[i].iat_off;
- }
- iv->by_sym[sid - 1u] = va;
- /* Fan out across every shadow LinkSymId with the same name so a
- * per-input undef reference resolves to the same import slot. */
- {
- LinkSymbol* canonical = LinkSyms_at(&img->syms, sid - 1u);
- for (u32 j = 0; j < iv->nsyms; ++j) {
- LinkSymbol* s = LinkSyms_at(&img->syms, j);
- if (s->name == canonical->name && s->imported) {
- iv->by_sym[s->id - 1u] = va;
- }
- }
- }
- }
-}
-
-static void coff_import_vaddr_free(LinkImage* img, CoffImportVaddr* iv) {
- Heap* heap = img->heap;
- if (iv->by_sym) {
- heap->free(heap, iv->by_sym,
- sizeof(u64) * (size_t)(iv->nsyms + 1u));
- }
-}
-
-/* Resolve Compiler.target.arch -> IMAGE_FILE_MACHINE_* via the per-arch
- * coff ops table. Panic if the arch has no COFF descriptor or the
- * machine value is one cfree doesn't ship (Phase 1 supports AMD64 and
- * ARM64 only). */
-static u16 coff_machine_or_panic(Compiler* c) {
- const ArchImpl* arch = arch_for_compiler(c);
- u16 m;
- if (!arch || !arch->coff)
- compiler_panic(c, no_loc(), "link_emit_coff: no COFF arch descriptor");
- m = arch->coff->machine;
- if (m != IMAGE_FILE_MACHINE_AMD64 && m != IMAGE_FILE_MACHINE_ARM64)
- compiler_panic(c, no_loc(),
- "link_emit_coff: unsupported machine 0x%x", (unsigned)m);
- return m;
-}
-
-static int coff_section_name_starts(Compiler* c, const LinkSection* ls,
- const char* prefix) {
- size_t pn = slice_from_cstr(prefix).len;
- Slice s_s = ls->name ? pool_slice(c->global, ls->name) : SLICE_NULL;
- const char* s = s_s.s;
- size_t n = s_s.len;
- return s && n >= pn && memcmp(s, prefix, pn) == 0;
-}
-
-static int coff_section_name_cmp(Compiler* c, const LinkSection* a,
- const LinkSection* b) {
- Slice as_s = a->name ? pool_slice(c->global, a->name) : SLICE_NULL;
- Slice bs_s = b->name ? pool_slice(c->global, b->name) : SLICE_NULL;
- const char* as = as_s.s ? as_s.s : "";
- const char* bs = bs_s.s ? bs_s.s : "";
- size_t an = as_s.len, bn = bs_s.len;
- size_t n = an < bn ? an : bn;
- int cmp = n ? memcmp(as, bs, n) : 0;
- if (cmp) return cmp;
- if (an < bn) return -1;
- if (an > bn) return 1;
- if (a->id < b->id) return -1;
- if (a->id > b->id) return 1;
- return 0;
-}
-
-static void coff_place_section(LinkImage* img, CoffSection out[COFF_NBUCKETS],
- CoffSecMap* map, u64 bucket_cur[COFF_NBUCKETS],
- u32 bucket_cap[COFF_NBUCKETS],
- const LinkSection* ls) {
- Heap* heap = img->heap;
- CoffBucket b2 = coff_bucket_for(ls);
- u32 align = ls->align ? ls->align : 1u;
- u64 cur = bucket_cur[b2];
- cur = ALIGN_UP(cur, (u64)align);
- map[ls->id - 1].bucket = (u8)b2;
- /* Record the bucket-local offset; the absolute RVA / file offset
- * are filled in after bucket placement (RVAs need
- * SectionAlignment, file offsets need FileAlignment). */
- map[ls->id - 1].new_rva = (u32)cur;
- if (b2 != COFF_BUCKET_BSS) {
- /* Copy bytes from the source segment buffer into the bucket. */
- if (ls->size) {
- u32 need = (u32)(cur + ls->size);
- if (need > bucket_cap[b2]) {
- (void)VEC_GROW(heap, out[b2].bytes, bucket_cap[b2], need);
- }
- memset(out[b2].bytes + bucket_cur[b2], 0,
- (size_t)(cur - bucket_cur[b2]));
- if (ls->sem != SSEM_NOBITS) {
- const LinkSegment* seg = &img->segments[ls->segment_id - 1];
- const u8* src = img->segment_bytes[seg->id - 1] +
- (size_t)(ls->file_offset - seg->file_offset);
- memcpy(out[b2].bytes + cur, src, (size_t)ls->size);
- } else {
- memset(out[b2].bytes + cur, 0, (size_t)ls->size);
- }
- }
- }
- cur += ls->size;
- bucket_cur[b2] = cur;
- out[b2].size = (u32)cur;
-}
-
-static void coff_insert_sorted_section(Compiler* c, const LinkSection** a,
- u32* n, const LinkSection* ls) {
- u32 i = *n;
- while (i > 0 && coff_section_name_cmp(c, ls, a[i - 1u]) < 0) {
- a[i] = a[i - 1u];
- --i;
- }
- a[i] = ls;
- *n += 1u;
-}
-
-/* ---- pass 1: bucket input sections, assemble bytes, assign deltas ----
- * CoffSecMap is defined above (alongside CoffTlsLayout) because the
- * TLS planning helpers need to consume one. */
-
-/* Build the four payload buckets (.text/.rdata/.data/.bss).
- *
- * `map[secid-1]` is populated for every kept LinkSection with the
- * section's new RVA, new file offset, the bucket it landed in, and the
- * delta to add to in-section vaddrs. Bucket buffers are
- * heap-allocated; the caller frees them after emit. */
-static void coff_build_buckets(LinkImage* img, CoffSection out[COFF_NBUCKETS],
- CoffSecMap* map) {
- Heap* heap = img->heap;
- Compiler* c = img->c;
- const LinkSection** tls_sorted = NULL;
- const LinkSection** crt_sorted = NULL;
- u32 ntls_sorted = 0;
- u32 ncrt_sorted = 0;
- u32 i, b;
-
- for (b = 0; b < COFF_NBUCKETS; ++b) {
- memset(&out[b], 0, sizeof(out[b]));
- }
- out[COFF_BUCKET_TEXT].name = ".text";
- out[COFF_BUCKET_TEXT].characteristics =
- IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_MEM_READ;
- out[COFF_BUCKET_TEXT].has_file_bytes = 1;
- out[COFF_BUCKET_RDATA].name = ".rdata";
- out[COFF_BUCKET_RDATA].characteristics =
- IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ;
- out[COFF_BUCKET_RDATA].has_file_bytes = 1;
- out[COFF_BUCKET_IDATA].name = ".idata";
- out[COFF_BUCKET_IDATA].characteristics =
- IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ;
- out[COFF_BUCKET_IDATA].has_file_bytes = 1;
- out[COFF_BUCKET_DATA].name = ".data";
- out[COFF_BUCKET_DATA].characteristics =
- IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ |
- IMAGE_SCN_MEM_WRITE;
- out[COFF_BUCKET_DATA].has_file_bytes = 1;
- /* The Windows loader uses .tls as a *template*: the bytes on disk
- * seed each thread's per-TLS copy at thread creation, and threads
- * write to their copies, not the template. The PE section is still
- * marked writable because that's what mingw and link.exe emit; the
- * loader special-cases it via the TLS directory. */
- out[COFF_BUCKET_TLS].name = ".tls";
- out[COFF_BUCKET_TLS].characteristics =
- IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ |
- IMAGE_SCN_MEM_WRITE;
- out[COFF_BUCKET_TLS].has_file_bytes = 1;
- out[COFF_BUCKET_BSS].name = ".bss";
- out[COFF_BUCKET_BSS].characteristics =
- IMAGE_SCN_CNT_UNINITIALIZED_DATA | IMAGE_SCN_MEM_READ |
- IMAGE_SCN_MEM_WRITE;
- out[COFF_BUCKET_BSS].has_file_bytes = 0;
- out[COFF_BUCKET_RELOC].name = ".reloc";
- out[COFF_BUCKET_RELOC].characteristics =
- IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ |
- IMAGE_SCN_MEM_DISCARDABLE;
- out[COFF_BUCKET_RELOC].has_file_bytes = 1;
-
- /* Track per-bucket cursors. Bucket sizes are bounded by the sum of
- * input section sizes plus per-section alignment padding; we grow
- * lazily via VEC_GROW. */
- u64 bucket_cur[COFF_NBUCKETS];
- u32 bucket_cap[COFF_NBUCKETS];
- for (b = 0; b < COFF_NBUCKETS; ++b) {
- bucket_cur[b] = 0;
- bucket_cap[b] = 0;
- }
-
- tls_sorted = img->nsections
- ? (const LinkSection**)heap->alloc(
- heap, sizeof(*tls_sorted) * img->nsections,
- _Alignof(const LinkSection*))
- : NULL;
- crt_sorted = img->nsections
- ? (const LinkSection**)heap->alloc(
- heap, sizeof(*crt_sorted) * img->nsections,
- _Alignof(const LinkSection*))
- : NULL;
- if (img->nsections && (!tls_sorted || !crt_sorted))
- compiler_panic(c, no_loc(), "link_emit_coff: oom sorting sections");
-
- for (i = 0; i < img->nsections; ++i) {
- const LinkSection* ls = &img->sections[i];
- if (!(ls->flags & SF_ALLOC)) continue;
- if (ls->flags & SF_TLS) {
- coff_insert_sorted_section(c, tls_sorted, &ntls_sorted, ls);
- continue;
- }
- if (coff_section_name_starts(c, ls, ".CRT$")) {
- coff_insert_sorted_section(c, crt_sorted, &ncrt_sorted, ls);
- continue;
- }
- coff_place_section(img, out, map, bucket_cur, bucket_cap, ls);
- }
-
- for (i = 0; i < ntls_sorted; ++i) {
- coff_place_section(img, out, map, bucket_cur, bucket_cap, tls_sorted[i]);
- }
- for (i = 0; i < ncrt_sorted; ++i) {
- coff_place_section(img, out, map, bucket_cur, bucket_cap, crt_sorted[i]);
- }
-
- /* Track caps so we can free with the right size later (heap->free
- * needs the original allocation size). Stash into size_raw
- * temporarily — overwritten below with the proper PE value. */
- for (b = 0; b < COFF_NBUCKETS; ++b) out[b].size_raw = bucket_cap[b];
- if (tls_sorted)
- heap->free(heap, tls_sorted, sizeof(*tls_sorted) * img->nsections);
- if (crt_sorted)
- heap->free(heap, crt_sorted, sizeof(*crt_sorted) * img->nsections);
-}
-
-/* Assign RVAs and file offsets to the buckets that participate in the
- * image. Returns the file offset at which trailing pad-to-EOF should
- * land (== file size). */
-static u64 coff_assign_layout(CoffSection out[COFF_NBUCKETS],
- u32 headers_file_size,
- u32 first_section_rva) {
- u32 rva = first_section_rva;
- u64 file = ALIGN_UP((u64)headers_file_size, (u64)PE_FILE_ALIGNMENT);
- u32 b;
- for (b = 0; b < COFF_NBUCKETS; ++b) {
- if (out[b].size == 0) {
- out[b].in_image = 0;
- out[b].rva = 0;
- out[b].file_offset = 0;
- out[b].size_raw = 0;
- continue;
- }
- out[b].in_image = 1;
- out[b].rva = (u32)ALIGN_UP((u64)rva, (u64)PE_SECTION_ALIGNMENT);
- if (out[b].has_file_bytes) {
- out[b].file_offset = (u32)file;
- out[b].size_raw = (u32)ALIGN_UP((u64)out[b].size,
- (u64)PE_FILE_ALIGNMENT);
- file += out[b].size_raw;
- } else {
- out[b].file_offset = 0;
- out[b].size_raw = 0;
- }
- rva = out[b].rva + out[b].size;
- }
- return file;
-}
-
-/* Build the .reloc bytes by grouping absolute relocs by 4-KiB page.
- * The map[] array maps LinkSectionId-1 to the per-section post-PE-relayout
- * RVA, so we can compute each reloc's site_rva = section_rva + (orig
- * write_vaddr - orig section_vaddr).
- *
- * Layout per page:
- * u32 page_rva
- * u32 size_of_block (8 + n_entries*2, padded to a multiple of 4)
- * u16 entries[]: (type << 12) | (offset & 0xfff)
- * optional trailing u16 = 0 (IMAGE_REL_BASED_ABSOLUTE) for u32 alignment */
-typedef struct CoffRelocEntry {
- u32 site_rva;
- u16 type;
- u16 pad;
-} CoffRelocEntry;
-
-static int coff_reloc_entry_cmp(const void* a, const void* b) {
- const CoffRelocEntry* ea = (const CoffRelocEntry*)a;
- const CoffRelocEntry* eb = (const CoffRelocEntry*)b;
- if (ea->site_rva < eb->site_rva) return -1;
- if (ea->site_rva > eb->site_rva) return 1;
- return 0;
-}
-
-static void coff_build_reloc_section(LinkImage* img,
- const CoffSection out[COFF_NBUCKETS],
- const CoffSecMap* map,
- CoffSection* reloc,
- const CoffRelocEntry* extras,
- u32 n_extras) {
- Heap* heap = img->heap;
- Compiler* c = img->c;
- u32 nrel = LinkRelocs_count(&img->relocs);
- CoffRelocEntry* entries = NULL;
- u32 nentries = 0;
- u32 cap = 0;
- u32 i;
-
- if (!img->pie) {
- reloc->bytes = NULL;
- reloc->size = 0;
- return;
- }
- for (i = 0; i < nrel; ++i) {
- const LinkRelocApply* r = LinkRelocs_at(&img->relocs, i);
- const LinkSection* ls;
- u64 site_old_vaddr;
- u32 site_rva;
- u16 type;
- if (!coff_reloc_needs_base_reloc(r->kind)) continue;
- if (r->link_section_id == LINK_SEC_NONE) continue;
- ls = &img->sections[r->link_section_id - 1];
- /* r->write_vaddr is in the pre-relayout coordinate system (same as
- * ls->vaddr), so the offset into the section is stable. Add the
- * containing bucket's final RVA to land at the image RVA. */
- site_old_vaddr = r->write_vaddr;
- u8 sb = map[ls->id - 1].bucket;
- site_rva = out[sb].rva + map[ls->id - 1].new_rva +
- (u32)(site_old_vaddr - ls->vaddr);
- if (r->kind == R_ABS64) {
- type = (u16)IMAGE_REL_BASED_DIR64;
- } else {
- type = (u16)IMAGE_REL_BASED_HIGHLOW;
- }
- if (nentries == cap) {
- (void)VEC_GROW(heap, entries, cap, nentries + 1u);
- }
- entries[nentries].site_rva = site_rva;
- entries[nentries].type = type;
- entries[nentries].pad = 0;
- ++nentries;
- }
- /* Append caller-supplied extras (TLS directory absolute-VA fields,
- * etc.). These are already site-RVAs in the final image. */
- for (i = 0; i < n_extras; ++i) {
- if (nentries == cap) {
- (void)VEC_GROW(heap, entries, cap, nentries + 1u);
- }
- entries[nentries] = extras[i];
- ++nentries;
- }
- if (nentries == 0) {
- reloc->bytes = NULL;
- reloc->size = 0;
- if (entries) heap->free(heap, entries, cap * sizeof(*entries));
- (void)c;
- return;
- }
- /* Sort entries by RVA so we can group runs sharing a 4-KiB page. */
- qsort(entries, nentries, sizeof(*entries), coff_reloc_entry_cmp);
-
- /* Two-pass: first compute the total size (so we can allocate the
- * blob exactly), then emit. */
- u32 blob_size = 0;
- u32 run_start = 0;
- while (run_start < nentries) {
- u32 page = entries[run_start].site_rva & ~0xfffu;
- u32 run_end = run_start;
- while (run_end < nentries &&
- (entries[run_end].site_rva & ~0xfffu) == page) {
- ++run_end;
- }
- u32 n = run_end - run_start;
- u32 block = COFF_BASE_RELOCATION_SIZE + n * 2u;
- block = (u32)ALIGN_UP((u64)block, 4ull);
- blob_size += block;
- run_start = run_end;
- }
- reloc->bytes = (u8*)heap->alloc(heap, blob_size, 4);
- if (!reloc->bytes && blob_size)
- compiler_panic(c, no_loc(), "link_emit_coff: oom on .reloc blob");
- memset(reloc->bytes, 0, blob_size);
- reloc->size = blob_size;
- /* Stash allocation size for free path. */
- reloc->size_raw = blob_size;
-
- u32 cursor = 0;
- run_start = 0;
- while (run_start < nentries) {
- u32 page = entries[run_start].site_rva & ~0xfffu;
- u32 run_end = run_start;
- while (run_end < nentries &&
- (entries[run_end].site_rva & ~0xfffu) == page) {
- ++run_end;
- }
- u32 n = run_end - run_start;
- u32 raw_size = COFF_BASE_RELOCATION_SIZE + n * 2u;
- u32 block = (u32)ALIGN_UP((u64)raw_size, 4ull);
- u8* p = reloc->bytes + cursor;
- wr_u32_le(p, page);
- wr_u32_le(p + 4, block);
- u32 k;
- for (k = 0; k < n; ++k) {
- u16 entry = (u16)(((u16)entries[run_start + k].type << 12) |
- (entries[run_start + k].site_rva & 0xfffu));
- wr_u16_le(p + 8 + k * 2u, entry);
- }
- /* Optional trailing pad: a single IMAGE_REL_BASED_ABSOLUTE (0). */
- if (block > raw_size) {
- wr_u16_le(p + 8 + n * 2u, 0);
- }
- cursor += block;
- run_start = run_end;
- }
- heap->free(heap, entries, cap * sizeof(*entries));
-}
-
-/* Patch each LinkRelocApply against the PE-relayout coordinates and
- * apply. `bucket_bytes[bucket]` gives the writable buffer for that
- * bucket; the per-section delta in map[] turns the old in-section
- * offsets into bucket-local offsets.
- *
- * Imported targets (LinkSymbol.imported == 1) have no vaddr of their
- * own — instead the .idata pass populated `iv->by_sym[id-1]` with the
- * function stub's vaddr (for callable imports) or the IAT slot's
- * vaddr (for data imports). This is the spot where that table is
- * consulted in lieu of the symbol's own zero vaddr. */
-static void coff_apply_all_relocs(LinkImage* img,
- const CoffSection out[COFF_NBUCKETS],
- const CoffSecMap* map,
- const CoffImportVaddr* iv) {
- Compiler* c = img->c;
- u32 i;
- u64 img_base = PE_IMAGE_BASE;
- u32 nrel = LinkRelocs_count(&img->relocs);
- for (i = 0; i < nrel; ++i) {
- LinkRelocApply* r = LinkRelocs_at(&img->relocs, i);
- const LinkSymbol* tgt = LinkSyms_at(&img->syms, r->target - 1);
- const LinkSection* sec;
- const LinkSection* tgt_sec;
- u64 S, P;
- u8* P_bytes;
- u8 bucket;
- u32 site_off_in_sec;
- u32 site_bucket_off;
- if (r->link_section_id == LINK_SEC_NONE) continue;
- sec = &img->sections[r->link_section_id - 1];
- bucket = map[sec->id - 1].bucket;
- if (!out[bucket].has_file_bytes || !out[bucket].bytes) {
- /* Shouldn't happen — .bss has no relocations. */
- continue;
- }
- site_off_in_sec = (u32)(r->write_vaddr - sec->vaddr);
- site_bucket_off = map[sec->id - 1].new_rva + site_off_in_sec;
- P_bytes = out[bucket].bytes + site_bucket_off;
- /* P = ImageBase + bucket_rva + map[].new_rva + site_off_in_sec
- * — i.e. the final runtime address of the patch site. */
- P = img_base + (u64)out[bucket].rva +
- (u64)map[sec->id - 1].new_rva + site_off_in_sec;
-
- /* Resolve S: target symbol's new image-relative address. Look up
- * the LinkSection that contains the symbol's original vaddr, then
- * apply that section's delta. */
- if (tgt->imported) {
- /* IAT-routed: stub vaddr (functions) / slot vaddr (data). */
- if (!iv || iv->by_sym[r->target - 1u] == 0)
- compiler_panic(c, no_loc(),
- "link_emit_coff: imported target lacks IAT slot");
- S = iv->by_sym[r->target - 1u];
- } else if (tgt->kind == SK_ABS) {
- S = tgt->vaddr;
- } else if (tgt->defined) {
- tgt_sec = coff_symbol_section(img, tgt);
- if (!tgt_sec) {
- compiler_panic(c, no_loc(),
- "link_emit_coff: symbol vaddr 0x%llx has no "
- "containing section",
- (unsigned long long)tgt->vaddr);
- }
- u8 tb = map[tgt_sec->id - 1].bucket;
- u64 sym_off = tgt->vaddr - tgt_sec->vaddr;
- S = img_base + (u64)out[tb].rva +
- (u64)map[tgt_sec->id - 1].new_rva + sym_off;
- } else {
- /* Undef and not imported — shouldn't survive resolve_undefs. */
- compiler_panic(c, no_loc(),
- "link_emit_coff: unresolved non-imported symbol");
- }
- /* COFF-only section-relative kinds: the SECREL value is the
- * symbol's offset from the start of its containing output section
- * (PE bucket), and SECTION is the 1-based PE section index.
- * link_reloc_apply only sees S and P, so we patch these inline
- * before delegating common kinds. */
- if (r->kind == R_COFF_SECREL || r->kind == R_COFF_SECTION ||
- r->kind == R_COFF_AARCH64_SECREL_LOW12A ||
- r->kind == R_COFF_AARCH64_SECREL_HIGH12A) {
- if (!tgt->defined || tgt->kind == SK_ABS) {
- compiler_panic(c, no_loc(),
- "link_emit_coff: COFF SECREL/SECTION requires a "
- "defined section-bound target symbol");
- }
- u8 tb = map[tgt_sec->id - 1].bucket;
- u64 sym_off_in_bucket =
- (u64)map[tgt_sec->id - 1].new_rva + (tgt->vaddr - tgt_sec->vaddr);
- if (r->kind == R_COFF_SECREL) {
- u64 v = sym_off_in_bucket + (u64)r->addend;
- wr_u32_le(P_bytes, (u32)(v & 0xffffffffu));
- } else if (r->kind == R_COFF_SECTION) {
- /* PE section indices are 1-based; buckets are 0-based, so add 1. */
- wr_u16_le(P_bytes, (u16)((tb + 1u) & 0xffffu));
- } else {
- /* AArch64 SECREL_{LOW,HIGH}12A: patch the imm12 field of an
- * existing ADD-imm12 instruction. LOW12A = bits [11:0] of the
- * SECREL; HIGH12A = bits [23:12]. The instruction's sh bit was
- * already set by the codegen (0 for LOW, 1 for HIGH). */
- u64 v = sym_off_in_bucket + (u64)r->addend;
- u32 imm12 = (r->kind == R_COFF_AARCH64_SECREL_HIGH12A)
- ? (u32)((v >> 12) & 0xfffu)
- : (u32)(v & 0xfffu);
- u32 instr = rd_u32_le(P_bytes);
- instr = (instr & ~(0xfffu << 10)) | (imm12 << 10);
- wr_u32_le(P_bytes, instr);
- }
- continue;
- }
- link_reloc_apply(c, r->kind, P_bytes, S, r->addend, P);
- }
-}
-
-/* ---- header marshalling ----
- *
- * Each helper streams its on-disk shape to the writer field-by-field;
- * we avoid sizeof(struct) on the packed PE wire types since they carry
- * implicit-padding hazards on hosts that disagree with #pragma pack(1)
- * defaults. */
-
-static void coff_write_dos_stub(Writer* w) {
- u8 buf[PE_DOS_HDR_SIZE];
- memset(buf, 0, sizeof(buf));
- /* e_magic ("MZ") + e_lfanew (offset of PE signature). All other
- * legacy fields zero. */
- buf[0] = (u8)(IMAGE_DOS_SIGNATURE & 0xffu);
- buf[1] = (u8)((IMAGE_DOS_SIGNATURE >> 8) & 0xffu);
- wr_u32_le(buf + 0x3c, PE_DOS_E_LFANEW);
- cfree_writer_write(w, buf, sizeof(buf));
-}
-
-static void coff_write_file_header(Writer* w, u16 machine, u16 nsec,
- u16 characteristics) {
- coff_wr_u16(w, machine);
- coff_wr_u16(w, nsec);
- coff_wr_u32(w, 0u); /* TimeDateStamp */
- coff_wr_u32(w, 0u); /* PointerToSymbolTable */
- coff_wr_u32(w, 0u); /* NumberOfSymbols */
- coff_wr_u16(w, (u16)PE_OPT_HDR_SIZE); /* SizeOfOptionalHeader */
- coff_wr_u16(w, characteristics);
-}
-
-/* Per-section meta used by both the data-directory fill and the
- * IMAGE_SECTION_HEADER emit. Compactly captures everything the writer
- * needs to know about the four-or-five output sections. */
-typedef struct CoffOutHdr {
- const char* name;
- u32 vsize;
- u32 rva;
- u32 size_raw;
- u32 file_offset;
- u32 characteristics;
-} CoffOutHdr;
-
-static void coff_write_optional_header(Writer* w, u32 entry_rva,
- const CoffSection out[COFF_NBUCKETS],
- u32 headers_size_padded,
- u32 image_size,
- int pie,
- u16 subsystem,
- const CoffImportTable* it,
- const CoffTlsLayout* tls) {
- /* Standard fields. */
- coff_wr_u16(w, IMAGE_NT_OPTIONAL_HDR64_MAGIC);
- coff_wr_u8(w, PE_LINKER_MAJOR);
- coff_wr_u8(w, PE_LINKER_MINOR);
- /* SizeOfCode / SizeOfInitializedData / SizeOfUninitializedData. */
- u32 size_code = out[COFF_BUCKET_TEXT].in_image
- ? out[COFF_BUCKET_TEXT].size_raw : 0;
- u32 size_init = (out[COFF_BUCKET_RDATA].in_image
- ? out[COFF_BUCKET_RDATA].size_raw : 0) +
- (out[COFF_BUCKET_DATA].in_image
- ? out[COFF_BUCKET_DATA].size_raw : 0);
- u32 size_uninit = out[COFF_BUCKET_BSS].in_image
- ? out[COFF_BUCKET_BSS].size : 0;
- coff_wr_u32(w, size_code);
- coff_wr_u32(w, size_init);
- coff_wr_u32(w, size_uninit);
- coff_wr_u32(w, entry_rva);
- coff_wr_u32(w, out[COFF_BUCKET_TEXT].in_image
- ? out[COFF_BUCKET_TEXT].rva : 0);
- /* Windows-specific fields. */
- coff_wr_u64(w, PE_IMAGE_BASE);
- coff_wr_u32(w, PE_SECTION_ALIGNMENT);
- coff_wr_u32(w, PE_FILE_ALIGNMENT);
- coff_wr_u16(w, PE_OS_MAJOR);
- coff_wr_u16(w, PE_OS_MINOR);
- coff_wr_u16(w, 0u); /* MajorImageVersion */
- coff_wr_u16(w, 0u); /* MinorImageVersion */
- coff_wr_u16(w, PE_SUBSYS_MAJOR);
- coff_wr_u16(w, PE_SUBSYS_MINOR);
- coff_wr_u32(w, 0u); /* Win32VersionValue */
- coff_wr_u32(w, image_size);
- coff_wr_u32(w, headers_size_padded);
- coff_wr_u32(w, 0u); /* CheckSum */
- coff_wr_u16(w, subsystem ? subsystem : IMAGE_SUBSYSTEM_WINDOWS_CUI);
- coff_wr_u16(w, PE_DLL_CHARS);
- coff_wr_u64(w, PE_STACK_RESERVE);
- coff_wr_u64(w, PE_STACK_COMMIT);
- coff_wr_u64(w, PE_HEAP_RESERVE);
- coff_wr_u64(w, PE_HEAP_COMMIT);
- coff_wr_u32(w, 0u); /* LoaderFlags */
- coff_wr_u32(w, (u32)PE_NUM_DATA_DIRS);
- /* DataDirectory[16]. Populated entries:
- * [1] IMPORT — descriptor table RVA + total descriptor bytes
- * [5] BASERELOC — when PIE and .reloc is in the image
- * [12] IAT — first IAT block RVA + sum of per-DLL IAT sizes
- * Everything else stays zero. */
- u32 i;
- int has_idata = it && it->nimports > 0 &&
- out[COFF_BUCKET_IDATA].in_image;
- for (i = 0; i < PE_NUM_DATA_DIRS; ++i) {
- if (i == IMAGE_DIRECTORY_ENTRY_IMPORT && has_idata) {
- coff_wr_u32(w, out[COFF_BUCKET_IDATA].rva + it->desc_off);
- coff_wr_u32(w, it->desc_size);
- } else if (i == IMAGE_DIRECTORY_ENTRY_IAT && has_idata) {
- coff_wr_u32(w, out[COFF_BUCKET_IDATA].rva + it->iat_base);
- coff_wr_u32(w, it->iat_total);
- } else if (i == IMAGE_DIRECTORY_ENTRY_BASERELOC && pie &&
- out[COFF_BUCKET_RELOC].in_image) {
- coff_wr_u32(w, out[COFF_BUCKET_RELOC].rva);
- coff_wr_u32(w, out[COFF_BUCKET_RELOC].size);
- } else if (i == IMAGE_DIRECTORY_ENTRY_TLS && tls && tls->present) {
- coff_wr_u32(w, out[COFF_BUCKET_RDATA].rva + tls->dir_rdata_off);
- coff_wr_u32(w, COFF_TLS_DIRECTORY64_SIZE);
- } else {
- coff_wr_u32(w, 0u);
- coff_wr_u32(w, 0u);
- }
- }
-}
-
-static void coff_write_section_header(Writer* w, const char* name,
- u32 vsize, u32 rva, u32 size_raw,
- u32 file_offset,
- u32 characteristics) {
- u8 nm[8] = {0, 0, 0, 0, 0, 0, 0, 0};
- size_t n = slice_from_cstr(name).len;
- if (n > 8) n = 8;
- memcpy(nm, name, n);
- cfree_writer_write(w, nm, 8);
- coff_wr_u32(w, vsize);
- coff_wr_u32(w, rva);
- coff_wr_u32(w, size_raw);
- coff_wr_u32(w, file_offset);
- coff_wr_u32(w, 0u); /* PointerToRelocations */
- coff_wr_u32(w, 0u); /* PointerToLinenumbers */
- coff_wr_u16(w, 0u); /* NumberOfRelocations */
- coff_wr_u16(w, 0u); /* NumberOfLinenumbers */
- coff_wr_u32(w, characteristics);
-}
-
-/* ---- main entry ---- */
-
-void link_emit_coff(LinkImage* img, Writer* w) {
- Heap* heap = img->heap;
- Compiler* c = img->c;
- u16 machine = coff_machine_or_panic(c);
- if (img->entry_sym == LINK_SYM_NONE)
- compiler_panic(c, no_loc(),
- "link_emit_coff: no resolved entry symbol");
-
- /* ---- pass 1: build buckets + per-section delta map ---- */
- CoffSection out[COFF_NBUCKETS];
- CoffSecMap* map = (CoffSecMap*)heap->alloc(
- heap, sizeof(CoffSecMap) * (img->nsections + 1u),
- _Alignof(CoffSecMap));
- if (!map && img->nsections)
- compiler_panic(c, no_loc(), "link_emit_coff: oom on section map");
- memset(map, 0, sizeof(CoffSecMap) * (img->nsections + 1u));
-
- /* coff_build_buckets stashes per-bucket allocation caps in size_raw;
- * we read them back into a local before size_raw is recomputed by
- * coff_assign_layout so the cleanup path can free with the right
- * size. */
- coff_build_buckets(img, out, map);
- /* coff_build_buckets stashes per-bucket allocation caps in size_raw
- * (the only bucket field we own for the duration of layout); read
- * them out before coff_assign_layout overwrites the field. .reloc
- * and .idata aren't touched by coff_build_buckets — their caps are
- * filled in below once coff_build_reloc_section / coff_emit_idata
- * run. */
- u32 bucket_caps[COFF_NBUCKETS];
- u32 b;
- for (b = 0; b < COFF_NBUCKETS; ++b) bucket_caps[b] = out[b].size_raw;
-
- /* ---- pass 1b: collect imports and reserve .idata + .text stubs ----
- *
- * Builds the per-DLL / per-import layout and appends one IAT-routing
- * stub per imported function to the .text bucket. The .idata bucket
- * size is set here (so it counts in nsec); the stub vaddrs and
- * IAT-slot vaddrs are finalised after coff_assign_layout. */
- CoffImportTable imports;
- int have_imports = coff_collect_imports(img, &imports);
- if (have_imports) {
- coff_plan_idata_layout(img, &imports);
- coff_append_stubs(img, &imports, &out[COFF_BUCKET_TEXT],
- &bucket_caps[COFF_BUCKET_TEXT]);
- /* Reserve the .idata bucket size so coff_assign_layout / nsec
- * accounting sees it. Actual bytes are written by coff_emit_idata
- * once the bucket RVA is known. */
- out[COFF_BUCKET_IDATA].size = imports.idata_size;
- }
-
- /* ---- pass 1c: plan the TLS directory record ----
- *
- * If any SF_TLS sections survived, reserve 40 bytes at the tail of
- * .rdata for the IMAGE_TLS_DIRECTORY64. Bytes are zeroed now and
- * filled in by coff_emit_tls_dir once the bucket RVAs are final. */
- CoffTlsLayout tls;
- coff_plan_tls_layout(img, out, &bucket_caps[COFF_BUCKET_RDATA], &tls);
-
- /* ---- pass 2: decide whether .reloc will be in the image ----
- *
- * The headers' file size (and therefore every section's file
- * offset) depends on the section-table entry count, so we need to
- * commit to "is .reloc emitted?" before laying out file offsets.
- * .reloc lights up iff PIE and at least one absolute reloc points
- * into a kept section, OR a TLS directory is emitted (its four u64
- * VA fields all need base-relocs). */
- int emit_reloc = 0;
- if (img->pie) {
- u32 i;
- u32 nrel = LinkRelocs_count(&img->relocs);
- for (i = 0; i < nrel; ++i) {
- const LinkRelocApply* r = LinkRelocs_at(&img->relocs, i);
- if (!coff_reloc_needs_base_reloc(r->kind)) continue;
- if (r->link_section_id == LINK_SEC_NONE) continue;
- emit_reloc = 1;
- break;
- }
- if (!emit_reloc && tls.present) emit_reloc = 1;
- }
-
- u32 nsec = 0;
- for (b = 0; b < COFF_NBUCKETS; ++b) {
- if (b == COFF_BUCKET_RELOC) {
- if (emit_reloc) ++nsec; /* tentative; size set below */
- continue;
- }
- if (out[b].size) ++nsec;
- }
- u32 headers_size_unpadded =
- PE_DOS_HDR_SIZE + PE_SIG_SIZE + PE_FILE_HDR_SIZE + PE_OPT_HDR_SIZE +
- nsec * PE_SECTION_HDR_SIZE;
- u32 headers_size_padded =
- (u32)ALIGN_UP((u64)headers_size_unpadded, (u64)PE_FILE_ALIGNMENT);
-
- /* First layout pass: fixes RVAs / file offsets for buckets that
- * already have a finalised size (.text, .rdata, .idata, .data, .bss).
- * .reloc's RVA is provisional — it depends on .reloc's own size,
- * which is still 0 at this point. */
- (void)coff_assign_layout(out, headers_size_padded, PE_FIRST_SECTION_RVA);
-
- /* ---- pass 2b: emit .idata bytes + per-arch IAT stubs ----
- *
- * The .idata bucket's RVA is final after the first assign_layout;
- * stubs need it (the indirect-jump displacement targets an IAT slot)
- * and .idata's own descriptor / ILT / IAT records all carry RVAs.
- * coff_import_vaddr_build builds the per-LinkSymId override table
- * that apply_all_relocs consults in place of the (zero) symbol
- * vaddr for imported targets. */
- CoffImportVaddr import_vaddr;
- memset(&import_vaddr, 0, sizeof(import_vaddr));
- if (have_imports) {
- coff_emit_idata(img, &imports, out, &bucket_caps[COFF_BUCKET_IDATA]);
- coff_emit_stubs(img, &imports, out);
- coff_import_vaddr_build(img, &imports, out, &import_vaddr);
- }
-
- /* Write the TLS directory bytes now that bucket RVAs are final. */
- coff_emit_tls_dir(img, out, map, &tls);
-
- /* ---- pass 3: build .reloc using the now-final bucket RVAs ----
- *
- * coff_build_reloc_section reads out[bucket].rva indirectly via
- * map[].new_rva + (write_vaddr - sec->vaddr) → site offset within
- * the bucket; the absolute site_rva is bucket.rva + that offset.
- * Patch site RVAs are page-quantised in the emitted blob, so this
- * is the spot where the bucket RVAs need to be already final.
- *
- * TLS directory's four absolute-VA fields ride into the entries via
- * the `extras` array — they aren't ordinary symbol relocations, so
- * they don't show up in img->relocs. */
- if (emit_reloc) {
- CoffRelocEntry tls_extras[4];
- u32 n_tls_extras = 0;
- if (tls.present) {
- u32 dir_rva = out[COFF_BUCKET_RDATA].rva + tls.dir_rdata_off;
- static const u32 field_offs[4] = {
- COFF_TLSDIR_OFF_START_ADDR, COFF_TLSDIR_OFF_END_ADDR,
- COFF_TLSDIR_OFF_INDEX_ADDR, COFF_TLSDIR_OFF_CALLBACKS,
- };
- u32 k;
- for (k = 0; k < 4; ++k) {
- if (field_offs[k] == COFF_TLSDIR_OFF_CALLBACKS &&
- !tls.callbacks_sym)
- continue;
- tls_extras[n_tls_extras].site_rva = dir_rva + field_offs[k];
- tls_extras[n_tls_extras].type = (u16)IMAGE_REL_BASED_DIR64;
- tls_extras[n_tls_extras].pad = 0;
- ++n_tls_extras;
- }
- }
- coff_build_reloc_section(img, out, map, &out[COFF_BUCKET_RELOC],
- tls_extras, n_tls_extras);
- bucket_caps[COFF_BUCKET_RELOC] = out[COFF_BUCKET_RELOC].size_raw;
- /* size_raw was stashed by build; assign_layout below recomputes it
- * as the FileAlignment-padded length. */
- (void)coff_assign_layout(out, headers_size_padded,
- PE_FIRST_SECTION_RVA);
- }
-
- /* `_tls_used` is the public mingw/PE name for the TLS directory
- * record. Keep it in lockstep with the optional-header TLS data
- * directory, rather than leaving references bound to mingw's tlssup.o
- * placeholder record. */
- coff_define_tls_used(img, out, &tls);
-
- /* ---- pass 4: resolve entry symbol's PE RVA ----
- *
- * Done before apply so the optional-header field has its final
- * value. */
- const LinkSymbol* entry_sym =
- LinkSyms_at(&img->syms, img->entry_sym - 1);
- if (!entry_sym->defined || entry_sym->kind == SK_ABS)
- compiler_panic(c, no_loc(),
- "link_emit_coff: entry symbol is not a defined "
- "image-relative function");
- const LinkSection* entry_sec = coff_section_at(img, entry_sym->vaddr);
- if (!entry_sec)
- compiler_panic(c, no_loc(),
- "link_emit_coff: entry symbol has no containing "
- "section");
- u8 entry_bucket = map[entry_sec->id - 1].bucket;
- u32 entry_rva = out[entry_bucket].rva +
- map[entry_sec->id - 1].new_rva +
- (u32)(entry_sym->vaddr - entry_sec->vaddr);
-
- /* ---- pass 5: apply all relocations into bucket bytes ---- */
- coff_apply_all_relocs(img, out, map,
- have_imports ? &import_vaddr : NULL);
-
- /* ---- pass 6: compute SizeOfImage (in-memory size) ---- */
- u32 image_size = 0;
- for (b = 0; b < COFF_NBUCKETS; ++b) {
- if (!out[b].in_image) continue;
- u32 end = out[b].rva + out[b].size;
- if (end > image_size) image_size = end;
- }
- image_size = (u32)ALIGN_UP((u64)image_size, (u64)PE_SECTION_ALIGNMENT);
-
- /* ---- pass 7: write everything ---- */
- u16 file_chars = IMAGE_FILE_EXECUTABLE_IMAGE |
- IMAGE_FILE_LARGE_ADDRESS_AWARE;
- if (!img->pie || !out[COFF_BUCKET_RELOC].in_image) {
- file_chars |= IMAGE_FILE_RELOCS_STRIPPED;
- }
-
- coff_write_dos_stub(w);
- /* PE signature. */
- coff_wr_u32(w, IMAGE_NT_SIGNATURE);
- coff_write_file_header(w, machine, (u16)nsec, file_chars);
- u16 subsystem = img->linker ? img->linker->pe_subsystem : 0;
- coff_write_optional_header(w, entry_rva, out, headers_size_padded,
- image_size, img->pie, subsystem,
- have_imports ? &imports : NULL, &tls);
-
- /* Section table. */
- for (b = 0; b < COFF_NBUCKETS; ++b) {
- if (!out[b].in_image) continue;
- coff_write_section_header(w, out[b].name, out[b].size, out[b].rva,
- out[b].size_raw, out[b].file_offset,
- out[b].characteristics);
- }
-
- /* Pad to first section's file offset. */
- u64 cur = (u64)headers_size_unpadded;
- u64 first_file_off = headers_size_padded;
- if (cur < first_file_off) {
- coff_write_zeroes(w, first_file_off - cur);
- cur = first_file_off;
- }
-
- /* Section bodies. */
- for (b = 0; b < COFF_NBUCKETS; ++b) {
- if (!out[b].in_image) continue;
- if (!out[b].has_file_bytes) continue;
- if (cur < out[b].file_offset) {
- coff_write_zeroes(w, out[b].file_offset - cur);
- cur = out[b].file_offset;
- }
- cfree_writer_write(w, out[b].bytes, out[b].size);
- cur += out[b].size;
- if (out[b].size_raw > out[b].size) {
- coff_write_zeroes(w, out[b].size_raw - out[b].size);
- cur += out[b].size_raw - out[b].size;
- }
- }
-
- /* ---- cleanup ---- */
- for (b = 0; b < COFF_NBUCKETS; ++b) {
- if (out[b].bytes) heap->free(heap, out[b].bytes, bucket_caps[b]);
- }
- heap->free(heap, map, sizeof(CoffSecMap) * (img->nsections + 1u));
- if (have_imports) {
- coff_import_vaddr_free(img, &import_vaddr);
- coff_imports_free(img, &imports);
- }
-}
diff --git a/src/link/link_dyn.c b/src/link/link_dyn.c
@@ -1,982 +0,0 @@
-/* Phase 4 of dynamic linking: synthesize the dyn-link tables and
- * sections an ET_DYN ELF exe needs to be loadable by a real runtime
- * loader (musl ld-musl-aarch64.so.1).
- *
- * Inputs (computed by earlier passes):
- * - LinkSymbol entries with `imported = 1` (set by resolve_undefs's
- * DSO-search path; their dso_input_id names the providing DSO).
- * - LinkInputs of kind LINK_INPUT_DSO_BYTES carrying SONAMEs.
- *
- * Outputs (deposited on LinkImage.dyn):
- * - .interp PT_INTERP target string
- * - .dynsym + .dynstr symbol table + name pool
- * - .gnu.hash GNU-style hash for the loader
- * - .rela.dyn GLOB_DAT (data imports) + space for
- * R_AARCH64_RELATIVE records that
- * Phase 6 emit fills in
- * - .rela.plt JUMP_SLOT records (one per imported func)
- * - .plt allocated, body NOT emitted (Phase 5)
- * - .got.plt 3 reserved slots + 1 per PLT slot,
- * allocated, body NOT emitted
- * - .dynamic PT_DYNAMIC body, populated
- *
- * The .plt body / GOT-slot fill / CALL26 reloc rewriting are Phase 5;
- * they're called out at the relevant allocation site so the missing
- * pieces are obvious to anyone reading the output. The static-exe path
- * is unaffected — layout_dyn early-outs when emit_pie is 0.
- *
- * Allocator pattern follows layout_iplt (link_layout.c): grow segments
- * + sections via realloc, then page-align each new segment after the
- * existing image span. Synthetic sections carry input_id == LINK_INPUT_NONE
- * so downstream passes (emit_reloc_records, GC) leave them alone.
- */
-
-#include <string.h>
-
-#include "core/bytes.h"
-#include "core/heap.h"
-#include "core/pool.h"
-#include "core/slice.h"
-#include "core/util.h"
-#include "core/vec.h"
-#include "link/link.h"
-#include "link/link_arch.h"
-#include "link/link_internal.h"
-#include "obj/elf.h"
-
-static SrcLoc no_loc(void) {
- SrcLoc l = {0, 0, 0};
- return l;
-}
-
-/* ---- small allocators (mirror layout_iplt's helpers) ---- */
-
-static u32 dyn_alloc_segments(LinkImage* img, u32 nseg) {
- Heap* h = img->heap;
- u32 base = img->nsegments;
- u32 new_nseg = base + nseg;
- LinkSegment* nsegs = (LinkSegment*)h->realloc(
- h, img->segments, sizeof(*img->segments) * img->nsegments,
- sizeof(*img->segments) * new_nseg, _Alignof(LinkSegment));
- u8** nsbufs = (u8**)h->realloc(
- h, img->segment_bytes, sizeof(*img->segment_bytes) * img->nsegments,
- sizeof(*img->segment_bytes) * new_nseg, _Alignof(u8*));
- size_t* nscaps = (size_t*)h->realloc(
- h, img->segment_bytes_cap,
- sizeof(*img->segment_bytes_cap) * img->nsegments,
- sizeof(*img->segment_bytes_cap) * new_nseg, _Alignof(size_t));
- if (!nsegs || !nsbufs || !nscaps)
- compiler_panic(img->c, no_loc(), "link: oom on dyn segments");
- img->segments = nsegs;
- img->segment_bytes = nsbufs;
- img->segment_bytes_cap = nscaps;
- return base;
-}
-
-static u32 dyn_alloc_sections(LinkImage* img, u32 nsec) {
- Heap* h = img->heap;
- u32 base = img->nsections;
- u32 new_nsec = base + nsec;
- LinkSection* nsections = (LinkSection*)h->realloc(
- h, img->sections, sizeof(*img->sections) * img->nsections,
- sizeof(*img->sections) * new_nsec, _Alignof(LinkSection));
- if (!nsections) compiler_panic(img->c, no_loc(), "link: oom on dyn sections");
- img->sections = nsections;
- return base;
-}
-
-/* ---- byte-builder for .dynstr / .gnu.hash ---- */
-
-typedef struct ByteBuf {
- Heap* heap;
- u8* data;
- u32 len;
- u32 cap;
-} ByteBuf;
-
-static void bb_init(ByteBuf* b, Heap* h) {
- b->heap = h;
- b->data = NULL;
- b->len = 0;
- b->cap = 0;
-}
-static void bb_reserve(ByteBuf* b, u32 need) {
- if (need <= b->cap) return;
- (void)VEC_GROW(b->heap, b->data, b->cap, need);
-}
-static u32 bb_append(ByteBuf* b, const void* src, u32 n) {
- u32 off = b->len;
- bb_reserve(b, b->len + n);
- if (n) memcpy(b->data + b->len, src, n);
- b->len += n;
- return off;
-}
-static u32 bb_append_str(ByteBuf* b, const char* s, u32 n) {
- /* Linear dedup over what we've appended so far. Strtabs are small. */
- if (n == 0) return 0;
- if (b->len > n) {
- u32 i;
- for (i = 0; i + n < b->len; ++i) {
- if (b->data[i + n] == 0 && memcmp(b->data + i, s, n) == 0) return i;
- }
- }
- u32 off = b->len;
- bb_reserve(b, b->len + n + 1u);
- memcpy(b->data + b->len, s, n);
- b->data[b->len + n] = 0;
- b->len += n + 1u;
- return off;
-}
-
-/* ---- GNU-hash computation (psABI v1 hash) ----
- * Body layout:
- * u32 nbuckets
- * u32 symoffset (first hashed dynsym index)
- * u32 bloom_size (in 64-bit words)
- * u32 bloom_shift
- * u64 bloom[bloom_size]
- * u32 buckets[nbuckets]
- * u32 chains[ndynsym - symoffset]
- *
- * For Phase 4 we keep this very small: nbuckets = max(1, n/2),
- * bloom_size = 1, bloom_shift = 6 (64-bit ELFCLASS64). All hashed
- * symbols (sym_offset..ndynsym-1) participate in bloom + buckets +
- * chains. Slot 0..symoffset-1 are STN_UNDEF + locals, which the
- * loader doesn't hash. */
-
-static u32 gnu_hash_name(const char* s, u32 n) {
- /* h = 5381; for c in s: h = h * 33 + c */
- u32 h = 5381u;
- u32 i;
- for (i = 0; i < n; ++i) h = (h * 33u) + (u8)s[i];
- return h;
-}
-
-/* ---- partition: enumerate imports ----
- *
- * Walks LinkSyms and collects each `imported` symbol that's the
- * canonical entry in img->globals (resolve_undefs may stamp `imported`
- * onto multiple shadow slots of the same name; only the canonical one
- * lands in dynsym). The two output arrays are LinkSymIds: funcs first
- * (PLT-bound), then data (GOT-bound via GLOB_DAT). */
-
-typedef struct ImportLists {
- LinkSymId* funcs;
- u32 nfuncs;
- LinkSymId* datas;
- u32 ndatas;
-} ImportLists;
-
-static int sym_is_func_import(const LinkSymbol* s) {
- /* Most undef shadows have kind = SK_UNDEF (the obj reader keys kind
- * off shndx, not STT_*). Only useful when the canonical entry
- * carried a real type — fall through to the DSO lookup otherwise. */
- return s->kind == SK_FUNC || s->kind == SK_IFUNC;
-}
-
-/* Resolve an import's classifier kind by consulting its providing
- * DSO's dynsym. read_elf_dso preserves STT_FUNC / STT_OBJECT / etc.
- * on each defined export; the consumer's undef may have arrived as
- * SK_UNDEF (clang emits external refs as SHN_UNDEF, which the reader
- * collapses to SK_UNDEF regardless of STT_*). Returns 1 for func /
- * ifunc, 0 for everything else (or if the DSO export is missing). */
-static int dso_export_is_func(Linker* l, const LinkSymbol* s) {
- if (s->dso_input_id == LINK_INPUT_NONE) return 0;
- if (s->dso_input_id - 1u >= LinkInputs_count(&l->inputs)) return 0;
- LinkInput* in = LinkInputs_at(&l->inputs, s->dso_input_id - 1u);
- if (!in->obj) return 0;
- ObjSymIter* it = obj_symiter_new(in->obj);
- ObjSymEntry e;
- int is_func = 0;
- while (obj_symiter_next(it, &e)) {
- const ObjSym* es = e.sym;
- if (!es || es->name != s->name) continue;
- if (es->kind == SK_UNDEF) continue;
- is_func = (es->kind == SK_FUNC || es->kind == SK_IFUNC);
- break;
- }
- obj_symiter_free(it);
- return is_func;
-}
-
-static void collect_imports(Linker* l, LinkImage* img, Heap* h,
- ImportLists* il) {
- u32 i;
- u32 cap_f = 0, cap_d = 0;
- il->funcs = NULL;
- il->datas = NULL;
- il->nfuncs = il->ndatas = 0;
- for (i = 0; i < LinkSyms_count(&img->syms); ++i) {
- LinkSymbol* s = LinkSyms_at(&img->syms, i);
- if (!s->imported) continue;
- if (s->name == 0) continue;
- /* Only the canonical (img->globals) entry per name. */
- LinkSymId canonical = symhash_get(&img->globals, s->name);
- if (canonical != LINK_SYM_NONE && canonical != s->id) continue;
- int is_func = sym_is_func_import(s) || dso_export_is_func(l, s);
- if (is_func) {
- if (VEC_GROW(h, il->funcs, cap_f, il->nfuncs + 1u))
- compiler_panic(img->c, no_loc(), "link: oom on import-funcs");
- il->funcs[il->nfuncs++] = s->id;
- } else {
- if (VEC_GROW(h, il->datas, cap_d, il->ndatas + 1u))
- compiler_panic(img->c, no_loc(), "link: oom on import-datas");
- il->datas[il->ndatas++] = s->id;
- }
- }
-}
-
-static void free_imports(Heap* h, ImportLists* il) {
- if (il->funcs) h->free(h, il->funcs, sizeof(*il->funcs) * il->nfuncs);
- if (il->datas) h->free(h, il->datas, sizeof(*il->datas) * il->ndatas);
-}
-
-/* ---- DT_NEEDED set: each DSO input that contributed at least one
- * import. Order is input order so the loader sees deps in declaration
- * order. */
-static void collect_needed(Linker* l, LinkImage* img, LinkDynState* dyn) {
- Heap* h = img->heap;
- u8* used;
- u32 ninputs = LinkInputs_count(&l->inputs);
- u32 i, nused = 0;
-
- used = (u8*)h->alloc(h, ninputs ? ninputs : 1u, 1);
- if (!used) compiler_panic(img->c, no_loc(), "link: oom on needed map");
- memset(used, 0, ninputs ? ninputs : 1u);
-
- /* Mark every DSO that ended up satisfying at least one import. */
- for (i = 0; i < LinkSyms_count(&img->syms); ++i) {
- LinkSymbol* s = LinkSyms_at(&img->syms, i);
- if (!s->imported) continue;
- if (s->dso_input_id == LINK_INPUT_NONE) continue;
- if (s->dso_input_id - 1u >= ninputs) continue;
- used[s->dso_input_id - 1u] = 1;
- }
- /* Always pull every explicitly-supplied DSO into DT_NEEDED, even if
- * no import landed on it — matches GNU ld without --as-needed.
- * Phase 4 doesn't plumb --as-needed through to the resolver, so the
- * default "needed" behavior is the right baseline. */
- for (i = 0; i < ninputs; ++i) {
- LinkInput* in = LinkInputs_at(&l->inputs, i);
- if (in->kind == LINK_INPUT_DSO_BYTES && in->soname != 0) used[i] = 1;
- }
- for (i = 0; i < ninputs; ++i)
- if (used[i]) ++nused;
-
- dyn->needed =
- nused ? (Sym*)h->alloc(h, sizeof(Sym) * nused, _Alignof(Sym)) : NULL;
- if (nused && !dyn->needed)
- compiler_panic(img->c, no_loc(), "link: oom on needed list");
- dyn->nneeded = 0;
- for (i = 0; i < ninputs; ++i) {
- LinkInput* in = LinkInputs_at(&l->inputs, i);
- if (!used[i]) continue;
- if (in->soname == 0) continue;
- dyn->needed[dyn->nneeded++] = in->soname;
- }
- h->free(h, used, ninputs ? ninputs : 1u);
-}
-
-/* ---- dynsym + dynstr build ----
- *
- * Slot 0: STN_UNDEF (zero entry). The loader ignores names with index
- * 0; we still emit a dynstr entry at offset 0 (the leading NUL).
- *
- * Slots 1..nimports: imported symbols (functions first, then data).
- * st_shndx = SHN_UNDEF; the loader fills in the value at bind time.
- * st_value/size are zero — the static linker has no value for an
- * imported symbol.
- *
- * No `--export-dynamic` plumbing in Phase 4: only imports + the null
- * slot land in .dynsym. Adding exports is mechanical (walk
- * img->globals, append entries with st_shndx = matching .text/.data
- * section index) but isn't on the test/musl path. */
-
-static void build_dynsym(LinkImage* img, LinkDynState* dyn,
- const ImportLists* il, ByteBuf* dynstr) {
- Heap* h = img->heap;
- u32 nimports = il->nfuncs + il->ndatas;
- u32 ndynsym = 1u + nimports; /* +1 for null slot */
- u32 i;
-
- dyn->ndynsym = ndynsym;
- dyn->dynsym = (DynSymRec*)h->alloc(h, sizeof(*dyn->dynsym) * ndynsym,
- _Alignof(DynSymRec));
- if (!dyn->dynsym) compiler_panic(img->c, no_loc(), "link: oom on dynsym");
- memset(dyn->dynsym, 0, sizeof(*dyn->dynsym) * ndynsym);
-
- /* Slot 0: STN_UNDEF. dynstr leads with a NUL so st_name=0 reads as
- * the empty string. */
- {
- u8 z = 0;
- bb_append(dynstr, &z, 1);
- }
-
- /* Per-symbol: dedupe `sym_dynidx` lookup table. Sized to LinkSymId
- * upper bound. Clean (zero-filled) by alloc convention; we set
- * indices for imports below. */
- dyn->sym_dynidx_size = LinkSyms_count(&img->syms) + 1u;
- dyn->sym_dynidx = (u32*)h->alloc(
- h, sizeof(*dyn->sym_dynidx) * dyn->sym_dynidx_size, _Alignof(u32));
- if (!dyn->sym_dynidx)
- compiler_panic(img->c, no_loc(), "link: oom on sym_dynidx");
- memset(dyn->sym_dynidx, 0, sizeof(*dyn->sym_dynidx) * dyn->sym_dynidx_size);
- /* sym_plt_vaddr is populated alongside the PLT body emit below; here
- * we only allocate the parallel array. */
- dyn->sym_plt_vaddr = (u64*)h->alloc(
- h, sizeof(*dyn->sym_plt_vaddr) * dyn->sym_dynidx_size, _Alignof(u64));
- if (!dyn->sym_plt_vaddr)
- compiler_panic(img->c, no_loc(), "link: oom on sym_plt_vaddr");
- memset(dyn->sym_plt_vaddr, 0,
- sizeof(*dyn->sym_plt_vaddr) * dyn->sym_dynidx_size);
-
- /* All imports have STB_GLOBAL so first_global is right after the
- * single STN_UNDEF slot. (When local exports land via
- * --export-dynamic, this needs to grow.) */
- dyn->first_global = 1u;
-
- u32 idx = 1u;
- for (i = 0; i < il->nfuncs; ++i) {
- LinkSymId lsid = il->funcs[i];
- LinkSymbol* s = LinkSyms_at(&img->syms, lsid - 1);
- DynSymRec* r = &dyn->dynsym[idx];
- Slice nm_s = pool_slice(img->c->global, s->name);
- const char* nm = nm_s.s;
- size_t namelen = nm_s.len;
- r->st_name = bb_append_str(dynstr, nm, (u32)namelen);
- r->st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
- r->st_other = STV_DEFAULT;
- r->st_shndx = SHN_UNDEF;
- r->st_value = 0;
- r->st_size = 0;
- dyn->sym_dynidx[lsid] = idx;
- ++idx;
- }
- for (i = 0; i < il->ndatas; ++i) {
- LinkSymId lsid = il->datas[i];
- LinkSymbol* s = LinkSyms_at(&img->syms, lsid - 1);
- DynSymRec* r = &dyn->dynsym[idx];
- Slice nm_s = pool_slice(img->c->global, s->name);
- const char* nm = nm_s.s;
- size_t namelen = nm_s.len;
- u8 elf_type = STT_OBJECT;
- if (s->kind == SK_TLS)
- elf_type = STT_TLS;
- else if (s->kind == SK_NOTYPE)
- elf_type = STT_NOTYPE;
- r->st_name = bb_append_str(dynstr, nm, (u32)namelen);
- r->st_info = ELF64_ST_INFO(STB_GLOBAL, elf_type);
- r->st_other = STV_DEFAULT;
- r->st_shndx = SHN_UNDEF;
- r->st_value = 0;
- r->st_size = 0;
- dyn->sym_dynidx[lsid] = idx;
- ++idx;
- }
-}
-
-/* ---- .gnu.hash builder ----
- *
- * Hashed range is [first_global, ndynsym) — slot 0 (STN_UNDEF) is
- * unhashed. Layout matches loader expectations (musl, glibc, FreeBSD).
- *
- * Bucket count: max(1, hashed_count / 2), rounded up to odd so the
- * mod operation distributes more uniformly. Bloom is 1 word for
- * Phase 4 — a real implementation would scale with hashed_count, but
- * 1 word with shift=6 still satisfies the loader's correctness check
- * (any bit set is "maybe present"; false-positives only cost a chain
- * scan). */
-
-static void build_gnu_hash(Heap* h, LinkImage* img, LinkDynState* dyn,
- const ByteBuf* dynstr) {
- u32 hashed = (dyn->ndynsym > dyn->first_global)
- ? (dyn->ndynsym - dyn->first_global)
- : 0u;
- u32 nbuckets = hashed ? hashed : 1u;
- /* Round nbuckets up to next odd number. */
- if ((nbuckets & 1u) == 0u) nbuckets += 1u;
- u32 bloom_size = 1u; /* 64-bit word */
- u32 bloom_shift = 6u;
- u32 sym_offset = dyn->first_global;
- u32 hdr_bytes = 16u; /* nbuckets/symoff/bloomsz/bloomshift */
- u32 bloom_bytes = bloom_size * 8u;
- u32 buckets_bytes = nbuckets * 4u;
- u32 chains_bytes = hashed * 4u;
- u32 total = hdr_bytes + bloom_bytes + buckets_bytes + chains_bytes;
-
- u8* buf = (u8*)h->alloc(h, total ? total : 1u, 4);
- if (!buf) compiler_panic(img->c, no_loc(), "link: oom on .gnu.hash");
- memset(buf, 0, total);
-
- wr_u32_le(buf + 0, nbuckets);
- wr_u32_le(buf + 4, sym_offset);
- wr_u32_le(buf + 8, bloom_size);
- wr_u32_le(buf + 12, bloom_shift);
-
- /* Bloom + buckets + chains. We need each hashed symbol's hash. */
- if (hashed) {
- u32 i;
- u32* hashes = (u32*)h->alloc(h, sizeof(u32) * hashed, _Alignof(u32));
- if (!hashes)
- compiler_panic(img->c, no_loc(), "link: oom on .gnu.hash hashes");
- for (i = 0; i < hashed; ++i) {
- const DynSymRec* r = &dyn->dynsym[sym_offset + i];
- const char* name = (const char*)dynstr->data + r->st_name;
- size_t n = name ? slice_from_cstr(name).len : 0;
- hashes[i] = gnu_hash_name(name, (u32)n);
- }
-
- /* Bloom filter: H[i] / H[i] >> shift */
- u64 bloom = 0;
- for (i = 0; i < hashed; ++i) {
- u32 h1 = hashes[i] % 64u;
- u32 h2 = (hashes[i] >> bloom_shift) % 64u;
- bloom |= ((u64)1 << h1) | ((u64)1 << h2);
- }
- wr_u64_le(buf + hdr_bytes, bloom);
-
- /* Buckets/chains: for each hashed sym, append to its bucket's
- * chain. The chain encodes (hash & ~1) per entry; the LSB is set
- * on the LAST entry in a bucket to terminate. Buckets are filled
- * with the first chain index that hashes there (1-based into the
- * dynsym, i.e. `sym_offset + i`). */
- u32* buckets = (u32*)(buf + hdr_bytes + bloom_bytes);
- u32* chains = (u32*)(buf + hdr_bytes + bloom_bytes + buckets_bytes);
- /* First pass: bucket = first sym index that hashes there. */
- for (i = 0; i < hashed; ++i) {
- u32 b = hashes[i] % nbuckets;
- if (buckets[b] == 0) buckets[b] = sym_offset + i;
- }
- /* Second pass: chain[i] = hash with LSB cleared; LSB set if next
- * sym is in a different bucket. Walk symbols in order; LSB on
- * chain[i] when sym i+1 is in a different bucket OR is the end. */
- for (i = 0; i < hashed; ++i) {
- u32 v = hashes[i] & ~1u;
- int last = (i + 1 == hashed) ||
- ((hashes[i + 1] % nbuckets) != (hashes[i] % nbuckets));
- if (last) v |= 1u;
- chains[i] = v;
- }
- /* Fix bucket→first-sym indices: if multiple syms share a bucket
- * but were inserted out of contiguous order, we need them
- * contiguous. We assumed contiguity above without enforcing it.
- * For Phase 4 with small hashed sets this is fine, but flag the
- * shortcut. */
- h->free(h, hashes, sizeof(u32) * hashed);
- }
-
- dyn->gnu_hash = buf;
- dyn->gnu_hash_len = total;
-}
-
-/* ---- .dynamic body builder ----
- *
- * Computed at layout time so the size is known before segments are
- * placed. Each entry is two u64s (d_tag, d_un.d_val|d_un.d_ptr).
- * Final entry is DT_NULL. The d_ptr fields that point at other
- * synthetic sections are filled with image-relative vaddrs; the emit
- * pass adds load-base / IMAGE_BASE only when ET_EXEC. */
-
-typedef struct DynEntry {
- u64 tag;
- u64 val; /* either d_val or d_ptr; emit just writes 8 bytes */
-} DynEntry;
-
-static u32 count_dynamic_entries(const LinkDynState* dyn) {
- /* Required: DT_STRTAB DT_STRSZ DT_SYMTAB DT_SYMENT DT_GNU_HASH
- * DT_FLAGS_1 (DF_1_NOW for eager binding)
- * DT_NULL terminator
- * Optional (only when there are .rela.dyn records):
- * DT_RELA DT_RELASZ DT_RELAENT
- * Optional (only when there are imported functions / a PLT):
- * DT_PLTGOT DT_PLTRELSZ DT_PLTREL DT_JMPREL
- * Plus DT_NEEDED per dependency. */
- u32 n = dyn->nneeded;
- n += 6; /* 5 fixed + DT_NULL */
- if (dyn->cap_rela_dyn) n += 3; /* DT_RELA + DT_RELASZ + DT_RELAENT */
- if (dyn->nrela_plt) n += 4; /* PLT-only entries */
- return n;
-}
-
-/* ---- main entry ---- */
-
-void layout_dyn(Linker* l, LinkImage* img) {
- Heap* h = img->heap;
- LinkDynState* dyn;
- ImportLists imports;
- ByteBuf dynstr;
- u64 page;
- const LinkArchDesc* arch;
-
- if (!l->emit_pie) return;
-
- arch = link_arch_desc_for(l->c);
- if (!arch)
- compiler_panic(img->c, no_loc(), "link: layout_dyn: no arch descriptor");
-
- dyn = (LinkDynState*)h->alloc(h, sizeof(*dyn), _Alignof(LinkDynState));
- if (!dyn) compiler_panic(img->c, no_loc(), "link: oom on dyn state");
- memset(dyn, 0, sizeof(*dyn));
- img->dyn = dyn;
- img->pie = 1;
-
- /* PT_INTERP path. Default to the canonical musl loader matching the
- * target arch (per-arch table in src/arch/<arch>/link.c) when the caller
- * didn't set one. Drivers like cfree-cc always override via
- * link_set_interp_path; this default is correctness for direct
- * libcfree consumers. glibc users have to set their interp
- * explicitly — we don't pick a default for them. */
- dyn->interp_path =
- l->interp_path
- ? l->interp_path
- : pool_intern_slice(l->c->global, slice_from_cstr(arch->default_musl_interp));
-
- /* Step 1: enumerate imports + DT_NEEDED. */
- collect_imports(l, img, h, &imports);
- collect_needed(l, img, dyn);
-
- /* Step 2: build .dynstr + .dynsym. .dynstr must also carry the
- * DT_NEEDED soname strings the .dynamic body references; intern
- * them after the import names so build_dynsym's de-dup also covers
- * any name that happens to collide with a soname. */
- bb_init(&dynstr, h);
- build_dynsym(img, dyn, &imports, &dynstr);
- {
- u32 ni;
- for (ni = 0; ni < dyn->nneeded; ++ni) {
- Slice s_s = pool_slice(l->c->global, dyn->needed[ni]);
- const char* s = s_s.s;
- size_t slen = s_s.len;
- if (s && slen) (void)bb_append_str(&dynstr, s, (u32)slen);
- }
- }
- dyn->dynstr = dynstr.data;
- dyn->dynstr_len = dynstr.len;
-
- /* Step 3: .gnu.hash. */
- build_gnu_hash(h, img, dyn, &dynstr);
-
- /* Step 4: pre-size all the synthetic sections.
- * .interp: strlen + 1
- * .dynsym: 24 * ndynsym
- * .dynstr: dynstr_len
- * .gnu.hash: gnu_hash_len
- * .rela.dyn: 24 * (ndatas + cap_relative) — we reserve 4096 entries
- * for RELATIVE; emit fills them. (Quick-and-dirty: the
- * static path never has so many internal absolute relocs.)
- * .rela.plt: 24 * nfuncs
- * .plt: 32 + 16 * nfuncs (PLT0 + per-slot)
- * .got.plt: 8 * (3 + nfuncs)
- * .dynamic: 16 * count_dynamic_entries
- */
- dyn->nplt = imports.nfuncs;
- dyn->nrela_plt = imports.nfuncs;
- dyn->rela_plt = imports.nfuncs
- ? (DynRela*)h->alloc(h, sizeof(DynRela) * imports.nfuncs,
- _Alignof(DynRela))
- : NULL;
- if (imports.nfuncs && !dyn->rela_plt)
- compiler_panic(img->c, no_loc(), "link: oom on rela_plt");
-
- /* RELA dyn: GLOB_DAT (one per imported abs-relocated symbol) +
- * RELATIVE (one per PIE internal abs reloc against a defined sym).
- * Phase 5 emits these dynamically during reloc-apply; pre-count the
- * exact total here (img->relocs and the resolve-time `imported` flags
- * are already settled by the time layout_dyn runs) so the section
- * isn't padded with hundreds of trailing R_*_NONE records. */
- u32 cap_rel = 0;
- {
- u32 ri;
- for (ri = 0; ri < LinkRelocs_count(&img->relocs); ++ri) {
- const LinkRelocApply* r = LinkRelocs_at(&img->relocs, ri);
- const LinkSymbol* tgt = LinkSyms_at(&img->syms, r->target - 1);
- if (r->kind != R_ABS32 && r->kind != R_ABS64) continue;
- if (tgt->imported) {
- cap_rel++; /* GLOB_DAT */
- } else if (tgt->defined && tgt->kind != SK_ABS) {
- cap_rel++; /* RELATIVE */
- }
- }
- }
- dyn->cap_rela_dyn = cap_rel;
- dyn->rela_dyn =
- dyn->cap_rela_dyn
- ? (DynRela*)h->alloc(h, sizeof(DynRela) * dyn->cap_rela_dyn,
- _Alignof(DynRela))
- : NULL;
- if (dyn->cap_rela_dyn && !dyn->rela_dyn)
- compiler_panic(img->c, no_loc(), "link: oom on rela_dyn");
- dyn->nrela_dyn = 0;
-
- Slice interp_s = pool_slice(l->c->global, dyn->interp_path);
- const char* interp_str = interp_s.s;
- size_t namelen = interp_s.len;
- u64 interp_bytes = (u64)namelen + 1u;
- u64 dynsym_bytes = (u64)dyn->ndynsym * ELF64_SYM_SIZE;
- u64 dynstr_bytes = (u64)dyn->dynstr_len;
- u64 gnuhash_bytes = (u64)dyn->gnu_hash_len;
- /* rela.dyn / rela.plt sized for full capacity; emit only writes
- * what's populated, but the section's file_size matches capacity
- * so PT_LOAD/.rela.dyn shdr sh_size add up. Trailing zero records
- * are harmless to the loader (R_AARCH64_NONE). */
- u64 rela_dyn_bytes = (u64)dyn->cap_rela_dyn * ELF64_RELA_SIZE;
- u64 rela_plt_bytes = (u64)dyn->nrela_plt * ELF64_RELA_SIZE;
- u64 plt_bytes =
- (u64)(imports.nfuncs
- ? arch->plt0_size + arch->plt_entry_size * imports.nfuncs
- : 0u);
- u64 gotplt_bytes = (u64)(imports.nfuncs ? 8u * (3u + imports.nfuncs) : 0u);
- dyn->ndyn_entries = count_dynamic_entries(dyn);
- u64 dynamic_bytes = (u64)dyn->ndyn_entries * ELF64_DYN_SIZE;
-
- /* Step 5: place segments, page-aligned after the existing image
- * span. New segments:
- * ro_seg (PF_R) — .interp + .dynsym + .dynstr + .gnu.hash +
- * .rela.dyn + .rela.plt
- * rx_seg (PF_R+X)— .plt (only when imports.nfuncs > 0)
- * rw_seg (PF_R+W)— .got.plt + .dynamic
- *
- * .dynamic lives in rw_seg because glibc's loader patches DT_*
- * d_un.d_ptr fields in-place at startup (elf_get_dynamic_info
- * adjusts STRTAB/SYMTAB/etc. by l_addr); a PF_R-only segment
- * causes SEGV_ACCERR. musl's loader doesn't do this rewrite, but
- * the RW placement is conventional and works for both.
- */
- page = 0x4000u; /* keep aligned with layout_page_size default */
- {
- /* Read the page size from layout_page_size by re-using the
- * configured execmem if present — duplicates the helper rather
- * than expose it; the value is only used for alignment. */
- const CfreeExecMem* m = (l && l->jit_host) ? l->jit_host->execmem : NULL;
- if (m && m->page_size) page = (u64)m->page_size;
- }
-
- u64 base_vaddr = 0;
- u32 i;
- for (i = 0; i < img->nsegments; ++i) {
- u64 end = img->segments[i].vaddr + img->segments[i].mem_size;
- if (end > base_vaddr) base_vaddr = end;
- }
- base_vaddr = ALIGN_UP(base_vaddr, page);
-
- /* Pack ro section offsets (relative to ro_seg.vaddr). 8-byte
- * alignment for tables; 4-byte for .interp string. */
- u64 off = 0;
- u64 interp_off = off;
- off = ALIGN_UP(off + interp_bytes, 8u);
- u64 dynsym_off = off;
- off = ALIGN_UP(off + dynsym_bytes, 8u);
- u64 dynstr_off = off;
- off = ALIGN_UP(off + dynstr_bytes, 8u);
- u64 gnuhash_off = off;
- off = ALIGN_UP(off + gnuhash_bytes, 8u);
- u64 rela_dyn_off = off;
- off = ALIGN_UP(off + rela_dyn_bytes, 8u);
- u64 rela_plt_off = off;
- off = ALIGN_UP(off + rela_plt_bytes, 8u);
- u64 ro_seg_size = off;
-
- /* When no PLT is needed, suppress the RX/.plt segment entirely. */
- int has_plt = imports.nfuncs > 0;
-
- /* Pack rw_seg offsets: .got.plt (when has_plt) followed by .dynamic. */
- u64 rw_off = 0;
- u64 gotplt_off = rw_off;
- if (has_plt) rw_off = ALIGN_UP(rw_off + gotplt_bytes, 8u);
- u64 dynamic_off = rw_off;
- rw_off = ALIGN_UP(rw_off + dynamic_bytes, 8u);
- u64 rw_seg_size = rw_off;
-
- u64 ro_vaddr = base_vaddr;
- u64 rx_vaddr = ALIGN_UP(ro_vaddr + ro_seg_size, page);
- u64 rw_vaddr = ALIGN_UP(rx_vaddr + (has_plt ? plt_bytes : 0u), page);
-
- /* rw_seg always exists (it carries .dynamic). */
- u32 nseg = 2u + (has_plt ? 1u : 0u);
- u32 seg_base = dyn_alloc_segments(img, nseg);
- u32 ro_seg_idx = seg_base + 0u;
- u32 rx_seg_idx = has_plt ? seg_base + 1u : 0u;
- u32 rw_seg_idx = seg_base + (has_plt ? 2u : 1u);
-
- LinkSegment* ro_seg = &img->segments[ro_seg_idx];
- memset(ro_seg, 0, sizeof(*ro_seg));
- ro_seg->id = (LinkSegmentId)(ro_seg_idx + 1u);
- ro_seg->flags = SF_ALLOC; /* PF_R */
- ro_seg->file_offset = ro_vaddr;
- ro_seg->vaddr = ro_vaddr;
- ro_seg->file_size = ro_seg_size;
- ro_seg->mem_size = ro_seg_size;
- ro_seg->align = (u32)page;
- ro_seg->nsections = 6;
- img->segment_bytes[ro_seg_idx] =
- ro_seg_size ? (u8*)h->alloc(h, (size_t)ro_seg_size, 16) : NULL;
- img->segment_bytes_cap[ro_seg_idx] = (size_t)ro_seg_size;
- if (ro_seg_size && !img->segment_bytes[ro_seg_idx])
- compiler_panic(img->c, no_loc(), "link: oom on ro dyn segment");
- if (ro_seg_size)
- memset(img->segment_bytes[ro_seg_idx], 0, (size_t)ro_seg_size);
-
- if (has_plt) {
- LinkSegment* rx_seg = &img->segments[rx_seg_idx];
- memset(rx_seg, 0, sizeof(*rx_seg));
- rx_seg->id = (LinkSegmentId)(rx_seg_idx + 1u);
- rx_seg->flags = SF_ALLOC | SF_EXEC;
- rx_seg->file_offset = rx_vaddr;
- rx_seg->vaddr = rx_vaddr;
- rx_seg->file_size = plt_bytes;
- rx_seg->mem_size = plt_bytes;
- rx_seg->align = (u32)page;
- rx_seg->nsections = 1;
- img->segment_bytes[rx_seg_idx] = (u8*)h->alloc(h, (size_t)plt_bytes, 16);
- img->segment_bytes_cap[rx_seg_idx] = (size_t)plt_bytes;
- if (!img->segment_bytes[rx_seg_idx])
- compiler_panic(img->c, no_loc(), "link: oom on .plt segment");
- memset(img->segment_bytes[rx_seg_idx], 0, (size_t)plt_bytes);
- /* Stash plt / got.plt vaddrs now — the PLT body emit just below
- * reads them, and the post-shift fixup in shift_image_addresses
- * (link_elf.c) keys on these fields too. */
- dyn->plt_vaddr = rx_vaddr;
- dyn->plt_size = plt_bytes;
- dyn->got_plt_vaddr = rw_vaddr;
- dyn->got_plt_size = gotplt_bytes;
- /* PLT body emit: the descriptor owns the psABI-specific bytes. */
- if (!arch->emit_plt0 || !arch->emit_plt_entry)
- compiler_panic(l->c, no_loc(), "link: PLT emit not configured");
- {
- u8* plt_b = img->segment_bytes[rx_seg_idx];
- u32 ki;
- arch->emit_plt0(plt_b, dyn->plt_vaddr, dyn->got_plt_vaddr);
- for (ki = 0; ki < imports.nfuncs; ++ki) {
- u64 entry_vaddr = dyn->plt_vaddr + arch->plt0_size +
- (u64)arch->plt_entry_size * (u64)ki;
- u64 slot_vaddr = dyn->got_plt_vaddr + 8u * (3u + ki);
- u8* p =
- plt_b + arch->plt0_size + (size_t)arch->plt_entry_size * (size_t)ki;
- arch->emit_plt_entry(p, entry_vaddr, slot_vaddr);
- }
- }
- }
- /* rw_seg always exists — it carries .dynamic, plus .got.plt when
- * imports are present. */
- {
- LinkSegment* rw_seg = &img->segments[rw_seg_idx];
- memset(rw_seg, 0, sizeof(*rw_seg));
- rw_seg->id = (LinkSegmentId)(rw_seg_idx + 1u);
- rw_seg->flags = SF_ALLOC | SF_WRITE;
- rw_seg->file_offset = rw_vaddr;
- rw_seg->vaddr = rw_vaddr;
- rw_seg->file_size = rw_seg_size;
- rw_seg->mem_size = rw_seg_size;
- rw_seg->align = (u32)page;
- rw_seg->nsections = has_plt ? 2u : 1u;
- img->segment_bytes[rw_seg_idx] = (u8*)h->alloc(h, (size_t)rw_seg_size, 16);
- img->segment_bytes_cap[rw_seg_idx] = (size_t)rw_seg_size;
- if (!img->segment_bytes[rw_seg_idx])
- compiler_panic(img->c, no_loc(), "link: oom on rw dyn segment");
- /* Zero-initialize. .got.plt[0] (&.dynamic) is filled later, after
- * shift_image_addresses has bumped dyn->dynamic_vaddr. .dynamic
- * body is built post-shift in link_emit_elf. Loader
- * patches all .got.plt slots from .rela.plt before user code
- * under DF_1_NOW. */
- memset(img->segment_bytes[rw_seg_idx], 0, (size_t)rw_seg_size);
- }
- img->nsegments += nseg;
-
- /* Step 6: synthetic LinkSection entries. Order in img->sections
- * matches the loader-friendly file order and feeds emit's
- * outshdr-merge pass. */
- u32 nsec = 7u + (has_plt ? 2u : 0u);
- u32 sec_base = dyn_alloc_sections(img, nsec);
-
- /* helper: populate a fresh LinkSection for a segment-internal range */
- /* Inline because the args differ enough (sem, name) per slot. */
- Sym name_interp = pool_intern_slice(l->c->global, SLICE_LIT(".interp"));
- Sym name_dynsym = pool_intern_slice(l->c->global, SLICE_LIT(".dynsym"));
- Sym name_dynstr = pool_intern_slice(l->c->global, SLICE_LIT(".dynstr"));
- Sym name_gnu_hash = pool_intern_slice(l->c->global, SLICE_LIT(".gnu.hash"));
- Sym name_rela_dyn = pool_intern_slice(l->c->global, SLICE_LIT(".rela.dyn"));
- Sym name_rela_plt = pool_intern_slice(l->c->global, SLICE_LIT(".rela.plt"));
- Sym name_dynamic = pool_intern_slice(l->c->global, SLICE_LIT(".dynamic"));
- Sym name_plt = pool_intern_slice(l->c->global, SLICE_LIT(".plt"));
- Sym name_got_plt = pool_intern_slice(l->c->global, SLICE_LIT(".got.plt"));
-
-#define INIT_SEC(IDX, NAME, SEG_IDX, OFF_IN_SEG, SIZE, ALIGN, FLAGS, SEM) \
- do { \
- LinkSection* ls = &img->sections[sec_base + (IDX)]; \
- memset(ls, 0, sizeof(*ls)); \
- ls->id = (LinkSectionId)(sec_base + (IDX) + 1u); \
- ls->input_id = LINK_INPUT_NONE; \
- ls->obj_section_id = OBJ_SEC_NONE; \
- ls->segment_id = img->segments[(SEG_IDX)].id; \
- ls->input_offset = (OFF_IN_SEG); \
- ls->file_offset = img->segments[(SEG_IDX)].file_offset + (OFF_IN_SEG); \
- ls->vaddr = img->segments[(SEG_IDX)].vaddr + (OFF_IN_SEG); \
- ls->size = (SIZE); \
- ls->flags = (FLAGS); \
- ls->align = (ALIGN); \
- ls->name = (NAME); \
- ls->sem = (SEM); \
- } while (0)
-
- INIT_SEC(0, name_interp, ro_seg_idx, interp_off, interp_bytes, 1, SF_ALLOC,
- SSEM_PROGBITS);
- INIT_SEC(1, name_dynsym, ro_seg_idx, dynsym_off, dynsym_bytes, 8, SF_ALLOC,
- SSEM_PROGBITS);
- INIT_SEC(2, name_dynstr, ro_seg_idx, dynstr_off, dynstr_bytes, 1, SF_ALLOC,
- SSEM_PROGBITS);
- INIT_SEC(3, name_gnu_hash, ro_seg_idx, gnuhash_off, gnuhash_bytes, 8,
- SF_ALLOC, SSEM_PROGBITS);
- INIT_SEC(4, name_rela_dyn, ro_seg_idx, rela_dyn_off, rela_dyn_bytes, 8,
- SF_ALLOC, SSEM_PROGBITS);
- INIT_SEC(5, name_rela_plt, ro_seg_idx, rela_plt_off, rela_plt_bytes, 8,
- SF_ALLOC, SSEM_PROGBITS);
- INIT_SEC(6, name_dynamic, rw_seg_idx, dynamic_off, dynamic_bytes, 8,
- SF_ALLOC | SF_WRITE, SSEM_PROGBITS);
-
- dyn->sec_interp = (LinkSectionId)(sec_base + 0 + 1u);
- dyn->sec_dynsym = (LinkSectionId)(sec_base + 1 + 1u);
- dyn->sec_dynstr = (LinkSectionId)(sec_base + 2 + 1u);
- dyn->sec_gnu_hash = (LinkSectionId)(sec_base + 3 + 1u);
- dyn->sec_rela_dyn = (LinkSectionId)(sec_base + 4 + 1u);
- dyn->sec_rela_plt = (LinkSectionId)(sec_base + 5 + 1u);
- dyn->sec_dynamic = (LinkSectionId)(sec_base + 6 + 1u);
- dyn->dynamic_vaddr = img->segments[rw_seg_idx].vaddr + dynamic_off;
- dyn->dynamic_size = dynamic_bytes;
-
- if (has_plt) {
- INIT_SEC(7, name_plt, rx_seg_idx, 0, plt_bytes, 16, SF_ALLOC | SF_EXEC,
- SSEM_PROGBITS);
- INIT_SEC(8, name_got_plt, rw_seg_idx, gotplt_off, gotplt_bytes, 8,
- SF_ALLOC | SF_WRITE, SSEM_PROGBITS);
- dyn->sec_plt = (LinkSectionId)(sec_base + 7 + 1u);
- dyn->sec_got_plt = (LinkSectionId)(sec_base + 8 + 1u);
- }
-#undef INIT_SEC
-
- img->nsections += nsec;
-
- /* Step 7: copy .interp / .dynsym / .dynstr / .gnu.hash bytes into
- * the ro segment. .dynamic body is built during emit (it embeds
- * runtime vaddrs that PIE keeps image-relative; emit just reads
- * the section ids' final vaddrs). */
- u8* ro_bytes = img->segment_bytes[ro_seg_idx];
-
- /* .interp */
- if (interp_bytes && ro_bytes)
- memcpy(ro_bytes + interp_off, interp_str, (size_t)interp_bytes);
-
- /* .dynsym: serialize DynSymRec to ELF64 wire layout. */
- {
- u32 si;
- for (si = 0; si < dyn->ndynsym; ++si) {
- u8* p = ro_bytes + dynsym_off + (u64)si * ELF64_SYM_SIZE;
- const DynSymRec* r = &dyn->dynsym[si];
- wr_u32_le(p + 0, r->st_name);
- p[4] = r->st_info;
- p[5] = r->st_other;
- wr_u16_le(p + 6, r->st_shndx);
- wr_u64_le(p + 8, r->st_value);
- wr_u64_le(p + 16, r->st_size);
- }
- }
-
- /* .dynstr */
- if (dynstr_bytes && ro_bytes && dyn->dynstr)
- memcpy(ro_bytes + dynstr_off, dyn->dynstr, dyn->dynstr_len);
-
- /* .gnu.hash */
- if (gnuhash_bytes && ro_bytes && dyn->gnu_hash)
- memcpy(ro_bytes + gnuhash_off, dyn->gnu_hash, dyn->gnu_hash_len);
-
- /* .rela.plt: emit JUMP_SLOT records, one per imported function, and
- * stash each import's PLT-entry vaddr in `sym_plt_vaddr` so the
- * apply pass can redirect CALL26/JUMP26 against the import. The
- * record's r_offset addresses the .got.plt slot the PLT stub reads
- * through; the loader patches that slot to the resolved runtime
- * address before user code runs (DF_1_NOW, BIND_NOW). Bytes are
- * written here at pre-shift vaddrs; link_emit re-serializes them
- * after shift_image_addresses bumps the dyn vaddrs by headers_load. */
- {
- u32 ki;
- for (ki = 0; ki < imports.nfuncs; ++ki) {
- LinkSymId lsid = imports.funcs[ki];
- u32 dynidx = dyn->sym_dynidx[lsid];
- u64 slot_vaddr = dyn->got_plt_vaddr + 8u * (3u + ki);
- u64 plt_entry_vaddr = dyn->plt_vaddr + arch->plt0_size +
- (u64)arch->plt_entry_size * (u64)ki;
- DynRela* r = &dyn->rela_plt[ki];
- r->r_offset = slot_vaddr;
- r->r_info = ELF64_R_INFO((u64)dynidx, arch->elf_r_jump_slot);
- r->r_addend = 0;
- /* Serialize into segment bytes (will be re-serialized post-shift). */
- u8* p = ro_bytes + rela_plt_off + (u64)ki * ELF64_RELA_SIZE;
- wr_u64_le(p + 0, r->r_offset);
- wr_u64_le(p + 8, r->r_info);
- wr_u64_le(p + 16, (u64)r->r_addend);
- /* sym_plt_vaddr is consulted by apply_all_relocs. */
- dyn->sym_plt_vaddr[lsid] = plt_entry_vaddr;
- }
- }
-
- /* .rela.dyn entries (GLOB_DAT for imports referenced via .got, and
- * RELATIVE for PIE internal abs fixups) are emitted by
- * apply_all_relocs as it walks every relocation. layout_dyn
- * leaves .rela.dyn empty here; the bytes are written post-shift in
- * link_emit_elf. */
-
- /* .got.plt prelude: for BIND_NOW we leave the body zero — the
- * loader patches every slot from .rela.plt before user code. Some
- * loaders still inspect slot 0 (&.dynamic) at startup; provide it
- * so glibc-style loaders don't fault. The loader writes the link_map
- * cookie into slot 1 at load time. */
- if (has_plt) {
- u8* gp_bytes = img->segment_bytes[rw_seg_idx];
- if (gp_bytes && gotplt_bytes >= 8u) {
- wr_u64_le(gp_bytes, dyn->dynamic_vaddr);
- /* Slots 1, 2, and per-PLT slots stay zero until the loader
- * fills them. Phase 5 would prefill the per-PLT slots with
- * the address of PLT0 to support lazy binding. */
- }
- }
-
- /* The .dynamic body is built later, after segment shifts are
- * applied during emit (link_elf.c). emit_dynamic_body takes the
- * post-shift vaddrs of every other dyn section and writes one
- * DT_* entry per index. */
-
- /* Synthesize linker-defined symbols that reference the .dynamic
- * vaddr. Scrt1.o on Linux loads `_DYNAMIC` via ADRP+ADD, and
- * libc_nonshared.a's atexit shim takes `__dso_handle` as the
- * per-image identity (we use the .dynamic vaddr — any stable
- * per-image address satisfies the contract since the shim only
- * passes it through to __cxa_atexit, which the program-side glibc
- * just stashes). */
- link_define_boundary(l, img, "_DYNAMIC", dyn->dynamic_vaddr);
- link_define_boundary(l, img, "__dso_handle", dyn->dynamic_vaddr);
-
- free_imports(h, &imports);
-}
-
-/* ---- cleanup ---- */
-
-void link_dyn_state_free(LinkImage* img) {
- Heap* h = img->heap;
- LinkDynState* dyn = img->dyn;
- if (!dyn) return;
- if (dyn->dynsym) h->free(h, dyn->dynsym, sizeof(*dyn->dynsym) * dyn->ndynsym);
- if (dyn->dynstr) h->free(h, dyn->dynstr, dyn->dynstr_len);
- if (dyn->gnu_hash) h->free(h, dyn->gnu_hash, dyn->gnu_hash_len);
- if (dyn->rela_dyn)
- h->free(h, dyn->rela_dyn, sizeof(*dyn->rela_dyn) * dyn->cap_rela_dyn);
- if (dyn->rela_plt)
- h->free(h, dyn->rela_plt, sizeof(*dyn->rela_plt) * dyn->nrela_plt);
- if (dyn->needed) h->free(h, dyn->needed, sizeof(*dyn->needed) * dyn->nneeded);
- if (dyn->sym_dynidx)
- h->free(h, dyn->sym_dynidx,
- sizeof(*dyn->sym_dynidx) * dyn->sym_dynidx_size);
- if (dyn->sym_plt_vaddr)
- h->free(h, dyn->sym_plt_vaddr,
- sizeof(*dyn->sym_plt_vaddr) * dyn->sym_dynidx_size);
- h->free(h, dyn, sizeof(*dyn));
- img->dyn = NULL;
-}
diff --git a/src/link/link_elf.c b/src/link/link_elf.c
@@ -1,1417 +0,0 @@
-/* link_emit_elf: write a static ET_EXEC ELF64 image to the
- * caller-provided Writer.
- *
- * 64-bit little-endian only. The per-arch ELF reloc-type tables in
- * obj/elf_reloc_<arch>.c handle RelocKind <-> ELF translation; this
- * file gets e_machine from the link arch descriptor.
- *
- * File layout (in write order):
- *
- * [headers PT_LOAD, PF_R, mapped at IMAGE_BASE]
- * Ehdr64
- * Phdr64[nphdr] -- one per loaded segment + headers +
- * PT_NOTE .note.gnu.build-id -- 12 + 16 = 28 bytes
- * (deterministic 16-byte id) pad to PAGE
- *
- * [PT_LOAD per kept image segment, in img->segments order]
- * segment bytes (padded to its file_offset)
- *
- * [non-allocatable sections, file-only]
- * .symtab -- ELF64_SYM_SIZE * nsyms
- * .strtab -- NUL-led blob
- * .shstrtab -- NUL-led blob
- *
- * [section header table at e_shoff]
- * Shdr64[nshdr]
- *
- * Section header schema (for nm / objdump -t / gdb consumption):
- *
- * 0 SHN_UNDEF (zero entry)
- * N one shdr per loaded sub-region: .text/.rodata/.data/.bss as
- * the corresponding RX/R/RW segments materialize (.bss split
- * out as the trailing memsz>filesz tail of the RW segment).
- * 1 .note.gnu.build-id (allocatable, in headers PT_LOAD)
- * 1 .symtab (sh_link -> .strtab; sh_info = first non-local idx)
- * 1 .strtab
- * 1 .shstrtab (Ehdr64.e_shstrndx)
- *
- * Build-id is computed deterministically over the post-relocation
- * segment bytes (FNV-1a 64 over each segment, mixed into a 128-bit
- * accumulator). The 16-byte digest is written into the note before the
- * note is emitted to the Writer.
- *
- * The image image-relative addresses on entry are bumped by
- * align_up(headers_size, PAGE) before relocs are applied, exactly as
- * before — segment bytes / symbol vaddrs land at their final IMAGE_BASE
- * absolute addresses by the time relocs run. */
-
-#include <string.h>
-
-#include "core/heap.h"
-#include "core/pool.h"
-#include "core/slice.h"
-#include "core/util.h"
-#include "core/vec.h"
-#include "link/link.h"
-#include "link/link_arch.h"
-#include "link/link_internal.h"
-#include "obj/elf.h"
-
-static SrcLoc no_loc(void) {
- SrcLoc l = {0, 0, 0};
- return l;
-}
-
-/* ---- ELF64 wire structs (subset) ---- */
-
-#define EI_NIDENT 16
-
-typedef struct __attribute__((packed)) Ehdr64 {
- u8 e_ident[EI_NIDENT];
- u16 e_type;
- u16 e_machine;
- u32 e_version;
- u64 e_entry;
- u64 e_phoff;
- u64 e_shoff;
- u32 e_flags;
- u16 e_ehsize;
- u16 e_phentsize;
- u16 e_phnum;
- u16 e_shentsize;
- u16 e_shnum;
- u16 e_shstrndx;
-} Ehdr64;
-
-typedef struct __attribute__((packed)) Phdr64 {
- u32 p_type;
- u32 p_flags;
- u64 p_offset;
- u64 p_vaddr;
- u64 p_paddr;
- u64 p_filesz;
- u64 p_memsz;
- u64 p_align;
-} Phdr64;
-
-typedef struct __attribute__((packed)) Shdr64 {
- u32 sh_name;
- u32 sh_type;
- u64 sh_flags;
- u64 sh_addr;
- u64 sh_offset;
- u64 sh_size;
- u32 sh_link;
- u32 sh_info;
- u64 sh_addralign;
- u64 sh_entsize;
-} Shdr64;
-
-#define PT_NOTE 4
-#define PT_TLS 7
-
-/* Static ET_EXEC base. ET_DYN (PIE) uses 0 — the loader picks the
- * runtime base. The active value lives in `img_base` below; the macro
- * stays for the static path's hard-coded vaddrs. */
-#define IMAGE_BASE_STATIC 0x400000ULL
-
-#define BUILD_ID_DESC_LEN 16u
-#define NOTE_NAME_GNU "GNU"
-#define NOTE_NAME_GNU_LEN 4u /* "GNU\0" */
-#define NOTE_BUILD_ID_TYPE 3u
-#define BUILD_ID_NOTE_BYTES (12u + NOTE_NAME_GNU_LEN + BUILD_ID_DESC_LEN)
-
-/* ---- byte writer helpers ---- */
-
-static void write_bytes(Writer* w, const void* data, size_t n) {
- w->write(w, data, n);
-}
-
-static void write_zeroes(Writer* w, size_t n) {
- static const u8 zeroes[256] = {0};
- while (n) {
- size_t step = n > sizeof(zeroes) ? sizeof(zeroes) : n;
- w->write(w, zeroes, step);
- n -= step;
- }
-}
-
-static u32 perms_to_pflags(u32 secflags) {
- u32 f = PF_R;
- if (secflags & SF_EXEC) f |= PF_X;
- if (secflags & SF_WRITE) f |= PF_W;
- return f;
-}
-
-/* Scripted-layout post-pass: vaddrs are already final (the script
- * pinned them via `. = …`), so only file offsets need to bump to
- * leave room for ehdr+phdrs. Mirror of shift_image_addresses but
- * touches only the file dimension. */
-static void shift_image_file_offsets(LinkImage* img, u64 delta) {
- u32 i;
- for (i = 0; i < img->nsegments; ++i) img->segments[i].file_offset += delta;
- for (i = 0; i < img->nsections; ++i) img->sections[i].file_offset += delta;
- for (i = 0; i < LinkRelocs_count(&img->relocs); ++i)
- LinkRelocs_at(&img->relocs, i)->write_file_offset += delta;
-}
-
-static void shift_image_addresses(LinkImage* img, u64 delta) {
- u32 i;
- for (i = 0; i < img->nsegments; ++i) {
- img->segments[i].file_offset += delta;
- img->segments[i].vaddr += delta;
- }
- for (i = 0; i < img->nsections; ++i) {
- img->sections[i].file_offset += delta;
- img->sections[i].vaddr += delta;
- }
- for (i = 0; i < LinkRelocs_count(&img->relocs); ++i) {
- LinkRelocs_at(&img->relocs, i)->write_file_offset += delta;
- LinkRelocs_at(&img->relocs, i)->write_vaddr += delta;
- }
- for (i = 0; i < LinkSyms_count(&img->syms); ++i) {
- LinkSymbol* s = LinkSyms_at(&img->syms, i);
- if (s->kind == SK_ABS) continue;
- if (!s->defined) continue;
- s->vaddr += delta;
- }
- /* tls_vaddr lives in the same image-relative coordinate system as
- * the segments it tracks, so it bumps with them. */
- if (img->tls_memsz) img->tls_vaddr += delta;
- /* Dyn-link state mirrors a few segment / section vaddrs and pre-
- * populated DynRela.r_offset values from layout_dyn. Bump them so
- * the post-shift .rela.plt / .dynamic emit and apply_all_relocs see
- * the right addresses (sym_plt_vaddr is read to redirect CALL26
- * against imports). */
- if (img->dyn) {
- LinkDynState* dyn = img->dyn;
- if (dyn->plt_vaddr) dyn->plt_vaddr += delta;
- if (dyn->got_plt_vaddr) dyn->got_plt_vaddr += delta;
- if (dyn->dynamic_vaddr) dyn->dynamic_vaddr += delta;
- if (dyn->sym_plt_vaddr) {
- u32 j;
- for (j = 0; j < dyn->sym_dynidx_size; ++j)
- if (dyn->sym_plt_vaddr[j]) dyn->sym_plt_vaddr[j] += delta;
- }
- if (dyn->rela_plt) {
- u32 j;
- for (j = 0; j < dyn->nrela_plt; ++j) dyn->rela_plt[j].r_offset += delta;
- }
- /* rela_dyn is populated by apply_all_relocs (which runs after this
- * shift), so its records are already in post-shift coordinates. */
- }
-}
-
-/* AArch64 ELF ABI: the per-thread TLS block starts at TP + 16 bytes
- * (the TCB sits ahead of the TLS image). RISC-V psABI normally points
- * tp at the start of the TLS image; the cfree harness's start.c
- * places a 16-byte TCB ahead of .tdata and biases tp accordingly, so
- * the TPREL offset for both arches is (target - tls_vaddr) + 16. */
-#define TLS_TCB_SIZE 16ull
-
-static int reloc_is_tlsle(RelocKind k) {
- return k == R_AARCH64_TLSLE_ADD_TPREL_HI12 ||
- k == R_AARCH64_TLSLE_ADD_TPREL_LO12_NC || k == R_RV_TPREL_HI20 ||
- k == R_RV_TPREL_LO12_I || k == R_RV_TPREL_LO12_S;
-}
-
-/* x86_64 SysV ABI: TLS variant II — the per-thread TLS image sits at
- * *negative* offsets from %fs (which points at the TCB). start.c
- * lays out [tdata | tbss | TCB] and arch_prctl(ARCH_SET_FS, &TCB), so
- * a symbol at offset X within the TLS image is at fs-relative offset
- * (X - tls_memsz). The two ELF reloc kinds R_X86_64_TPOFF32/_TPOFF64
- * encode that signed offset directly at the reloc site (no TCB bias —
- * variant II's TCB sits *after* the image, so TPOFF is negative). */
-static int reloc_is_x64_tlsle(RelocKind k) {
- return k == R_X64_TPOFF32 || k == R_X64_TPOFF64;
-}
-
-static int reloc_is_abs(RelocKind k) { return k == R_ABS32 || k == R_ABS64; }
-
-/* Function-call relocs that may route through the PLT when the target
- * is imported. aarch64 CALL26/JUMP26, x86_64 PLT32, and risc-v CALL_PLT
- * (which cfree maps to R_PLT32) all carry the "call this address; if
- * it's not resolvable here use the PLT trampoline" contract; the apply
- * pass overwrites S with the PLT entry vaddr in that case. */
-static int reloc_is_branch26(RelocKind k) {
- return k == R_AARCH64_CALL26 || k == R_AARCH64_JUMP26 || k == R_X64_PLT32 ||
- k == R_PLT32 || k == R_RV_CALL;
-}
-
-static void emit_dyn_record(LinkImage* img, u64 site_vaddr, u32 reloc_type,
- u32 dynidx, i64 addend) {
- LinkDynState* dyn = img->dyn;
- if (!dyn || !dyn->rela_dyn) return;
- if (dyn->nrela_dyn >= dyn->cap_rela_dyn) {
- compiler_panic(img->c, no_loc(),
- "link: too many .rela.dyn records (%u >= %u); raise "
- "cap_rela_dyn in layout_dyn",
- dyn->nrela_dyn, dyn->cap_rela_dyn);
- }
- DynRela* r = &dyn->rela_dyn[dyn->nrela_dyn++];
- r->r_offset = site_vaddr;
- r->r_info = ELF64_R_INFO((u64)dynidx, reloc_type);
- r->r_addend = addend;
-}
-
-static const LinkArchDesc* elf_arch_or_panic(Compiler* c, const char* where) {
- const LinkArchDesc* arch = link_arch_desc_for(c);
- if (!arch || !arch->e_machine)
- compiler_panic(c, no_loc(), "%.*s: no ELF arch descriptor",
- SLICE_ARG(slice_from_cstr(where)));
- return arch;
-}
-
-static void emit_relative_record(LinkImage* img, u64 site_vaddr, u64 addend) {
- const LinkArchDesc* arch = elf_arch_or_panic(img->c, "link");
- emit_dyn_record(img, site_vaddr, arch->elf_r_relative, 0, (i64)addend);
-}
-
-static void emit_globdat_record(LinkImage* img, u64 site_vaddr, u32 dynidx,
- i64 addend) {
- const LinkArchDesc* arch = elf_arch_or_panic(img->c, "link");
- emit_dyn_record(img, site_vaddr, arch->elf_r_glob_dat, dynidx, addend);
-}
-
-/* RISC-V PCREL_LO12_* references the address of an AUIPC carrying the
- * paired PCREL_HI20. Given the AUIPC's site vaddr (post-shift), find
- * its PCREL_HI20 reloc and compute the displacement that AUIPC
- * encoded — the LO12 then takes the low 12 bits of the same disp.
- *
- * Linear scan over img->relocs is fine in practice: kernel images and
- * cg cases produce at most a few hundred relocs total. */
-static i64 rv_pcrel_lo12_disp(LinkImage* img, u64 auipc_vaddr, u64 img_base) {
- u32 i;
- for (i = 0; i < LinkRelocs_count(&img->relocs); ++i) {
- const LinkRelocApply* hi = LinkRelocs_at(&img->relocs, i);
- const LinkSymbol* hi_tgt;
- u64 hi_S, hi_P;
- if (hi->kind != R_RV_PCREL_HI20 && hi->kind != R_RV_GOT_HI20) continue;
- if (hi->write_vaddr + img_base != auipc_vaddr) continue;
- hi_tgt = LinkSyms_at(&img->syms, hi->target - 1);
- hi_S = (hi_tgt->kind == SK_ABS) ? hi_tgt->vaddr : hi_tgt->vaddr + img_base;
- hi_P = hi->write_vaddr + img_base;
- return (i64)hi_S + hi->addend - (i64)hi_P;
- }
- compiler_panic(img->c, no_loc(),
- "link: PCREL_LO12 at 0x%llx has no paired PCREL_HI20",
- (unsigned long long)auipc_vaddr);
- return 0;
-}
-
-static void apply_all_relocs(LinkImage* img, u64 img_base) {
- u32 i;
- int pie = img->pie;
- for (i = 0; i < LinkRelocs_count(&img->relocs); ++i) {
- LinkRelocApply* r = LinkRelocs_at(&img->relocs, i);
- const LinkSymbol* tgt = LinkSyms_at(&img->syms, r->target - 1);
- const LinkSection* sec = &img->sections[r->link_section_id - 1];
- const LinkSegment* seg = &img->segments[sec->segment_id - 1];
- u64 S, P;
- u8* P_bytes;
- if (reloc_is_tlsle(r->kind)) {
- /* S is the target's TP-relative offset: distance from the
- * TLS image start plus the 16-byte TCB. Both vaddrs are
- * in the same (post-shift, image-relative) coordinate
- * system, so img_base cancels out. */
- S = (tgt->vaddr - img->tls_vaddr) + TLS_TCB_SIZE;
- } else if (reloc_is_x64_tlsle(r->kind)) {
- /* x86_64 variant II: TP points just past the TLS image, so a
- * symbol at offset X within the image is at TP-relative offset
- * (X - tls_memsz). Cast through i64/u64 so the reloc apply
- * writes the full 32- or 64-bit signed value. */
- i64 off = (i64)(tgt->vaddr - img->tls_vaddr) - (i64)img->tls_memsz;
- S = (u64)off;
- } else if (r->kind == R_RV_PCREL_LO12_I || r->kind == R_RV_PCREL_LO12_S) {
- /* PCREL_LO12: rewrite S so that link_reloc_apply's existing
- * LO12_I/LO12_S encoder produces the right low 12 bits of the
- * paired AUIPC's PC-relative displacement. The reloc's own
- * addend is unused; signed lo12 = disp & 0xfff. */
- P = r->write_vaddr + img_base;
- P_bytes = img->segment_bytes[seg->id - 1] +
- (size_t)(r->write_file_offset - seg->file_offset);
- {
- i64 disp = rv_pcrel_lo12_disp(img, tgt->vaddr + img_base, img_base);
- RelocKind alias =
- (r->kind == R_RV_PCREL_LO12_I) ? R_RV_LO12_I : R_RV_LO12_S;
- link_reloc_apply(img->c, alias, P_bytes, (u64)disp, 0, P);
- }
- continue;
- } else {
- S = tgt->vaddr + img_base;
- if (tgt->kind == SK_ABS) S = tgt->vaddr;
- }
- P = r->write_vaddr + img_base;
- P_bytes = img->segment_bytes[seg->id - 1] +
- (size_t)(r->write_file_offset - seg->file_offset);
-
- /* Imported target: redirect / rewrite per reloc kind (Phase 5).
- *
- * - CALL26 / JUMP26: target the import's PLT entry. The PLT stub
- * reads .got.plt[3+i], which the loader pre-fills via JUMP_SLOT
- * (.rela.plt). S becomes the PLT-entry vaddr; the existing
- * apply path computes the disp from there.
- * - R_ABS{32,64}: leave the patch site at zero and emit a
- * GLOB_DAT record so the loader writes the resolved address
- * into the site at load time. This covers both
- * layout_got-emitted .got slot fills (target = import) and any
- * direct absolute reference in user data (e.g. a function
- * pointer initializer).
- * - GOT-page / LO12-NC against an import: emit_reloc_records has
- * already redirected the target from the import to the
- * synthetic .got slot symbol, so the apply path here sees the
- * slot, not the import — nothing special needed; the slot's
- * own R_ABS64 fill against the (vaddr=0) import will trip the
- * abs-import branch above and emit GLOB_DAT.
- *
- * Anything else against an imported symbol (e.g. PREL19 / ADR
- * etc.) is rare in real binaries and would need its own
- * dynamic-reloc kind; panic loudly so a future test that needs
- * it announces itself. */
- if (tgt->imported) {
- /* `tgt` may be a per-input shadow LinkSymbol — resolve_undefs
- * stamps `imported = 1` on every undef matched by name, but
- * collect_imports only stashes plt_vaddr / dynidx on the
- * canonical entry registered in img->globals. Resolve to the
- * canonical id before indexing the dyn-state arrays. */
- LinkSymId canon_id = tgt->id;
- if (tgt->name != 0) {
- LinkSymId hit = symhash_get(&img->globals, tgt->name);
- if (hit != LINK_SYM_NONE) canon_id = hit;
- }
- u32 dynidx = (img->dyn && canon_id < img->dyn->sym_dynidx_size)
- ? img->dyn->sym_dynidx[canon_id]
- : 0u;
- if (reloc_is_branch26(r->kind)) {
- u64 plt_v = (img->dyn && canon_id < img->dyn->sym_dynidx_size)
- ? img->dyn->sym_plt_vaddr[canon_id]
- : 0u;
- if (plt_v == 0)
- compiler_panic(img->c, no_loc(),
- "link: imported sym has no PLT entry (CALL26)");
- S = plt_v + img_base;
- link_reloc_apply(img->c, r->kind, P_bytes, S, r->addend, P);
- continue;
- }
- if (reloc_is_abs(r->kind)) {
- if (dynidx == 0)
- compiler_panic(img->c, no_loc(),
- "link: imported sym has no .dynsym entry");
- emit_globdat_record(img, r->write_vaddr, dynidx, r->addend);
- /* Site bytes are irrelevant: the loader's GLOB_DAT writes
- * (sym_value + r_addend) into r_offset before user code runs,
- * overwriting whatever's there. Leaving the existing zero
- * fill saves a write. */
- continue;
- }
- {
- Slice nm_s = tgt->name ? pool_slice(img->c->global, tgt->name)
- : SLICE_NULL;
- const char* nm = nm_s.s ? nm_s.s : "";
- size_t nl = nm_s.len;
- compiler_panic(
- img->c, no_loc(),
- "link: unhandled reloc kind %u against imported symbol '%.*s'",
- (unsigned)r->kind, (int)nl, nm);
- }
- }
-
- /* PIE: an absolute reloc against a defined non-imported symbol
- * stays image-relative in the file (the loader adds load-base via
- * a synthesized R_AARCH64_RELATIVE). img_base is 0 for PIE so
- * S above is already image-relative — the apply writes that into
- * the site, and the RELATIVE record tells the loader to add
- * load_base on top. */
- if (pie && reloc_is_abs(r->kind) && tgt->defined && tgt->kind != SK_ABS) {
- emit_relative_record(img, r->write_vaddr, tgt->vaddr);
- }
- link_reloc_apply(img->c, r->kind, P_bytes, S, r->addend, P);
- }
-}
-
-/* The build-id payload is a format-agnostic image identity hash —
- * see link_image_id_compute in link_image_id.c. Mach-O wraps the
- * same bytes in LC_UUID; ELF wraps them in a .note.gnu.build-id. */
-
-/* ---- string-table builder ---- */
-
-typedef struct StrBuilder {
- Heap* heap;
- u8* data;
- u32 len;
- u32 cap;
-} StrBuilder;
-
-static void strb_init(StrBuilder* s, Heap* h, u32 reserve) {
- s->heap = h;
- s->cap = reserve > 16u ? reserve : 16u;
- s->data = (u8*)h->alloc(h, s->cap, 1);
- if (!s->data) s->cap = 0;
- s->len = 0;
- if (s->cap) {
- s->data[0] = 0;
- s->len = 1;
- } /* leading NUL */
-}
-
-static void strb_fini(StrBuilder* s) {
- if (s->data) s->heap->free(s->heap, s->data, s->cap);
- s->data = NULL;
- s->cap = s->len = 0;
-}
-
-static void strb_grow(StrBuilder* s, u32 need) {
- (void)VEC_GROW(s->heap, s->data, s->cap, need);
-}
-
-static u32 strb_add(StrBuilder* s, const char* str, u32 slen) {
- u32 off;
- if (slen == 0) return 0;
- /* Linear dedup: scan existing data for a matching NUL-terminated
- * substring. Strtabs are small enough to make this acceptable. */
- if (s->len > slen) {
- u32 i;
- for (i = 0; i + slen < s->len; ++i) {
- if (s->data[i + slen] == 0 && memcmp(s->data + i, str, slen) == 0)
- return i;
- }
- }
- off = s->len;
- strb_grow(s, s->len + slen + 1u);
- memcpy(s->data + s->len, str, slen);
- s->data[s->len + slen] = 0;
- s->len += slen + 1u;
- return off;
-}
-
-static u32 strb_add_cstr(StrBuilder* s, const char* str) {
- return strb_add(s, str, (u32)slice_from_cstr(str).len);
-}
-
-/* ---- symtab builder ---- */
-
-typedef struct SymRec {
- u32 st_name;
- u8 st_info;
- u8 st_other;
- u16 st_shndx;
- u64 st_value;
- u64 st_size;
-} SymRec;
-
-static u8 sym_kind_to_st_type(u8 kind) {
- switch (kind) {
- case SK_FUNC:
- return STT_FUNC;
- case SK_OBJ:
- return STT_OBJECT;
- case SK_SECTION:
- return STT_SECTION;
- case SK_FILE:
- return STT_FILE;
- case SK_TLS:
- return STT_TLS;
- case SK_IFUNC:
- return STT_GNU_IFUNC;
- case SK_NOTYPE:
- case SK_ABS:
- case SK_UNDEF:
- default:
- return STT_NOTYPE;
- }
-}
-
-static u8 sym_bind_to_st_bind(u8 bind) {
- switch (bind) {
- case SB_GLOBAL:
- return STB_GLOBAL;
- case SB_WEAK:
- return STB_WEAK;
- case SB_LOCAL:
- default:
- return STB_LOCAL;
- }
-}
-
-/* Produces one Elf64_Sym record on the wire from a SymRec. */
-static void write_sym_rec(Writer* w, const SymRec* r) {
- u8 buf[ELF64_SYM_SIZE];
- buf[0] = (u8)(r->st_name);
- buf[1] = (u8)(r->st_name >> 8);
- buf[2] = (u8)(r->st_name >> 16);
- buf[3] = (u8)(r->st_name >> 24);
- buf[4] = r->st_info;
- buf[5] = r->st_other;
- buf[6] = (u8)(r->st_shndx);
- buf[7] = (u8)(r->st_shndx >> 8);
- {
- u32 i;
- for (i = 0; i < 8; ++i) buf[8 + i] = (u8)(r->st_value >> (i * 8));
- for (i = 0; i < 8; ++i) buf[16 + i] = (u8)(r->st_size >> (i * 8));
- }
- write_bytes(w, buf, sizeof buf);
-}
-
-/* ---- section header layout ---- *
- *
- * Per-segment cuts: each kept image segment contributes 1 .text/.rodata
- * shdr for its file portion, plus a separate .bss shdr for the trailing
- * NOBITS portion of an RW segment (memsz > filesz). The headers PT_LOAD
- * contributes a single .note.gnu.build-id shdr. Trailing non-alloc
- * shdrs: .symtab .strtab .shstrtab (always 3). */
-
-typedef struct OutShdr {
- u32 shdr_idx; /* 1-based; assigned during planning */
- LinkSegmentId segment_id;
- Sym name;
- u16 sem; /* SecSem from source LinkSection */
- u32 flags; /* SF_* from source LinkSection */
- u32 align;
- u64 vaddr;
- u64 file_offset;
- u64 size;
- int is_nobits;
-} OutShdr;
-
-static u16 sym_shndx_for(const LinkSymbol* s, const OutShdr* outshdrs,
- u32 noutshdr) {
- if (!s->defined) return SHN_UNDEF;
- if (s->kind == SK_ABS) return SHN_ABS;
- if (s->kind == SK_FILE) return SHN_ABS;
- if (s->kind == SK_COMMON) return SHN_COMMON;
- /* Find an output shdr whose [vaddr, vaddr+size) covers s->vaddr.
- * Boundary symbols match at the upper edge. */
- {
- u32 i;
- for (i = 0; i < noutshdr; ++i) {
- u64 lo = outshdrs[i].vaddr;
- u64 hi = lo + outshdrs[i].size;
- if (s->vaddr >= lo && s->vaddr <= hi) return (u16)outshdrs[i].shdr_idx;
- }
- }
- return SHN_ABS;
-}
-
-static u32 sec_sem_to_sht(u16 sem) {
- switch (sem) {
- case SSEM_PROGBITS:
- return SHT_PROGBITS;
- case SSEM_NOBITS:
- return SHT_NOBITS;
- case SSEM_NOTE:
- return SHT_NOTE;
- case SSEM_INIT_ARRAY:
- return SHT_INIT_ARRAY;
- case SSEM_FINI_ARRAY:
- return SHT_FINI_ARRAY;
- case SSEM_PREINIT_ARRAY:
- return SHT_PREINIT_ARRAY;
- default:
- return SHT_PROGBITS;
- }
-}
-
-static u64 sec_flags_to_shf(u32 flags) {
- u64 r = 0;
- if (flags & SF_ALLOC) r |= SHF_ALLOC;
- if (flags & SF_EXEC) r |= SHF_EXECINSTR;
- if (flags & SF_WRITE) r |= SHF_WRITE;
- if (flags & SF_TLS) r |= SHF_TLS;
- if (flags & SF_MERGE) r |= SHF_MERGE;
- if (flags & SF_STRINGS) r |= SHF_STRINGS;
- if (flags & SF_LINK_ORDER) r |= SHF_LINK_ORDER;
- if (flags & SF_RETAIN) r |= SHF_GNU_RETAIN;
- return r;
-}
-
-void link_emit_elf(LinkImage* img, Writer* w) {
- Heap* heap = img->heap;
- Compiler* c = img->c;
- const LinkArchDesc* arch = elf_arch_or_panic(c, "link_emit_elf");
- u32 e_machine = arch->e_machine;
- if (img->entry_sym == LINK_SYM_NONE)
- compiler_panic(c, no_loc(), "link_emit_elf: no resolved entry symbol");
- /* IFUNC trampolines: layout_iplt builds the .iplt stubs + .igot.plt
- * slots and (when emit_static_exe was set) synthesizes a
- * .init_array entry that calls __cfree_ifunc_init at startup. The
- * rt member walks .iplt.pairs and fills each slot before user code
- * runs. The ELF writer doesn't have to do anything special here. */
-
- /* PIE / ET_DYN: img_base is 0 (the loader picks the runtime base;
- * absolute relocs against internal symbols are emitted as
- * R_AARCH64_RELATIVE in .rela.dyn). Otherwise classic ET_EXEC at
- * IMAGE_BASE_STATIC.
- *
- * Scripted: the linker script pinned absolute vaddrs (e.g.
- * `. = 0x40080000`); img_base stays 0 and the headers PT_LOAD /
- * build-id note are dropped — the script's image is consumed by a
- * raw loader (qemu -kernel, a bootloader) that doesn't need a
- * self-describing memory image. */
- int pie = img->pie;
- int scripted = img->scripted;
- u64 img_base = (pie || scripted) ? 0ULL : IMAGE_BASE_STATIC;
-
- /* ---- plan number of program headers ----
- *
- * 1 headers PT_LOAD + nsegments PT_LOAD + 1 PT_NOTE (build-id)
- * + 1 PT_TLS when this image carries any TLS sections.
- * + 4 dyn phdrs (PT_PHDR / PT_INTERP / PT_DYNAMIC / PT_GNU_STACK) on PIE.
- *
- * Scripted images skip the headers PT_LOAD and PT_NOTE: phdrs are
- * just the per-segment PT_LOADs. */
- u32 has_tls = img->tls_memsz ? 1u : 0u;
- u32 nphdr_extra_dyn = pie ? 4u : 0u;
- u32 nphdr_headers = scripted ? 0u : 1u;
- u32 nphdr_buildid = scripted ? 0u : 1u;
- u32 nphdr_total = nphdr_headers + img->nsegments + nphdr_buildid + has_tls +
- nphdr_extra_dyn;
- u64 build_id_note_bytes = scripted ? 0ULL : BUILD_ID_NOTE_BYTES;
- u64 headers_size =
- sizeof(Ehdr64) + (u64)nphdr_total * sizeof(Phdr64) + build_id_note_bytes;
- u64 headers_load = ALIGN_UP(headers_size, (u64)PAGE_SIZE);
-
- /* The build-id note lives inside the headers PT_LOAD at this offset. */
- u64 build_id_off = sizeof(Ehdr64) + (u64)nphdr_total * sizeof(Phdr64);
- u64 build_id_addr = img_base + build_id_off;
-
- /* ---- shift image addresses, apply relocations ----
- *
- * Must happen before segshdrs/symtab construction so they observe
- * post-shift vaddrs (the values that will land in the file). */
- if (scripted)
- shift_image_file_offsets(img, headers_load);
- else
- shift_image_addresses(img, headers_load);
- apply_all_relocs(img, img_base);
-
- /* ---- write .dynamic body + re-serialize .rela.dyn (PIE only) ----
- *
- * Both depend on post-shift vaddrs. .dynamic embeds image-relative
- * pointers to .dynsym/.dynstr/.gnu.hash/.rela.dyn/.rela.plt/.got.plt
- * (the loader adds load_base at runtime). .rela.dyn picked up
- * RELATIVE records during apply_all_relocs; rewrite the section
- * bytes to include them. */
- if (pie && img->dyn) {
- LinkDynState* dyn = img->dyn;
- const LinkSection* sec_dynamic = &img->sections[dyn->sec_dynamic - 1];
- const LinkSection* sec_dynsym = &img->sections[dyn->sec_dynsym - 1];
- const LinkSection* sec_dynstr = &img->sections[dyn->sec_dynstr - 1];
- const LinkSection* sec_gnuhash = &img->sections[dyn->sec_gnu_hash - 1];
- const LinkSection* sec_reladyn = &img->sections[dyn->sec_rela_dyn - 1];
- const LinkSection* sec_relaplt = (dyn->sec_rela_plt != LINK_SEC_NONE)
- ? &img->sections[dyn->sec_rela_plt - 1]
- : NULL;
- const LinkSection* sec_gotplt = (dyn->sec_got_plt != LINK_SEC_NONE)
- ? &img->sections[dyn->sec_got_plt - 1]
- : NULL;
- const LinkSegment* dseg = &img->segments[sec_dynamic->segment_id - 1];
- u8* dyn_bytes_at = img->segment_bytes[dseg->id - 1] +
- (size_t)(sec_dynamic->file_offset - dseg->file_offset);
-
- /* Build DT_* entries in order. Layout matches count_dynamic_entries. */
- u32 written = 0;
- u8* p = dyn_bytes_at;
-#define DT_PUT(TAG, VAL) \
- do { \
- wr_u64_le(p, (u64)(TAG)); \
- wr_u64_le(p + 8, (u64)(VAL)); \
- p += 16; \
- written++; \
- } while (0)
-
- /* DT_NEEDED entries — d_un.d_val is the offset of the soname
- * within .dynstr. The dynstr was built in layout_dyn with
- * dedup; look each soname up by name to compute its offset. */
- {
- u32 ni;
- for (ni = 0; ni < dyn->nneeded; ++ni) {
- Sym soname = dyn->needed[ni];
- Slice nm_s = pool_slice(c->global, soname);
- const char* nm = nm_s.s;
- size_t namelen = nm_s.len;
- /* Linear search dynstr for this name. */
- u32 off = 0;
- if (nm && namelen) {
- u32 si;
- for (si = 0; si + namelen < dyn->dynstr_len; ++si) {
- if (dyn->dynstr[si + namelen] == 0 &&
- memcmp(dyn->dynstr + si, nm, namelen) == 0) {
- off = si;
- break;
- }
- }
- /* Should always be present — collect_needed populated dynstr
- * via build_dynsym? Actually build_dynsym only added import
- * names. We need to also add NEEDED sonames. */
- if (off == 0) {
- /* Fallback: append to dynstr. Phase 4 layout_dyn pre-sized
- * .dynstr exactly to its current content; appending here
- * would overflow the section. Instead, panic with a clear
- * message — the soname was supposed to be added during
- * layout. */
- compiler_panic(c, no_loc(),
- "link_emit_elf: DT_NEEDED soname missing from "
- ".dynstr");
- }
- }
- DT_PUT(DT_NEEDED, off);
- }
- }
-
- DT_PUT(DT_STRTAB, img_base + sec_dynstr->vaddr);
- DT_PUT(DT_STRSZ, sec_dynstr->size);
- DT_PUT(DT_SYMTAB, img_base + sec_dynsym->vaddr);
- DT_PUT(DT_SYMENT, 24);
- DT_PUT(DT_GNU_HASH, img_base + sec_gnuhash->vaddr);
- /* DT_PLT* / DT_JMPREL only make sense when there's a PLT. Emitting
- * them with size=0 / vaddr=0 (or pointing past the end of any
- * PT_LOAD) trips llvm-readelf's "address not in any segment" check
- * and confuses some loaders' DT walk. */
- if (dyn->nrela_plt) {
- DT_PUT(DT_PLTGOT, sec_gotplt ? (img_base + sec_gotplt->vaddr) : 0);
- DT_PUT(DT_PLTRELSZ, sec_relaplt ? sec_relaplt->size : 0);
- DT_PUT(DT_PLTREL, DT_RELA);
- DT_PUT(DT_JMPREL, sec_relaplt ? (img_base + sec_relaplt->vaddr) : 0);
- }
- if (dyn->cap_rela_dyn) {
- DT_PUT(DT_RELA, img_base + sec_reladyn->vaddr);
- DT_PUT(DT_RELASZ, sec_reladyn->size);
- DT_PUT(DT_RELAENT, 24);
- }
- DT_PUT(DT_FLAGS_1, DF_1_NOW);
- DT_PUT(DT_NULL, 0);
-#undef DT_PUT
-
- /* Pad any pre-allocated tail with DT_NULL. */
- while (written < dyn->ndyn_entries) {
- wr_u64_le(p, 0);
- wr_u64_le(p + 8, 0);
- p += 16;
- written++;
- }
-
- /* Re-serialize .rela.dyn body. GLOB_DAT records (imports against
- * .got slots) and RELATIVE records (PIE internal abs64 fixups)
- * are both populated during apply_all_relocs; .rela.dyn was empty
- * coming out of layout_dyn. Trailing capacity stays zero —
- * readers stop at the first R_AARCH64_NONE record. */
- {
- const LinkSegment* rdseg = &img->segments[sec_reladyn->segment_id - 1];
- u8* rd_bytes = img->segment_bytes[rdseg->id - 1] +
- (size_t)(sec_reladyn->file_offset - rdseg->file_offset);
- u32 i;
- for (i = 0; i < dyn->nrela_dyn; ++i) {
- const DynRela* rr = &dyn->rela_dyn[i];
- u8* rp = rd_bytes + (u64)i * ELF64_RELA_SIZE;
- wr_u64_le(rp + 0, rr->r_offset);
- wr_u64_le(rp + 8, rr->r_info);
- wr_u64_le(rp + 16, (u64)rr->r_addend);
- }
- }
-
- /* Re-serialize .rela.plt body. JUMP_SLOT records were written by
- * layout_dyn at pre-shift vaddrs; shift_image_addresses bumped
- * dyn->rela_plt[i].r_offset along with the rest, so the post-shift
- * values match the .got.plt slot vaddrs the loader will patch. */
- if (sec_relaplt && dyn->nrela_plt) {
- const LinkSegment* rpseg = &img->segments[sec_relaplt->segment_id - 1];
- u8* rp_bytes = img->segment_bytes[rpseg->id - 1] +
- (size_t)(sec_relaplt->file_offset - rpseg->file_offset);
- u32 i;
- for (i = 0; i < dyn->nrela_plt; ++i) {
- const DynRela* rr = &dyn->rela_plt[i];
- u8* rp = rp_bytes + (u64)i * ELF64_RELA_SIZE;
- wr_u64_le(rp + 0, rr->r_offset);
- wr_u64_le(rp + 8, rr->r_info);
- wr_u64_le(rp + 16, (u64)rr->r_addend);
- }
- }
-
- /* Re-write .got.plt[0] = &.dynamic with the post-shift vaddr.
- * layout_dyn wrote the pre-shift value into the segment bytes;
- * shift_image_addresses bumped dyn->dynamic_vaddr so we can refill
- * the slot here. Slots 1 and 2 (link_map cookie,
- * _dl_runtime_resolve) are loader-owned for lazy binding; under
- * DF_1_NOW they're never read so leaving them zero is fine. */
- if (sec_gotplt && dyn->dynamic_vaddr) {
- const LinkSegment* gpseg = &img->segments[sec_gotplt->segment_id - 1];
- u8* gp_bytes = img->segment_bytes[gpseg->id - 1] +
- (size_t)(sec_gotplt->file_offset - gpseg->file_offset);
- wr_u64_le(gp_bytes, dyn->dynamic_vaddr);
- }
- }
-
- /* ---- compute build-id (post-reloc, deterministic) ----
- *
- * Format-agnostic — Mach-O LC_UUID will hash the same bytes. */
- u8 build_id[BUILD_ID_DESC_LEN];
- link_image_id_compute(img, build_id);
-
- /* ---- plan section headers covering loaded segments ----
- *
- * Worst case: 1 file shdr per segment + 1 .bss shdr if RW has a tail.
- * shdr indices: 0=NULL, 1..nsegshdr=these, then build-id/symtab/...
- */
- /* Walk img->sections sorted by (segment_id, vaddr) and merge into
- * one OutShdr per (segment_id, name) run. layout already places
- * same-name sections adjacent within a segment, so a stable
- * by-vaddr sort followed by run-length grouping captures it. */
- OutShdr* outshdrs;
- u32 noutshdr = 0;
- u32 outshdr_cap = img->nsections + 1u;
- outshdrs = (OutShdr*)heap->alloc(heap, sizeof(*outshdrs) * outshdr_cap,
- _Alignof(OutShdr));
- if (!outshdrs) compiler_panic(c, no_loc(), "link_emit_elf: oom on outshdrs");
- memset(outshdrs, 0, sizeof(*outshdrs) * outshdr_cap);
- {
- /* Build a sort index over LinkSection ids by (segment_id, vaddr). */
- u32* order = (u32*)heap->alloc(heap, sizeof(u32) * (img->nsections + 1u),
- _Alignof(u32));
- if (!order && img->nsections)
- compiler_panic(c, no_loc(), "link_emit_elf: oom on shdr sort");
- u32 i, j;
- for (i = 0; i < img->nsections; ++i) order[i] = i;
- /* Insertion sort — section count is small. */
- for (i = 1; i < img->nsections; ++i) {
- u32 cur = order[i];
- const LinkSection* a = &img->sections[cur];
- j = i;
- while (j > 0) {
- const LinkSection* b = &img->sections[order[j - 1]];
- if ((b->segment_id < a->segment_id) ||
- (b->segment_id == a->segment_id && b->vaddr <= a->vaddr))
- break;
- order[j] = order[j - 1];
- --j;
- }
- order[j] = cur;
- }
- for (i = 0; i < img->nsections; ++i) {
- const LinkSection* ls = &img->sections[order[i]];
- OutShdr* tail = noutshdr ? &outshdrs[noutshdr - 1] : NULL;
- int merge = tail && tail->segment_id == ls->segment_id &&
- tail->name == ls->name &&
- tail->is_nobits == (ls->sem == SSEM_NOBITS);
- if (merge) {
- u64 end = ls->vaddr + ls->size;
- u64 prev_end = tail->vaddr + tail->size;
- if (end > prev_end) tail->size = end - tail->vaddr;
- if (ls->align > tail->align) tail->align = ls->align;
- } else {
- OutShdr* o = &outshdrs[noutshdr];
- o->shdr_idx = 1u + noutshdr;
- o->segment_id = ls->segment_id;
- o->name = ls->name;
- o->sem = ls->sem;
- o->flags = ls->flags;
- o->align = ls->align;
- o->vaddr = ls->vaddr;
- o->file_offset = ls->file_offset;
- o->size = ls->size;
- o->is_nobits = (ls->sem == SSEM_NOBITS);
- noutshdr++;
- }
- }
- heap->free(heap, order, sizeof(u32) * (img->nsections + 1u));
- }
-
- /* ---- build .shstrtab ---- */
- StrBuilder shstrtab;
- strb_init(&shstrtab, heap, 128);
- u32 sh_name_symtab = strb_add_cstr(&shstrtab, ".symtab");
- u32 sh_name_strtab = strb_add_cstr(&shstrtab, ".strtab");
- u32 sh_name_shstrtab = strb_add_cstr(&shstrtab, ".shstrtab");
- u32 sh_name_buildid = strb_add_cstr(&shstrtab, ".note.gnu.build-id");
- /* Per-output-shdr names — interned strings from input section names. */
- u32* outshdr_name_off =
- (u32*)heap->alloc(heap, sizeof(u32) * (noutshdr + 1u), _Alignof(u32));
- if (!outshdr_name_off && noutshdr)
- compiler_panic(c, no_loc(), "link_emit_elf: oom on shdr name table");
- {
- u32 i;
- for (i = 0; i < noutshdr; ++i) {
- const OutShdr* o = &outshdrs[i];
- if (o->name) {
- Slice nm_s = pool_slice(c->global, o->name);
- const char* nm = nm_s.s;
- size_t nlen = nm_s.len;
- outshdr_name_off[i] =
- nm && nlen ? strb_add(&shstrtab, nm, (u32)nlen) : 0;
- } else {
- outshdr_name_off[i] = 0;
- }
- }
- }
-
- u32 nshdr = 1u + noutshdr + 4u;
- u32 shndx_buildid = 1u + noutshdr;
- u32 shndx_symtab = shndx_buildid + 1u;
- u32 shndx_strtab = shndx_symtab + 1u;
- u32 shndx_shstrtab = shndx_strtab + 1u;
-
- /* ---- build .symtab + .strtab ----
- *
- * Two passes (locals first, then globals/weaks). Slot 0 is
- * STN_UNDEF. Globals are deduped via img->globals — only the
- * canonical entry per name is emitted, since per-input undef
- * records keep their own LinkSymId after resolve_undefs's
- * "copy fields from canonical def" step. sh_info = first non-local
- * idx. */
- StrBuilder strtab;
- strb_init(&strtab, heap, 256);
-
- SymRec* recs = (SymRec*)heap->alloc(
- heap, sizeof(*recs) * (LinkSyms_count(&img->syms) + 1u),
- _Alignof(SymRec));
- if (!recs) compiler_panic(c, no_loc(), "link_emit_elf: oom on symrecs");
- u32 nsyms_emit = 0;
- u32 first_global_idx;
- memset(&recs[nsyms_emit++], 0, sizeof(*recs)); /* slot 0 */
- first_global_idx = nsyms_emit;
-
- {
- u32 pass, i;
- for (pass = 0; pass < 2; ++pass) {
- int want_local = (pass == 0);
- if (!want_local) first_global_idx = nsyms_emit;
- for (i = 0; i < LinkSyms_count(&img->syms); ++i) {
- const LinkSymbol* s = LinkSyms_at(&img->syms, i);
- int is_local = (s->bind == SB_LOCAL);
- size_t namelen = 0;
- const char* nm;
- u8 st_type, st_bind;
- u16 shndx;
- u64 st_value;
- SymRec* r;
- if (want_local != is_local) continue;
- if (s->name == 0 && s->kind != SK_FILE) continue;
- /* Dedupe globals: per-input undef-of-X and the canonical
- * def-of-X are separate img->syms entries (resolve_undefs
- * mirrors fields onto the undef). Only the canonical
- * (first registered) entry is in img->globals. Skip the
- * shadow copies. */
- if (!is_local && s->name) {
- LinkSymId canonical = symhash_get(&img->globals, s->name);
- if (canonical != LINK_SYM_NONE && canonical != s->id) continue;
- }
- {
- Slice nm_s = s->name ? pool_slice(c->global, s->name) : SLICE_NULL;
- nm = nm_s.s ? nm_s.s : "";
- namelen = nm_s.len;
- }
- shndx = sym_shndx_for(s, outshdrs, noutshdr);
- /* st_value: in ET_EXEC, defined non-ABS symbols carry
- * absolute virtual addresses (IMAGE_BASE + image
- * vaddr); ABS symbols carry their own value verbatim. */
- if (s->kind == SK_FILE)
- st_value = 0;
- else if (s->kind == SK_ABS)
- st_value = s->vaddr;
- else if (s->defined)
- st_value = img_base + s->vaddr;
- else
- st_value = 0;
- st_type = sym_kind_to_st_type(s->kind);
- st_bind = sym_bind_to_st_bind(s->bind);
- r = &recs[nsyms_emit++];
- memset(r, 0, sizeof(*r));
- r->st_name = (nm && namelen) ? strb_add(&strtab, nm, (u32)namelen) : 0;
- r->st_info = ELF64_ST_INFO(st_bind, st_type);
- r->st_other = STV_DEFAULT;
- r->st_shndx = shndx;
- r->st_value = st_value;
- r->st_size = s->size;
- }
- }
- }
-
- /* ---- compute file offsets for trailing non-alloc sections ---- */
- /* End of segment data: the highest (file_offset + file_size) across
- * loaded segments. */
- u64 end_of_segs = headers_load;
- {
- u32 i;
- for (i = 0; i < img->nsegments; ++i) {
- const LinkSegment* seg = &img->segments[i];
- u64 e = seg->file_offset + seg->file_size;
- if (e > end_of_segs) end_of_segs = e;
- }
- }
- u64 symtab_off = ALIGN_UP(end_of_segs, (u64)8u);
- u64 symtab_size = (u64)ELF64_SYM_SIZE * nsyms_emit;
- u64 strtab_off = symtab_off + symtab_size;
- u64 strtab_size = strtab.len;
- u64 shstrtab_off = strtab_off + strtab_size;
- u64 shstrtab_size = shstrtab.len;
- u64 shdr_off = ALIGN_UP(shstrtab_off + shstrtab_size, (u64)8u);
-
- /* ---- build phdrs ---- */
- Phdr64* phdrs = (Phdr64*)heap->alloc(heap, sizeof(Phdr64) * nphdr_total,
- _Alignof(Phdr64));
- if (!phdrs) compiler_panic(c, no_loc(), "link_emit_elf: oom on phdrs");
- memset(phdrs, 0, sizeof(Phdr64) * nphdr_total);
- {
- u32 pi = 0;
- /* PT_PHDR points at the phdr table itself within the headers
- * PT_LOAD. Required by the runtime loader for ET_DYN to know
- * where its own program headers live. Must appear before the
- * first PT_LOAD on dynamic exes (musl checks). */
- if (pie) {
- phdrs[pi].p_type = PT_PHDR;
- phdrs[pi].p_flags = PF_R;
- phdrs[pi].p_offset = sizeof(Ehdr64);
- phdrs[pi].p_vaddr = img_base + sizeof(Ehdr64);
- phdrs[pi].p_paddr = phdrs[pi].p_vaddr;
- phdrs[pi].p_filesz = (u64)nphdr_total * sizeof(Phdr64);
- phdrs[pi].p_memsz = phdrs[pi].p_filesz;
- phdrs[pi].p_align = 8;
- pi++;
- }
- /* Headers PT_LOAD (covers ehdr + phdrs + build-id note).
- * Scripted images don't emit one — see plan note above. */
- if (!scripted) {
- phdrs[pi].p_type = PT_LOAD;
- phdrs[pi].p_flags = PF_R;
- phdrs[pi].p_offset = 0;
- phdrs[pi].p_vaddr = img_base;
- phdrs[pi].p_paddr = img_base;
- phdrs[pi].p_filesz = headers_size;
- phdrs[pi].p_memsz = headers_size;
- phdrs[pi].p_align = PAGE_SIZE;
- pi++;
- }
- /* Per-segment PT_LOAD. */
- u32 i;
- for (i = 0; i < img->nsegments; ++i) {
- const LinkSegment* seg = &img->segments[i];
- Phdr64* p = &phdrs[pi++];
- p->p_type = PT_LOAD;
- p->p_flags = perms_to_pflags(seg->flags);
- p->p_offset = seg->file_offset;
- p->p_vaddr = img_base + seg->vaddr; /* post-shift */
- p->p_paddr = p->p_vaddr;
- p->p_filesz = seg->file_size;
- /* TLS .tbss is per-thread template space, not a loadable bss
- * region — PT_TLS already records the full memsz (incl. .tbss)
- * for the loader's per-thread allocation, so the matching
- * PT_LOAD must not extend memsz past filesz. qemu-riscv64
- * rejects PT_LOADs with memsz>filesz on non-writable mappings
- * ("PT_LOAD with non-writable bss"), and the SEG_TLS perms are
- * SF_ALLOC|SF_TLS only. */
- p->p_memsz = (seg->flags & SF_TLS) ? seg->file_size : seg->mem_size;
- p->p_align = seg->align ? seg->align : PAGE_SIZE;
- }
- /* PT_NOTE for build-id. Scripted images skip the build-id entirely. */
- if (!scripted) {
- phdrs[pi].p_type = PT_NOTE;
- phdrs[pi].p_flags = PF_R;
- phdrs[pi].p_offset = build_id_off;
- phdrs[pi].p_vaddr = build_id_addr;
- phdrs[pi].p_paddr = build_id_addr;
- phdrs[pi].p_filesz = BUILD_ID_NOTE_BYTES;
- phdrs[pi].p_memsz = BUILD_ID_NOTE_BYTES;
- phdrs[pi].p_align = 4;
- pi++;
- }
- /* PT_TLS describing the .tdata template + .tbss zero-fill.
- * vaddr/file_offset point at the same bytes the matching
- * PT_LOAD already covers — the loader uses PT_TLS to size
- * each thread's TLS block and to seed it from .tdata. */
- if (has_tls) {
- phdrs[pi].p_type = PT_TLS;
- phdrs[pi].p_flags = PF_R;
- phdrs[pi].p_offset = img->tls_vaddr;
- phdrs[pi].p_vaddr = img_base + img->tls_vaddr;
- phdrs[pi].p_paddr = phdrs[pi].p_vaddr;
- phdrs[pi].p_filesz = img->tls_filesz;
- phdrs[pi].p_memsz = img->tls_memsz;
- phdrs[pi].p_align = img->tls_align ? img->tls_align : 1u;
- pi++;
- }
- /* Dynamic phdrs. PT_INTERP and PT_DYNAMIC point at the matching
- * sections (which layout_dyn placed in the ro/rw_dyn segments).
- * PT_GNU_STACK marks the stack as non-executable (filesz=0). */
- if (pie && img->dyn) {
- LinkDynState* dyn = img->dyn;
- const LinkSection* sec_interp = &img->sections[dyn->sec_interp - 1];
- const LinkSection* sec_dynamic = &img->sections[dyn->sec_dynamic - 1];
- phdrs[pi].p_type = PT_INTERP;
- phdrs[pi].p_flags = PF_R;
- phdrs[pi].p_offset = sec_interp->file_offset;
- phdrs[pi].p_vaddr = img_base + sec_interp->vaddr;
- phdrs[pi].p_paddr = phdrs[pi].p_vaddr;
- phdrs[pi].p_filesz = sec_interp->size;
- phdrs[pi].p_memsz = sec_interp->size;
- phdrs[pi].p_align = 1;
- pi++;
- phdrs[pi].p_type = PT_DYNAMIC;
- phdrs[pi].p_flags = PF_R | PF_W;
- phdrs[pi].p_offset = sec_dynamic->file_offset;
- phdrs[pi].p_vaddr = img_base + sec_dynamic->vaddr;
- phdrs[pi].p_paddr = phdrs[pi].p_vaddr;
- phdrs[pi].p_filesz = sec_dynamic->size;
- phdrs[pi].p_memsz = sec_dynamic->size;
- phdrs[pi].p_align = 8;
- pi++;
- phdrs[pi].p_type = PT_GNU_STACK;
- phdrs[pi].p_flags = PF_R | PF_W;
- phdrs[pi].p_offset = 0;
- phdrs[pi].p_vaddr = 0;
- phdrs[pi].p_paddr = 0;
- phdrs[pi].p_filesz = 0;
- phdrs[pi].p_memsz = 0;
- phdrs[pi].p_align = 16;
- pi++;
- /* PT_GNU_RELRO would mark the read-only-after-relocation span
- * here. Phase 6 leaves it out — it's an optimization the loader
- * can live without, and our ro_seg already lives in a PF_R
- * PT_LOAD that's never made writable. */
- } else if (pie) {
- /* dyn was nominally requested but layout_dyn early-out — no
- * imports and no DSO inputs. The image still needs a PT_GNU_STACK
- * for kernels that demand it; INTERP/DYNAMIC are skipped. */
- (void)0;
- }
- (void)pi;
- }
-
- /* ---- build ehdr ---- */
- Ehdr64 ehdr;
- memset(&ehdr, 0, sizeof(ehdr));
- ehdr.e_ident[0] = ELFMAG0;
- ehdr.e_ident[1] = ELFMAG1;
- ehdr.e_ident[2] = ELFMAG2;
- ehdr.e_ident[3] = ELFMAG3;
- ehdr.e_ident[4] = ELFCLASS64;
- ehdr.e_ident[5] = ELFDATA2LSB;
- ehdr.e_ident[6] = EV_CURRENT;
- ehdr.e_ident[7] = ELFOSABI_NONE;
- ehdr.e_type = pie ? ET_DYN : ET_EXEC;
- ehdr.e_machine = (u16)e_machine;
- ehdr.e_version = EV_CURRENT;
- ehdr.e_entry = img_base + LinkSyms_at(&img->syms, img->entry_sym - 1)->vaddr;
- ehdr.e_phoff = sizeof(Ehdr64);
- ehdr.e_shoff = shdr_off;
- ehdr.e_flags = 0;
- ehdr.e_ehsize = sizeof(Ehdr64);
- ehdr.e_phentsize = sizeof(Phdr64);
- ehdr.e_phnum = (u16)nphdr_total;
- ehdr.e_shentsize = sizeof(Shdr64);
- ehdr.e_shnum = (u16)nshdr;
- ehdr.e_shstrndx = (u16)shndx_shstrtab;
-
- /* ---- write ehdr, phdrs, build-id note, pad ---- */
- u64 cur_off;
- write_bytes(w, &ehdr, sizeof(ehdr));
- write_bytes(w, phdrs, sizeof(Phdr64) * nphdr_total);
- cur_off = sizeof(ehdr) + sizeof(Phdr64) * nphdr_total;
-
- /* .note.gnu.build-id wire format:
- * u32 namesz = 4 ("GNU\0")
- * u32 descsz = 16
- * u32 type = NT_GNU_BUILD_ID (3)
- * "GNU\0"
- * <16 bytes of build-id>
- *
- * Scripted images don't carry build-id; they have no PT_NOTE phdr to
- * point at it and the file payload would just be dead bytes. */
- if (!scripted) {
- u8 nh[12];
- u32 v;
- v = NOTE_NAME_GNU_LEN;
- nh[0] = (u8)v;
- nh[1] = (u8)(v >> 8);
- nh[2] = (u8)(v >> 16);
- nh[3] = (u8)(v >> 24);
- v = BUILD_ID_DESC_LEN;
- nh[4] = (u8)v;
- nh[5] = (u8)(v >> 8);
- nh[6] = (u8)(v >> 16);
- nh[7] = (u8)(v >> 24);
- v = NOTE_BUILD_ID_TYPE;
- nh[8] = (u8)v;
- nh[9] = (u8)(v >> 8);
- nh[10] = (u8)(v >> 16);
- nh[11] = (u8)(v >> 24);
- write_bytes(w, nh, sizeof nh);
- write_bytes(w, NOTE_NAME_GNU "\0", NOTE_NAME_GNU_LEN);
- write_bytes(w, build_id, BUILD_ID_DESC_LEN);
- cur_off += BUILD_ID_NOTE_BYTES;
- }
-
- /* Pad to first segment file_offset (== headers_load). */
- {
- u32 i;
- for (i = 0; i < img->nsegments; ++i) {
- const LinkSegment* seg = &img->segments[i];
- if (seg->file_size == 0) continue;
- if (cur_off < seg->file_offset) {
- write_zeroes(w, (size_t)(seg->file_offset - cur_off));
- cur_off = seg->file_offset;
- }
- write_bytes(w, img->segment_bytes[seg->id - 1], (size_t)seg->file_size);
- cur_off += seg->file_size;
- }
- }
-
- /* ---- write trailing non-alloc sections ---- */
- if (cur_off < symtab_off) {
- write_zeroes(w, (size_t)(symtab_off - cur_off));
- cur_off = symtab_off;
- }
- {
- u32 i;
- for (i = 0; i < nsyms_emit; ++i) write_sym_rec(w, &recs[i]);
- cur_off += symtab_size;
- }
- if (strtab.len) {
- write_bytes(w, strtab.data, strtab.len);
- cur_off += strtab.len;
- }
- if (shstrtab.len) {
- write_bytes(w, shstrtab.data, shstrtab.len);
- cur_off += shstrtab.len;
- }
-
- /* ---- write section header table ---- */
- if (cur_off < shdr_off) {
- write_zeroes(w, (size_t)(shdr_off - cur_off));
- cur_off = shdr_off;
- }
- {
- Shdr64 sh;
- u32 i;
- /* shdr 0: NULL */
- memset(&sh, 0, sizeof(sh));
- write_bytes(w, &sh, sizeof(sh));
- /* Locate dyn-section names (interned earlier in layout_dyn) so
- * we can override sh_type / sh_link / sh_info / sh_entsize for
- * .dynsym / .dynstr / .gnu.hash / .rela.dyn / .rela.plt /
- * .dynamic. The sh_link cross-references (e.g., .dynsym ->
- * .dynstr) need the matching shdr indices, which we look up by
- * comparing OutShdr.name to the same Sym values. */
- Sym n_dynsym = 0, n_dynstr = 0, n_gnuhash = 0;
- Sym n_reladyn = 0, n_relaplt = 0, n_dynamic = 0;
- Sym n_gotplt = 0;
- if (pie && img->dyn) {
- n_dynsym = pool_intern_slice(c->global, SLICE_LIT(".dynsym"));
- n_dynstr = pool_intern_slice(c->global, SLICE_LIT(".dynstr"));
- n_gnuhash = pool_intern_slice(c->global, SLICE_LIT(".gnu.hash"));
- n_reladyn = pool_intern_slice(c->global, SLICE_LIT(".rela.dyn"));
- n_relaplt = pool_intern_slice(c->global, SLICE_LIT(".rela.plt"));
- n_dynamic = pool_intern_slice(c->global, SLICE_LIT(".dynamic"));
- n_gotplt = pool_intern_slice(c->global, SLICE_LIT(".got.plt"));
- }
- /* Two-pass: first find dynsym/dynstr/gotplt indices for sh_link
- * fixups, then emit. */
- u32 idx_dynsym = 0, idx_dynstr = 0, idx_gotplt = 0;
- if (pie && img->dyn) {
- for (i = 0; i < noutshdr; ++i) {
- Sym nm = outshdrs[i].name;
- u32 ix = outshdrs[i].shdr_idx;
- if (nm == n_dynsym)
- idx_dynsym = ix;
- else if (nm == n_dynstr)
- idx_dynstr = ix;
- else if (nm == n_gotplt)
- idx_gotplt = ix;
- }
- }
- /* per-name output shdrs */
- for (i = 0; i < noutshdr; ++i) {
- const OutShdr* o = &outshdrs[i];
- memset(&sh, 0, sizeof(sh));
- sh.sh_name = outshdr_name_off[i];
- sh.sh_type = sec_sem_to_sht(o->sem);
- sh.sh_flags = sec_flags_to_shf(o->flags);
- sh.sh_addr = img_base + o->vaddr;
- sh.sh_offset = o->file_offset;
- sh.sh_size = o->size;
- sh.sh_link = 0;
- sh.sh_info = 0;
- sh.sh_addralign = o->align ? o->align : 1;
- sh.sh_entsize = (o->sem == SSEM_INIT_ARRAY || o->sem == SSEM_FINI_ARRAY ||
- o->sem == SSEM_PREINIT_ARRAY)
- ? 8
- : 0;
- /* Dyn-section overrides: sh_type / sh_link / sh_info / entsize. */
- if (pie && img->dyn) {
- if (o->name == n_dynsym) {
- sh.sh_type = SHT_DYNSYM;
- sh.sh_link = idx_dynstr;
- sh.sh_info = img->dyn->first_global;
- sh.sh_entsize = 24;
- } else if (o->name == n_dynstr) {
- sh.sh_type = SHT_STRTAB;
- } else if (o->name == n_gnuhash) {
- sh.sh_type = SHT_GNU_HASH;
- sh.sh_link = idx_dynsym;
- } else if (o->name == n_reladyn) {
- sh.sh_type = SHT_RELA;
- sh.sh_link = idx_dynsym;
- sh.sh_entsize = 24;
- } else if (o->name == n_relaplt) {
- sh.sh_type = SHT_RELA;
- sh.sh_link = idx_dynsym;
- sh.sh_info = idx_gotplt;
- sh.sh_entsize = 24;
- sh.sh_flags |= SHF_INFO_LINK;
- } else if (o->name == n_dynamic) {
- sh.sh_type = SHT_DYNAMIC;
- sh.sh_link = idx_dynstr;
- sh.sh_entsize = 16;
- } else if (o->name == n_gotplt) {
- sh.sh_entsize = 8;
- }
- }
- write_bytes(w, &sh, sizeof(sh));
- }
- /* shdr: .note.gnu.build-id (allocatable; in headers PT_LOAD) */
- memset(&sh, 0, sizeof(sh));
- sh.sh_name = sh_name_buildid;
- sh.sh_type = SHT_NOTE;
- sh.sh_flags = SHF_ALLOC;
- sh.sh_addr = build_id_addr;
- sh.sh_offset = build_id_off;
- sh.sh_size = BUILD_ID_NOTE_BYTES;
- sh.sh_addralign = 4;
- write_bytes(w, &sh, sizeof(sh));
- /* shdr: .symtab */
- memset(&sh, 0, sizeof(sh));
- sh.sh_name = sh_name_symtab;
- sh.sh_type = SHT_SYMTAB;
- sh.sh_flags = 0;
- sh.sh_addr = 0;
- sh.sh_offset = symtab_off;
- sh.sh_size = symtab_size;
- sh.sh_link = shndx_strtab;
- sh.sh_info = first_global_idx;
- sh.sh_addralign = 8;
- sh.sh_entsize = ELF64_SYM_SIZE;
- write_bytes(w, &sh, sizeof(sh));
- /* shdr: .strtab */
- memset(&sh, 0, sizeof(sh));
- sh.sh_name = sh_name_strtab;
- sh.sh_type = SHT_STRTAB;
- sh.sh_offset = strtab_off;
- sh.sh_size = strtab_size;
- sh.sh_addralign = 1;
- write_bytes(w, &sh, sizeof(sh));
- /* shdr: .shstrtab */
- memset(&sh, 0, sizeof(sh));
- sh.sh_name = sh_name_shstrtab;
- sh.sh_type = SHT_STRTAB;
- sh.sh_offset = shstrtab_off;
- sh.sh_size = shstrtab_size;
- sh.sh_addralign = 1;
- write_bytes(w, &sh, sizeof(sh));
- }
-
- heap->free(heap, phdrs, sizeof(Phdr64) * nphdr_total);
- heap->free(heap, recs, sizeof(*recs) * (LinkSyms_count(&img->syms) + 1u));
- heap->free(heap, outshdrs, sizeof(*outshdrs) * outshdr_cap);
- if (outshdr_name_off)
- heap->free(heap, outshdr_name_off, sizeof(u32) * (noutshdr + 1u));
- strb_fini(&strtab);
- strb_fini(&shstrtab);
-}
diff --git a/src/link/link_internal.h b/src/link/link_internal.h
@@ -234,14 +234,6 @@ void link_capture_debug_inputs(struct Linker*, LinkImage*);
* vaddr, before resolve_undefs runs. */
#define LINK_PE_IMAGE_BASE 0x140000000ULL
-/* Defined in link_dyn.c. Phase 4: synthesize .interp/.dynsym/.dynstr/
- * .gnu.hash/.rela.dyn/.rela.plt/.plt/.got.plt/.dynamic when the link
- * is producing a PIE / ET_DYN exe. No-op when there are zero imports
- * AND no DSO inputs (in PIE-with-no-imports we still need PT_INTERP
- * and a minimal .dynamic). */
-void layout_dyn(struct Linker*, LinkImage*);
-void link_dyn_state_free(LinkImage*);
-
/* Define / upsert a synthetic global symbol resolved to `vaddr`.
* Satisfies any prior undef ref (e.g. _DYNAMIC from Scrt1.o,
* __dso_handle from libc_nonshared.a) and fans out across per-input
diff --git a/src/link/link_jit.c b/src/link/link_jit.c
@@ -119,15 +119,14 @@ struct CfreeJit {
* RISC-V psABI normally points TP at the start of the TLS image, but
* cfree's freestanding start.c (and the JIT harness) places a 16-byte
* TCB ahead of .tdata and biases TP accordingly so a single TPREL
- * convention works for both arches. Mirrors src/link/link_elf.c's
+ * convention works for both arches. Mirrors src/obj/elf/link.c's
* TLS_TCB_SIZE comment. */
#define JIT_TLS_TCB_SIZE 16ull
static int reloc_is_tlsle(RelocKind k) {
return k == R_AARCH64_TLSLE_ADD_TPREL_HI12 ||
- k == R_AARCH64_TLSLE_ADD_TPREL_LO12_NC ||
- k == R_RV_TPREL_HI20 || k == R_RV_TPREL_LO12_I ||
- k == R_RV_TPREL_LO12_S;
+ k == R_AARCH64_TLSLE_ADD_TPREL_LO12_NC || k == R_RV_TPREL_HI20 ||
+ k == R_RV_TPREL_LO12_I || k == R_RV_TPREL_LO12_S;
}
/* RISC-V PCREL_LO12_I/S target a local "anchor" symbol whose vaddr is
@@ -501,8 +500,7 @@ CfreeJit* cfree_jit_from_image(LinkImage* img) {
* vaddrs are image-relative, so the runtime alias drops
* out and we work in image-space. */
S = (tgt->vaddr - img->tls_vaddr) + JIT_TLS_TCB_SIZE;
- } else if (r->kind == R_RV_PCREL_LO12_I ||
- r->kind == R_RV_PCREL_LO12_S) {
+ } else if (r->kind == R_RV_PCREL_LO12_I || r->kind == R_RV_PCREL_LO12_S) {
/* RISC-V PCREL_LO12: target.vaddr is the paired AUIPC site
* (a local anchor symbol). Recompute the AUIPC's runtime
* displacement and feed it as S to the LO12_I/S apply path so
@@ -682,7 +680,8 @@ CfreeJit* cfree_jit_from_image(LinkImage* img) {
/* Run .init_array constructors in forward order. */
{
typedef void (*VoidFn)(void);
- void* p_start = cfree_jit_lookup(jit, CFREE_SLICE_LIT("__init_array_start"));
+ void* p_start =
+ cfree_jit_lookup(jit, CFREE_SLICE_LIT("__init_array_start"));
void* p_end = cfree_jit_lookup(jit, CFREE_SLICE_LIT("__init_array_end"));
if (p_start && p_end) {
VoidFn* fn = (VoidFn*)p_start;
@@ -750,7 +749,8 @@ void* cfree_jit_lookup(CfreeJit* jit, CfreeSlice name) {
if (!jit || !name.s) return NULL;
/* C-symbol mangling lives in obj_format_c_mangle so JIT lookups by
* source-level name find the symbol regardless of target format.
- * name.s is NUL-terminated (CFREE_SLICE_LIT / cfree_slice_cstr / interned). */
+ * name.s is NUL-terminated (CFREE_SLICE_LIT / cfree_slice_cstr / interned).
+ */
sym = obj_format_c_mangle(jit->c, name.s);
id = symhash_get(&jit->image->globals, sym);
if (id == LINK_SYM_NONE) return NULL;
@@ -1569,7 +1569,8 @@ static void jit_view_copy_debug_section(CfreeJit* jit, u32 ii,
const char* tnm = tnm_s.s;
size_t tnlen = tnm_s.len;
if (tnm) {
- Sym v_tn = pool_intern_slice(view_pool, (Slice){ .s = tnm, .len = tnlen });
+ Sym v_tn =
+ pool_intern_slice(view_pool, (Slice){.s = tnm, .len = tnlen});
ViewSec* tgt = view_sec_find(tab, ntab, v_tn);
if (tgt) {
S = (u64)tgt->snap;
@@ -1667,7 +1668,7 @@ static CfreeObjFile* jit_view_build(CfreeJit* jit) {
}
if (!nm || !jit_view_is_debug_name(nm)) continue;
v_nm = pool_intern_slice(obj_compiler(view_ob)->global,
- (Slice){ .s = nm, .len = nlen });
+ (Slice){.s = nm, .len = nlen});
vs = view_sec_find(tab, ntab, v_nm);
if (!vs) continue;
jit_view_copy_debug_section(jit, ii, (ObjSecId)(k + 1), view_ob, tab,
diff --git a/src/link/link_layout.c b/src/link/link_layout.c
@@ -24,6 +24,7 @@
#include "link/link.h"
#include "link/link_arch.h"
#include "link/link_internal.h"
+#include "obj/format.h"
LinkImage* link_image_alloc(Compiler*); /* defined in link.c */
@@ -38,8 +39,7 @@ static SrcLoc no_loc(void) {
* loader. A future cross-link with mismatched host/target page sizes
* will need a target-derived value here instead. */
u64 link_layout_page_size(Linker* l) {
- const CfreeExecMem* m =
- (l && l->jit_host) ? l->jit_host->execmem : NULL;
+ const CfreeExecMem* m = (l && l->jit_host) ? l->jit_host->execmem : NULL;
if (m && m->page_size) return (u64)m->page_size;
return 0x4000u;
}
@@ -635,8 +635,7 @@ static void link_layout_sections_scripted(Linker* l, LinkImage* img,
u32 nseg_max = 0;
for (si = 0; si < script->nsections; ++si)
- if (!slice_eq_cstr(script->sections[si].name, "/DISCARD/"))
- ++nseg_max;
+ if (!slice_eq_cstr(script->sections[si].name, "/DISCARD/")) ++nseg_max;
img->segments =
nseg_max ? (LinkSegment*)h->alloc(h, sizeof(*img->segments) * nseg_max,
_Alignof(LinkSegment))
@@ -1064,13 +1063,12 @@ LinkImage* link_resolve(Linker* l) {
if (got_map) h->free(h, got_map, sizeof(*got_map) * map_size);
if (stub_map) h->free(h, stub_map, sizeof(*stub_map) * map_size);
}
- /* layout_dyn synthesizes ELF-specific .interp / .dynsym / .dynstr /
- * .rela.dyn / .plt / .got.plt / .dynamic sections. Mach-O has its
- * own equivalent path; COFF binds imports through .idata + IAT
- * (Phase 3.2). Skip for non-ELF formats. */
- metrics_scope_begin(l->c, "link.layout_dyn");
- if (l->c->target.obj == CFREE_OBJ_ELF) layout_dyn(l, img);
- metrics_scope_end(l->c, "link.layout_dyn");
+ {
+ const ObjFormatImpl* fmt = obj_format_lookup(l->c->target.obj);
+ metrics_scope_begin(l->c, "link.layout_dyn");
+ if (fmt && fmt->layout_dyn) fmt->layout_dyn(l, img);
+ metrics_scope_end(l->c, "link.layout_dyn");
+ }
metrics_scope_begin(l->c, "link.resolve_entry");
link_resolve_entry(l, img);
metrics_scope_end(l->c, "link.resolve_entry");
diff --git a/src/link/link_macho.c b/src/link/link_macho.c
@@ -1,2603 +0,0 @@
-/* link_emit_macho — write a dyld-loadable arm64 MH_EXECUTE.
- *
- * Mach-O peer of link_emit_elf. Produces a position-independent
- * MH_EXECUTE that links against libSystem.B.dylib (or any other
- * dylib/.tbd input) via LC_LOAD_DYLIB + LC_DYLD_CHAINED_FIXUPS. The
- * binary is ad-hoc codesigned at the tail so the kernel will exec it
- * on macOS 11+.
- *
- * Layout (Apple's stock arm64 layout):
- *
- * __PAGEZERO vmaddr 0, vmsize 0x100000000, no file bytes
- * __TEXT (R-X)
- * mach_header_64
- * load commands
- * [SF_EXEC sections — .text]
- * [SF_ALLOC R-only sections — .rodata, init/fini_array, etc.]
- * __stubs (12B per import-func)
- * __DATA_CONST (RW initially, dyld marks R-only after fixups)
- * __got (8B per import — both data and func imports)
- * __DATA (R-W)
- * [SF_WRITE sections — .data, .bss]
- * __LINKEDIT (R)
- * dyld_chained_fixups blob
- * dyld_exports_trie blob
- * function starts (empty)
- * data in code (empty)
- * symtab
- * indirect symbol table (one entry per __stubs and __got slot)
- * strtab
- * code signature
- *
- * Imports are routed:
- * CALL26/JUMP26 against an imported function -> __stubs entry
- * GOT_LOAD_PAGE21/PAGEOFF12 against any import -> __got slot
- * ABS64 against an imported symbol -> chained-bind at site
- * ABS64 against a defined internal symbol -> chained-rebase at site
- *
- * arm64-only. x86_64-macos arrives with x64 codegen. */
-
-#include <string.h>
-
-#include "core/bytes.h"
-#include "core/heap.h"
-#include "core/pool.h"
-#include "core/sha256.h"
-#include "core/slice.h"
-#include "core/util.h"
-#include "core/vec.h"
-#include "link/link.h"
-#include "link/link_arch.h"
-#include "link/link_internal.h"
-#include "obj/macho.h"
-
-static SrcLoc no_loc(void) {
- SrcLoc l = {0, 0, 0};
- return l;
-}
-
-/* ---- constants ---- */
-#define MZ_PAGEZERO 0x100000000ULL
-#define MZ_PAGE 0x4000ULL
-#define MZ_GOT_SIZE 8u
-/* __DATA,__thread_ptrs slot size — one pointer per unique TLV referenced
- * via TLVP_LOAD_PAGE21/PAGEOFF12. Each slot holds the address of the
- * matching TLV descriptor in __DATA,__thread_vars. */
-#define MZ_TLVP_SIZE 8u
-
-#define DYLD_CHAINED_PTR_64 2u
-#define DYLD_CHAINED_IMPORT 1u
-
-#define VM_PROT_READ 0x1u
-#define VM_PROT_WRITE 0x2u
-#define VM_PROT_EXECUTE 0x4u
-
-#define CS_MAGIC_EMBEDDED_SIGNATURE 0xfade0cc0u
-#define CS_MAGIC_CODEDIRECTORY 0xfade0c02u
-#define CSSLOT_CODEDIRECTORY 0u
-#define CS_HASHTYPE_SHA256 2u
-#define CS_SHA256_LEN SHA256_DIGEST_LEN
-#define CS_PAGE_SIZE_LOG2 12u
-#define CS_EXECSEG_MAIN_BINARY 1u
-
-/* extra LC ids */
-#define LC_DYLD_INFO_ONLY (0x22u | 0x80000000u)
-#define LC_FUNCTION_STARTS_C 0x26u
-#define LC_DATA_IN_CODE_C 0x29u
-#define LC_CODE_SIGNATURE_C 0x1du
-
-/* ---- byte buffer ---- */
-
-typedef struct MByte {
- Heap* heap;
- u8* data;
- u32 len;
- u32 cap;
-} MByte;
-
-static void mbuf_init(MByte* b, Heap* h) {
- b->heap = h;
- b->data = NULL;
- b->len = 0;
- b->cap = 0;
-}
-static void mbuf_fini(MByte* b) {
- if (b->data) b->heap->free(b->heap, b->data, b->cap);
- b->data = NULL;
- b->cap = b->len = 0;
-}
-static void mbuf_reserve(MByte* b, u32 need) {
- if (need <= b->cap) return;
- (void)VEC_GROW(b->heap, b->data, b->cap, need);
-}
-static u32 mbuf_align(MByte* b, u32 a) {
- u32 n = (u32)ALIGN_UP((u64)b->len, (u64)a);
- if (n > b->len) {
- mbuf_reserve(b, n);
- memset(b->data + b->len, 0, n - b->len);
- b->len = n;
- }
- return b->len;
-}
-static u32 mbuf_append(MByte* b, const void* src, u32 n) {
- u32 off = b->len;
- mbuf_reserve(b, b->len + n);
- if (n) memcpy(b->data + b->len, src, n);
- b->len += n;
- return off;
-}
-static u32 mbuf_u32(MByte* b, u32 v) {
- u8 t[4];
- wr_u32_le(t, v);
- return mbuf_append(b, t, 4);
-}
-static u32 mbuf_u16(MByte* b, u16 v) {
- u8 t[2];
- wr_u16_le(t, v);
- return mbuf_append(b, t, 2);
-}
-static u32 mbuf_u64(MByte* b, u64 v) {
- u8 t[8];
- wr_u64_le(t, v);
- return mbuf_append(b, t, 8);
-}
-static u32 mbuf_u8(MByte* b, u8 v) { return mbuf_append(b, &v, 1); }
-static u32 mbuf_str(MByte* b, const char* s, u32 n) {
- u32 off = b->len;
- mbuf_reserve(b, b->len + n + 1u);
- if (n) memcpy(b->data + b->len, s, n);
- b->data[b->len + n] = 0;
- b->len += n + 1u;
- return off;
-}
-
-/* ---- imports + dylibs ---- */
-
-typedef struct MachImp {
- LinkSymId sym;
- Sym name;
- u32 dylib_ord; /* 1-based ordinal into LC_LOAD_DYLIB list */
- u32 stub_idx; /* 1-based index into __stubs (0 if data import) */
- u32 got_idx; /* 1-based index into __got */
- u32 imports_strx; /* offset into chained-fixups symbol pool */
- u8 is_func;
- u8 weak;
- /* internal=1 means this entry is an in-image symbol that's referenced
- * via GOT_LOAD_PAGE21 / LD64_GOT_LO12_NC (clang emits these for any
- * extern global so a single static-link can later become PIC). The
- * GOT slot stores the symbol's image-relative vaddr and gets a
- * chained-fixup rebase entry (or no entry at all for a weak-undef
- * resolving to NULL). No dylib_ord / stub_idx / chained-fixup bind. */
- u8 internal;
- u8 pad[1];
- u64 internal_vaddr; /* image-relative target vaddr; meaningful only when
- internal=1 */
-} MachImp;
-
-typedef struct MachDylib {
- Sym install;
-} MachDylib;
-
-/* One slot in the synthetic __DATA,__thread_ptrs section per unique TLV
- * descriptor referenced via TLVP_LOAD_PAGE21/PAGEOFF12. Modeled after
- * MachImp's internal-GOT entries: the slot holds the descriptor address
- * (REBASE for internal-to-image descriptors, BIND for dylib-imported
- * ones). The descriptor itself is laid out in __DATA,__thread_vars by
- * either the input objects (internal) or the providing dylib (imported). */
-typedef struct MachTlv {
- LinkSymId sym; /* canonical descriptor LinkSymId */
- u32 tlv_idx; /* 1-based slot index in __thread_ptrs */
- u8 imported; /* 1 == descriptor lives in a dylib (BIND), 0 == internal
- (REBASE) */
- u8 pad[3];
- u32 import_idx; /* 1-based MachImp index when imported (for chained-bind
- ordinal) */
-} MachTlv;
-
-/* ---- planned section ---- */
-
-typedef struct MSec {
- /* Source: either a LinkSection (link_sec_id != 0) or a synthetic
- * pre-built byte buffer (data + size). */
- LinkSectionId link_sec_id;
- const u8* synth_data;
- u32 synth_size;
- /* Mach-O placement */
- const char* segname;
- const char* sectname;
- /* Inline storage for segname/sectname when split from a Mach-O
- * `__SEG,__sect`-form LinkSection name. Names from string literals
- * (synth sections, derived-from-flags defaults) point at .rodata
- * and don't use these. 16 bytes matches the on-disk field width. */
- char segname_buf[16];
- char sectname_buf[16];
- u64 vaddr;
- u64 file_offset;
- u64 size;
- u32 align;
- u32 flags; /* S_TYPE | S_ATTR_* */
- u32 reserved1;
- u32 reserved2;
- u8 segidx; /* 1=__TEXT, 2=__DATA_CONST, 3=__DATA */
- u8 is_zerofill;
- u8 pad[6];
-} MSec;
-
-static void msec_repair_name_ptrs(MSec* m) {
- if (m->segname_buf[0]) m->segname = m->segname_buf;
- if (m->sectname_buf[0]) m->sectname = m->sectname_buf;
-}
-
-typedef struct MSeg {
- const char* name;
- u32 maxprot;
- u32 initprot;
- u64 vmaddr;
- u64 vmsize;
- u64 fileoff;
- u64 filesize;
- u32 nsects; /* MSec count in segment — internal layout */
- u32 first_sec; /* first index into MSec[] */
- u32 nouts; /* OutSec count in segment — what hits the file */
- u32 first_out; /* first index into OutSec[] */
-} MSeg;
-
-/* On-disk section view: one record per (segname, sectname) within a
- * segment. Mach-O requires this — emitting one section_64 per input
- * MSec yields sibling __TEXT,__text records that violate the spec.
- * Built from MSec[] after vaddr placement; reloc-apply still uses
- * MSec[] for byte-buffer addressing. */
-typedef struct OutSec {
- const char* segname;
- const char* sectname;
- u64 vaddr;
- u64 file_offset;
- u64 size;
- u32 align;
- u32 flags;
- u32 reserved1;
- u32 reserved2;
- u8 segidx;
- u8 is_zerofill;
-} OutSec;
-
-/* ---- main context ---- */
-
-typedef struct MCtx {
- LinkImage* img;
- Compiler* c;
- Heap* h;
- Writer* w;
- Linker* linker;
- const LinkArchDesc* arch;
-
- /* imports */
- MachImp* imports;
- u32 nimports;
- u32 nimports_real; /* count of imports with internal=0 (== prefix length;
- * collect_imports appends internal=1 entries last) */
- u32 nimport_funcs;
- MachDylib* dylibs;
- u32 ndylibs;
- /* sym->import index, 1-based, 0 = not an import. Sized to LinkSymId space
- * + 1. */
- u32* sym_to_imp;
- u32 sym_to_imp_size;
-
- /* sections + segments */
- MSec* secs;
- u32 nsecs;
- OutSec* outs;
- u32 nouts;
- MSeg segs[5]; /* PAGEZERO, TEXT, DATA_CONST, DATA, LINKEDIT */
- u32 nsegs;
-
- /* Synthetic byte buffers, owned. */
- u8* stubs_bytes;
- u32 stubs_size;
- u8* got_bytes;
- u32 got_size;
- /* TLV pointer slots — one entry in __DATA,__thread_ptrs per unique
- * descriptor referenced via TLVP_LOAD_PAGE21/PAGEOFF12. sym_to_tlv
- * maps LinkSymId → 1-based slot index (parallel to sym_to_imp). Slot
- * bytes are populated at apply_relocs time once shift_sections has
- * pinned descriptor vaddrs. */
- MachTlv* tlv_slots;
- u32 ntlv;
- u32* sym_to_tlv;
- u32 sym_to_tlv_size;
- u8* tlv_ptrs_bytes;
- u32 tlv_ptrs_size;
- u64 tlv_ptrs_vaddr;
- /* Vaddr of the first thread-local-storage section
- * (__thread_data / __thread_bss). Each TLV descriptor's word 2
- * stores the symbol's offset within this image rather than an
- * absolute address — see apply_relocs's S_THREAD_LOCAL_VARIABLES
- * ABS64 special case. */
- u64 tls_image_vaddr;
- u8 has_tls_image;
-
- /* Final layout (computed during plan) */
- u64 text_vaddr;
- u64 text_filesz;
- u64 stubs_vaddr;
- u64 got_vaddr;
- u64 data_const_vaddr;
- u64 data_vaddr;
- u64 data_const_filesz;
- u64 data_filesz;
- u64 data_memsz;
- u64 linkedit_vaddr;
- u64 linkedit_fileoff;
- u32 entry_offset; /* offset of entry within __TEXT segment */
-
- u64 headers_size; /* header + loadcmds */
-
- /* LINKEDIT contents */
- MByte chained_fixups;
- MByte exports_trie;
- MByte symtab; /* binary nlist_64 array */
- MByte strtab;
- MByte indirect; /* u32 array */
- MByte fn_starts;
- MByte data_in_code;
- MByte codesig;
-
- u32 chained_fixups_off;
- u32 exports_trie_off;
- u32 fn_starts_off;
- u32 data_in_code_off;
- u32 symtab_off;
- u32 indirect_off;
- u32 strtab_off;
- u32 codesig_off;
- u32 codesig_size;
- u32 nsyms;
-
- u8 uuid[16];
-} MCtx;
-
-/* ---- helpers for finding LinkSymbol vaddr ---- */
-
-static LinkSymbol* sym_at(LinkImage* img, LinkSymId id) {
- if (id == LINK_SYM_NONE || id > LinkSyms_count(&img->syms)) return NULL;
- return LinkSyms_at(&img->syms, id - 1);
-}
-
-/* ---- pass: collect imports ---- */
-
-static u32 dylib_ordinal_of(MCtx* x, Sym install) {
- for (u32 j = 0; j < x->ndylibs; ++j)
- if (x->dylibs[j].install == install) return j + 1u;
- return 0;
-}
-
-static void collect_imports(MCtx* x) {
- LinkImage* img = x->img;
- Heap* h = x->h;
-
- x->sym_to_imp_size = LinkSyms_count(&img->syms) + 1u;
- x->sym_to_imp =
- (u32*)h->alloc(h, sizeof(u32) * x->sym_to_imp_size, _Alignof(u32));
- if (!x->sym_to_imp)
- compiler_panic(x->c, no_loc(), "link_macho: oom on sym_to_imp");
- memset(x->sym_to_imp, 0, sizeof(u32) * x->sym_to_imp_size);
-
- u32 cap = 0, cap_d = 0;
- for (u32 i = 0; i < LinkSyms_count(&img->syms); ++i) {
- LinkSymbol* s = LinkSyms_at(&img->syms, i);
- if (!s->imported) continue;
- if (s->name == 0) continue;
- LinkSymId canon = symhash_get(&img->globals, s->name);
- if (canon != LINK_SYM_NONE && canon != s->id) continue;
- if (VEC_GROW(h, x->imports, cap, x->nimports + 1u))
- compiler_panic(x->c, no_loc(), "link_macho: oom on imports");
- MachImp* mi = &x->imports[x->nimports++];
- memset(mi, 0, sizeof(*mi));
- mi->sym = s->id;
- mi->name = s->name;
- mi->is_func = (s->kind == SK_FUNC || s->kind == SK_IFUNC) ? 1 : 0;
- mi->weak = (s->bind == SB_WEAK) ? 1 : 0;
- x->sym_to_imp[s->id] = x->nimports;
- }
-
- /* Back-classify: any CALL26/JUMP26 reloc target -> function. */
- for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) {
- LinkRelocApply* r = LinkRelocs_at(&img->relocs, i);
- if (!x->arch->is_branch_reloc || !x->arch->is_branch_reloc(r->kind))
- continue;
- if (r->target == LINK_SYM_NONE || r->target >= x->sym_to_imp_size) continue;
- u32 idx = x->sym_to_imp[r->target];
- if (!idx) {
- /* Resolve through canonical. */
- LinkSymbol* tgt = LinkSyms_at(&img->syms, r->target - 1);
- if (tgt->name == 0) continue;
- LinkSymId canon = symhash_get(&img->globals, tgt->name);
- if (canon == LINK_SYM_NONE || canon >= x->sym_to_imp_size) continue;
- idx = x->sym_to_imp[canon];
- if (!idx) continue;
- /* Stash so future lookups skip this loop. */
- x->sym_to_imp[r->target] = idx;
- }
- x->imports[idx - 1].is_func = 1;
- }
-
- /* Build dylib ordinal table. Pull soname from the providing DSO. */
- for (u32 i = 0; i < x->nimports; ++i) {
- MachImp* mi = &x->imports[i];
- LinkSymbol* s = sym_at(img, mi->sym);
- LinkInputId dso_id = s ? s->dso_input_id : LINK_INPUT_NONE;
- Sym install = 0;
- if (dso_id != LINK_INPUT_NONE && x->linker &&
- dso_id - 1u < LinkInputs_count(&x->linker->inputs)) {
- LinkInput* in = LinkInputs_at(&x->linker->inputs, dso_id - 1u);
- if (in->kind == LINK_INPUT_DSO_BYTES) install = in->soname;
- }
- if (install == 0)
- install = pool_intern_slice(x->c->global, SLICE_LIT("/usr/lib/libSystem.B.dylib"));
- u32 ord = dylib_ordinal_of(x, install);
- if (!ord) {
- if (VEC_GROW(h, x->dylibs, cap_d, x->ndylibs + 1u))
- compiler_panic(x->c, no_loc(), "link_macho: oom on dylibs");
- x->dylibs[x->ndylibs].install = install;
- ++x->ndylibs;
- ord = x->ndylibs;
- }
- mi->dylib_ord = ord;
- }
-
- /* Always include every DSO input's install-name. */
- if (x->linker) {
- for (u32 ii = 0; ii < LinkInputs_count(&x->linker->inputs); ++ii) {
- LinkInput* in = LinkInputs_at(&x->linker->inputs, ii);
- if (in->kind != LINK_INPUT_DSO_BYTES) continue;
- if (in->soname == 0) continue;
- if (dylib_ordinal_of(x, in->soname)) continue;
- if (VEC_GROW(h, x->dylibs, cap_d, x->ndylibs + 1u))
- compiler_panic(x->c, no_loc(), "link_macho: oom on dylibs");
- x->dylibs[x->ndylibs].install = in->soname;
- ++x->ndylibs;
- }
- }
-
- /* All entries so far are real imports; remember the partition point
- * so import/symtab table emit loops can skip the appended internals. */
- x->nimports_real = x->nimports;
-
- /* Internal GOT pass. clang on Mach-O routes every extern-global
- * reference through the GOT (GOT_LOAD_PAGE21 / LD64_GOT_LO12_NC), so
- * even a common symbol or weak-undef that ends up resolved within the
- * image still needs a __got slot. For each such reloc whose target
- * isn't an existing import, materialize a MachImp with internal=1.
- * The slot's contents are filled at write time and a chained-fixup
- * REBASE entry (or none, for weak undef → NULL) keeps it valid
- * post-ASLR. */
- for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) {
- LinkRelocApply* r = LinkRelocs_at(&img->relocs, i);
- if (!x->arch->is_got_load_reloc || !x->arch->is_got_load_reloc(r->kind))
- continue;
- if (r->target == LINK_SYM_NONE || r->target >= x->sym_to_imp_size) continue;
- if (x->sym_to_imp[r->target]) continue;
- LinkSymbol* t = sym_at(img, r->target);
- if (!t) continue;
- /* Resolve through canonical so we share a single slot per symbol. */
- LinkSymId canon = r->target;
- if (t->name != 0) {
- LinkSymId hit = symhash_get(&img->globals, t->name);
- if (hit != LINK_SYM_NONE) {
- canon = hit;
- if (x->sym_to_imp[canon]) {
- x->sym_to_imp[r->target] = x->sym_to_imp[canon];
- continue;
- }
- t = sym_at(img, canon);
- if (!t) continue;
- }
- }
- if (VEC_GROW(h, x->imports, cap, x->nimports + 1u))
- compiler_panic(x->c, no_loc(), "link_macho: oom on internal got");
- MachImp* mi = &x->imports[x->nimports++];
- memset(mi, 0, sizeof(*mi));
- mi->sym = canon;
- mi->name = t->name;
- mi->is_func = (t->kind == SK_FUNC || t->kind == SK_IFUNC) ? 1 : 0;
- mi->weak = (t->bind == SB_WEAK) ? 1 : 0;
- mi->internal = 1;
- /* internal_vaddr is read fresh from the LinkSymbol when the slot
- * gets initialized — collect_imports runs before shift_sections
- * rebases section vaddrs to Mach-O layout, so capturing here would
- * be stale by the time __got bytes are written. */
- mi->internal_vaddr = 0;
- x->sym_to_imp[canon] = x->nimports;
- if (canon != r->target) x->sym_to_imp[r->target] = x->nimports;
- }
-
- /* Assign stub_idx + got_idx. Internal entries get a slot but no stub:
- * the call site (CALL26) on internal funcs goes direct, not via stub. */
- u32 stub_run = 0;
- for (u32 i = 0; i < x->nimports; ++i) {
- MachImp* mi = &x->imports[i];
- mi->got_idx = i + 1u;
- if (mi->is_func && !mi->internal) mi->stub_idx = ++stub_run;
- }
- x->nimport_funcs = stub_run;
-}
-
-/* ---- pass: collect TLV pointer slots ----
- *
- * Mirror of collect_imports' internal-GOT pass, but for TLV descriptors:
- * each unique descriptor referenced via ARM64_RELOC_TLVP_LOAD_PAGE21 /
- * PAGEOFF12 gets one slot in the synthetic __DATA,__thread_ptrs section.
- * The slot's runtime value is the descriptor's address; we patch it at
- * apply_relocs time (REBASE for in-image descriptors, BIND for ones in
- * a dylib).
- *
- * Slots are deduplicated by canonical LinkSymId so a single descriptor
- * referenced from N call sites shares one __thread_ptrs entry. */
-static void collect_tlv(MCtx* x) {
- LinkImage* img = x->img;
- Heap* h = x->h;
- x->sym_to_tlv_size = LinkSyms_count(&img->syms) + 1u;
- x->sym_to_tlv =
- (u32*)h->alloc(h, sizeof(u32) * x->sym_to_tlv_size, _Alignof(u32));
- if (!x->sym_to_tlv)
- compiler_panic(x->c, no_loc(), "link_macho: oom on sym_to_tlv");
- memset(x->sym_to_tlv, 0, sizeof(u32) * x->sym_to_tlv_size);
-
- u32 cap = 0;
- for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) {
- LinkRelocApply* r = LinkRelocs_at(&img->relocs, i);
- if (!x->arch->is_tlvp_reloc || !x->arch->is_tlvp_reloc(r->kind)) continue;
- if (r->target == LINK_SYM_NONE || r->target >= x->sym_to_tlv_size) continue;
- /* Resolve through canonical so multiple per-input duplicate undefs
- * collapse onto one __thread_ptrs slot. */
- LinkSymId canon = r->target;
- LinkSymbol* t = sym_at(img, r->target);
- if (!t) continue;
- if (t->name != 0) {
- LinkSymId hit = symhash_get(&img->globals, t->name);
- if (hit != LINK_SYM_NONE) {
- canon = hit;
- t = sym_at(img, canon);
- if (!t) continue;
- }
- }
- if (x->sym_to_tlv[canon]) {
- if (canon != r->target) x->sym_to_tlv[r->target] = x->sym_to_tlv[canon];
- continue;
- }
- if (VEC_GROW(h, x->tlv_slots, cap, x->ntlv + 1u))
- compiler_panic(x->c, no_loc(), "link_macho: oom on tlv_slots");
- MachTlv* ts = &x->tlv_slots[x->ntlv++];
- memset(ts, 0, sizeof(*ts));
- ts->sym = canon;
- ts->tlv_idx = x->ntlv;
- ts->imported = t->imported ? 1u : 0u;
- /* If the descriptor is imported we route the bind through the
- * symbol's MachImp slot — that's where dyld's chained-import index
- * comes from. When this loop fires the imp pass has already
- * materialized the entry (real imports were processed first); the
- * lookup may also have stashed an alias for non-canonical ids. */
- if (ts->imported) {
- u32 idx = (canon < x->sym_to_imp_size) ? x->sym_to_imp[canon] : 0u;
- if (!idx && t->name != 0) {
- LinkSymId hit2 = symhash_get(&img->globals, t->name);
- if (hit2 != LINK_SYM_NONE && hit2 < x->sym_to_imp_size)
- idx = x->sym_to_imp[hit2];
- }
- ts->import_idx = idx;
- }
- x->sym_to_tlv[canon] = x->ntlv;
- if (canon != r->target) x->sym_to_tlv[r->target] = x->ntlv;
- }
-}
-
-/* ---- pass: plan Mach-O sections ----
- *
- * Walks LinkImage sections. Each non-zero-size LinkSection becomes one
- * MSec. Synthetic __stubs and __got are appended at the right segment
- * boundaries. Vaddr and file_offset are assigned in a single forward
- * pass starting at __TEXT base; __PAGEZERO and __LINKEDIT are special. */
-
-static void seg_init(MSeg* s, const char* name, u32 maxp, u32 initp) {
- memset(s, 0, sizeof(*s));
- s->name = name;
- s->maxprot = maxp;
- s->initprot = initp;
-}
-
-static int sec_is_writable(const LinkSection* ls) {
- return (ls->flags & SF_WRITE) != 0u;
-}
-static int sec_is_exec(const LinkSection* ls) {
- return (ls->flags & SF_EXEC) != 0u;
-}
-static int sec_is_zerofill(const LinkSection* ls) {
- return ls->sem == SSEM_NOBITS;
-}
-
-static int section_has_abs64_reloc(const LinkImage* img, LinkSectionId id) {
- for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) {
- const LinkRelocApply* r = LinkRelocs_at(&img->relocs, i);
- if (r->link_section_id == id && r->kind == R_ABS64) return 1;
- }
- return 0;
-}
-
-static int sec_needs_data_const(const LinkImage* img, const LinkSection* ls) {
- if (!ls || !ls->size || sec_is_exec(ls) || sec_is_writable(ls) ||
- sec_is_zerofill(ls)) {
- return 0;
- }
- return section_has_abs64_reloc(img, ls->id);
-}
-
-/* Pick (segname, sectname) for a LinkSection. Comma-form Mach-O names
- * round-trip into MSec's inline 16-byte buffers; literal defaults point
- * at .rodata strings. Caller passes the MSec for per-section storage —
- * a previous version used a shared static buffer which aliased all
- * sections to whichever name was set last. */
-static void pick_macho_names(const LinkSection* ls, Compiler* c, MSec* m) {
- Slice nm_s = pool_slice(c->global, ls->name);
- const char* nm = nm_s.s;
- size_t nlen = nm_s.len;
- if (nm) {
- /* Comma-form: "__SEG,__sect" round-tripped from a Mach-O input. */
- for (size_t i = 0; i < nlen; ++i) {
- if (nm[i] == ',') {
- u32 seg_n = (u32)(i > 15 ? 15 : i);
- memcpy(m->segname_buf, nm, seg_n);
- m->segname_buf[seg_n] = 0;
- u32 sect_n = (u32)((nlen - i - 1) > 15 ? 15 : (nlen - i - 1));
- memcpy(m->sectname_buf, nm + i + 1, sect_n);
- m->sectname_buf[sect_n] = 0;
- m->segname = m->segname_buf;
- m->sectname = m->sectname_buf;
- return;
- }
- }
- }
- /* Derive from flags. */
- if (sec_is_exec(ls)) {
- m->segname = "__TEXT";
- m->sectname = "__text";
- } else if (sec_is_writable(ls)) {
- m->segname = "__DATA";
- m->sectname = sec_is_zerofill(ls) ? "__bss" : "__data";
- } else {
- m->segname = "__TEXT";
- m->sectname = "__const";
- }
-}
-
-static void plan_layout(MCtx* x) {
- LinkImage* img = x->img;
- Heap* h = x->h;
-
- /* PAGEZERO */
- seg_init(&x->segs[0], "__PAGEZERO", 0, 0);
- x->segs[0].vmaddr = 0;
- x->segs[0].vmsize = MZ_PAGEZERO;
- x->segs[0].fileoff = 0;
- x->segs[0].filesize = 0;
- x->segs[0].nsects = 0;
- x->segs[0].first_sec = 0;
-
- /* Segments 1..4 */
- seg_init(&x->segs[1], "__TEXT", VM_PROT_READ | VM_PROT_EXECUTE,
- VM_PROT_READ | VM_PROT_EXECUTE);
- seg_init(&x->segs[2], "__DATA_CONST", VM_PROT_READ | VM_PROT_WRITE,
- VM_PROT_READ | VM_PROT_WRITE);
- seg_init(&x->segs[3], "__DATA", VM_PROT_READ | VM_PROT_WRITE,
- VM_PROT_READ | VM_PROT_WRITE);
- seg_init(&x->segs[4], "__LINKEDIT", VM_PROT_READ, VM_PROT_READ);
- x->nsegs = 5;
-
- /* Pre-allocate MSec capacity: every LinkSection + 2 synth (__stubs,
- * __got). (LinkSections from the dynamic-link layer — .dynsym / .plt
- * etc. — were synthesized by layout_dyn for ELF; we won't have them
- * since pie wasn't set on this Linker. Still, oversize by a few.) */
- u32 cap = LinkRelocs_count(&img->relocs) + img->nsections + 4u;
- x->secs = (MSec*)h->alloc(h, sizeof(MSec) * cap, _Alignof(MSec));
- if (!x->secs) compiler_panic(x->c, no_loc(), "link_macho: oom on MSec");
- memset(x->secs, 0, sizeof(MSec) * cap);
- x->nsecs = 0;
-
- /* Pass 1: __TEXT segment. Header + loadcmds reserve front. */
- /* We need the exact header_size to set first sec's file_offset. We'll
- * compute it later, but reserve a placeholder; for now use 0 and patch
- * in pass 4 (offsets get bumped). */
-
- u64 text_vaddr = MZ_PAGEZERO;
- /* We'll compute headers_size after plan; stash starting vaddr only. */
- x->segs[1].vmaddr = text_vaddr;
- x->segs[1].fileoff = 0;
- x->text_vaddr = text_vaddr;
-
- /* Collect: (a) exec sections, (b) read-only allocatable sections. */
- /* (cursor advances per-segment in pass 2; nothing to track here) */
-
- /* We don't know the header size yet; walk sections first to enumerate
- * MSec entries, then back-fill file_offset/vaddr after we know the
- * load-command count. */
-
- u32 first_text_sec = x->nsecs;
-
- for (u32 i = 0; i < img->nsections; ++i) {
- LinkSection* ls = &img->sections[i];
- if (!ls->size) continue;
- if (sec_is_writable(ls)) continue;
- if (sec_is_zerofill(ls)) continue; /* placed in __DATA */
- if (sec_needs_data_const(img, ls)) continue;
- MSec* m = &x->secs[x->nsecs++];
- memset(m, 0, sizeof(*m));
- m->link_sec_id = ls->id;
- pick_macho_names(ls, x->c, m);
- /* Force into __TEXT. */
- if (!slice_eq_cstr(slice_from_cstr(m->segname), "__TEXT"))
- m->segname = "__TEXT";
- m->align = ls->align ? ls->align : 1u;
- m->size = ls->size;
- m->segidx = 1;
- m->flags = sec_is_exec(ls) ? (0x80000000u /*S_ATTR_PURE_INSTRUCTIONS*/ |
- 0x00000400u /*S_ATTR_SOME_INSTRUCTIONS*/)
- : 0u;
- }
-
- /* __stubs synthetic */
- if (x->nimport_funcs) {
- x->stubs_size = x->nimport_funcs * x->arch->macho_stub_size;
- x->stubs_bytes = (u8*)h->alloc(h, x->stubs_size, 4);
- if (!x->stubs_bytes)
- compiler_panic(x->c, no_loc(), "link_macho: oom on stubs");
- memset(x->stubs_bytes, 0, x->stubs_size);
- MSec* m = &x->secs[x->nsecs++];
- memset(m, 0, sizeof(*m));
- m->synth_data = x->stubs_bytes;
- m->synth_size = x->stubs_size;
- m->segname = "__TEXT";
- m->sectname = "__stubs";
- m->align = 4u;
- m->size = x->stubs_size;
- m->segidx = 1;
- m->flags = 0x80000000u | 0x00000400u | 0x00000008u /*S_SYMBOL_STUBS*/;
- m->reserved1 = 0; /* fill in later: indirect-symtab base */
- m->reserved2 = x->arch->macho_stub_size;
- }
- x->segs[1].nsects = x->nsecs - first_text_sec;
- x->segs[1].first_sec = first_text_sec;
-
- /* __DATA_CONST: __got synth */
- u32 first_dc = x->nsecs;
- if (x->nimports) {
- x->got_size = x->nimports * MZ_GOT_SIZE;
- x->got_bytes = (u8*)h->alloc(h, x->got_size, 8);
- if (!x->got_bytes) compiler_panic(x->c, no_loc(), "link_macho: oom on got");
- memset(x->got_bytes, 0, x->got_size);
- MSec* m = &x->secs[x->nsecs++];
- memset(m, 0, sizeof(*m));
- m->synth_data = x->got_bytes;
- m->synth_size = x->got_size;
- m->segname = "__DATA_CONST";
- m->sectname = "__got";
- m->align = 8u;
- m->size = x->got_size;
- m->segidx = 2;
- m->flags = 0x00000006u /*S_NON_LAZY_SYMBOL_POINTERS*/;
- m->reserved1 = 0; /* indirect-symtab base */
- }
- for (u32 i = 0; i < img->nsections; ++i) {
- LinkSection* ls = &img->sections[i];
- if (!sec_needs_data_const(img, ls)) continue;
- MSec* m = &x->secs[x->nsecs++];
- memset(m, 0, sizeof(*m));
- m->link_sec_id = ls->id;
- pick_macho_names(ls, x->c, m);
- m->segname = "__DATA_CONST";
- m->align = ls->align ? ls->align : 1u;
- m->size = ls->size;
- m->segidx = 2;
- m->flags = 0;
- }
- x->segs[2].nsects = x->nsecs - first_dc;
- x->segs[2].first_sec = first_dc;
-
- /* __DATA segment: writable sections + zerofill. */
- u32 first_d = x->nsecs;
- for (u32 i = 0; i < img->nsections; ++i) {
- LinkSection* ls = &img->sections[i];
- if (!ls->size && !sec_is_zerofill(ls)) continue;
- if (!sec_is_writable(ls)) continue;
- MSec* m = &x->secs[x->nsecs++];
- memset(m, 0, sizeof(*m));
- m->link_sec_id = ls->id;
- pick_macho_names(ls, x->c, m);
- if (!slice_eq_cstr(slice_from_cstr(m->segname), "__DATA"))
- m->segname = "__DATA";
- m->align = ls->align ? ls->align : 1u;
- m->size = ls->size;
- m->segidx = 3;
- m->is_zerofill = sec_is_zerofill(ls) ? 1 : 0;
- m->flags = m->is_zerofill ? 0x00000001u /*S_ZEROFILL*/ : 0;
- /* dyld dispatches on the section type byte (low 8 bits of flags).
- * __mod_init_func / __mod_term_func sections must carry the
- * S_MOD_INIT_FUNC_POINTERS / S_MOD_TERM_FUNC_POINTERS type or dyld
- * skips them entirely — leaving constructors unrun at startup. */
- if (slice_eq_cstr(slice_from_cstr(m->sectname), "__mod_init_func"))
- m->flags = 0x00000009u /*S_MOD_INIT_FUNC_POINTERS*/;
- else if (slice_eq_cstr(slice_from_cstr(m->sectname), "__mod_term_func"))
- m->flags = 0x0000000au /*S_MOD_TERM_FUNC_POINTERS*/;
- else if (ls->flags & SF_TLS) {
- /* TLV sections: dyld dispatches by section type, not name. Map
- * __thread_vars → S_THREAD_LOCAL_VARIABLES (descriptor records),
- * __thread_data → S_THREAD_LOCAL_REGULAR (initial data),
- * __thread_bss → S_THREAD_LOCAL_ZEROFILL (zero-init data). Done
- * by sectname so per-TU inputs without a Mach-O ext_type still
- * get the right section type. */
- if (slice_eq_cstr(slice_from_cstr(m->sectname), "__thread_vars")) {
- m->flags = S_THREAD_LOCAL_VARIABLES;
- /* Each descriptor is three pointers (24B) whose first word is
- * dyld's _tlv_bootstrap thunk pointer. Clang/llvm emit
- * __thread_vars with on-disk alignment 1 (relying on layout to
- * land it on 8); force 8-alignment here so the descriptor
- * pointers fall on 8-byte boundaries — dyld's chained-fixup
- * processing assumes that. */
- if (m->align < 8u) m->align = 8u;
- } else if (m->is_zerofill)
- m->flags = S_THREAD_LOCAL_ZEROFILL;
- else
- m->flags = S_THREAD_LOCAL_REGULAR;
- }
- }
- /* __thread_ptrs synthetic (TLV pointer slots). Emitted into __DATA
- * after the user's TLV input sections so descriptors and their
- * pointers share the same segment. Each slot's runtime initial
- * value (= TLV descriptor address) is patched during apply_relocs. */
- if (x->ntlv) {
- x->tlv_ptrs_size = x->ntlv * MZ_TLVP_SIZE;
- x->tlv_ptrs_bytes = (u8*)h->alloc(h, x->tlv_ptrs_size, 8);
- if (!x->tlv_ptrs_bytes)
- compiler_panic(x->c, no_loc(), "link_macho: oom on tlv_ptrs");
- memset(x->tlv_ptrs_bytes, 0, x->tlv_ptrs_size);
- MSec* m = &x->secs[x->nsecs++];
- memset(m, 0, sizeof(*m));
- m->synth_data = x->tlv_ptrs_bytes;
- m->synth_size = x->tlv_ptrs_size;
- m->segname = "__DATA";
- m->sectname = "__thread_ptrs";
- m->align = 8u;
- m->size = x->tlv_ptrs_size;
- m->segidx = 3;
- m->flags = S_THREAD_LOCAL_VARIABLE_POINTERS;
- }
- x->segs[3].nsects = x->nsecs - first_d;
- x->segs[3].first_sec = first_d;
-
- /* Group MSecs by (segname, sectname) within each segment so vaddr
- * placement keeps same-named runs contiguous. Otherwise Phase B's
- * adjacency-based coalescing splits a single Mach-O section into
- * multiple OutSecs (e.g. `.text` from an in-memory ObjBuilder and
- * `__TEXT,__text` from a Mach-O .o input both map to `__TEXT,__text`
- * but arrive in separate link_layout groups, interleaved with other
- * sections from each input). Stable insertion sort preserves input
- * order within a name, which matters for synth __stubs/__thread_ptrs
- * order relative to peers. */
- for (u32 i = 0; i < x->nsegs; ++i) {
- MSeg* sg = &x->segs[i];
- if (sg->nsects < 2) continue;
- u32 base = sg->first_sec;
- u32 n = sg->nsects;
- for (u32 a = 1; a < n; ++a) {
- MSec key = x->secs[base + a];
- msec_repair_name_ptrs(&key);
- u32 j = a;
- while (j > 0) {
- MSec* prev = &x->secs[base + j - 1];
- /* Ordering compare for stable sort: slices don't order, keep strcmp. */
- int cmp = strcmp(prev->segname, key.segname); /* ordering */
- if (cmp == 0)
- cmp = strcmp(prev->sectname, key.sectname); /* ordering */
- if (cmp <= 0) break;
- x->secs[base + j] = x->secs[base + j - 1];
- msec_repair_name_ptrs(&x->secs[base + j]);
- --j;
- }
- x->secs[base + j] = key;
- msec_repair_name_ptrs(&x->secs[base + j]);
- }
- }
-
- /* Phase A: count OutSecs per segment (distinct sectnames) so we can
- * size the load commands before placing vaddrs. Phase B builds the
- * actual OutSec[] after placement, when vaddrs are final. */
- for (u32 i = 0; i < x->nsegs; ++i) {
- MSeg* sg = &x->segs[i];
- u32 cnt = 0;
- for (u32 a = sg->first_sec; a < sg->first_sec + sg->nsects; ++a) {
- int seen = 0;
- for (u32 b = sg->first_sec; b < a; ++b) {
- if (slice_eq_cstr(slice_from_cstr(x->secs[a].sectname),
- x->secs[b].sectname) &&
- slice_eq_cstr(slice_from_cstr(x->secs[a].segname),
- x->secs[b].segname)) {
- seen = 1;
- break;
- }
- }
- if (!seen) ++cnt;
- }
- sg->nouts = cnt;
- sg->first_out = 0; /* assigned in Phase B */
- }
-
- /* Compute load-command count + sizeofcmds, then back-fill section
- * offsets. Layout pass 2. */
- u32 nseg_real = 0;
- for (u32 i = 0; i < x->nsegs; ++i) {
- /* Skip __DATA_CONST or __DATA if no sections (edge case). */
- if (i == 0) {
- ++nseg_real;
- continue;
- } /* PAGEZERO */
- if (i == 4) {
- ++nseg_real;
- continue;
- } /* LINKEDIT always */
- if (x->segs[i].nsects > 0) ++nseg_real;
- }
- /* Each LC_SEGMENT_64 carries 72 + 80*nouts bytes (one section_64
- * record per coalesced (segname,sectname), not per MSec). */
- u32 sizeofcmds = 0;
- for (u32 i = 0; i < x->nsegs; ++i) {
- if (i == 0 || i == 4) {
- sizeofcmds += MACHO_SEGCMD64_SIZE; /* no sections */
- continue;
- }
- if (x->segs[i].nsects == 0) continue;
- sizeofcmds += MACHO_SEGCMD64_SIZE + x->segs[i].nouts * MACHO_SECT64_SIZE;
- }
- (void)nseg_real;
- /* LC_DYLD_CHAINED_FIXUPS / LC_DYLD_EXPORTS_TRIE */
- sizeofcmds += 16u + 16u;
- /* LC_SYMTAB / LC_DYSYMTAB */
- sizeofcmds += MACHO_SYMTAB_CMD_SIZE + MACHO_DYSYMTAB_CMD_SIZE;
- /* LC_LOAD_DYLINKER */
- {
- u32 ld_size = 12u + (u32)(sizeof("/usr/lib/dyld") - 1u) + 1u;
- sizeofcmds += (u32)ALIGN_UP((u64)ld_size, 8u);
- }
- /* LC_UUID + LC_BUILD_VERSION + LC_MAIN */
- sizeofcmds += 24u + 24u + 24u;
- /* LC_LOAD_DYLIB per dylib */
- for (u32 i = 0; i < x->ndylibs; ++i) {
- size_t nl = pool_slice(x->c->global, x->dylibs[i].install).len;
- u32 sz = 24u + (u32)nl + 1u;
- sizeofcmds += (u32)ALIGN_UP((u64)sz, 8u);
- }
- /* LC_FUNCTION_STARTS / LC_DATA_IN_CODE / LC_CODE_SIGNATURE */
- sizeofcmds += 16u + 16u + 16u;
-
- x->headers_size = MACHO_HDR64_SIZE + sizeofcmds;
-
- /* Now place sections in __TEXT, __DATA_CONST, __DATA. */
- u64 vaddr = MZ_PAGEZERO + x->headers_size;
- u64 fileoff = x->headers_size;
- /* Pad __TEXT sections to natural alignment. */
- for (u32 i = 0; i < x->nsegs; ++i) {
- if (i == 0 || i == 4) continue;
- MSeg* sg = &x->segs[i];
- if (i > 1) {
- /* page-align the start of __DATA_CONST and __DATA */
- vaddr = ALIGN_UP(vaddr, MZ_PAGE);
- fileoff = ALIGN_UP(fileoff, MZ_PAGE);
- }
- sg->vmaddr = (i == 1) ? MZ_PAGEZERO : vaddr;
- sg->fileoff = (i == 1) ? 0 : fileoff;
- /* __TEXT carries the headers_size + sections. */
- u64 seg_start_v = sg->vmaddr;
- u64 seg_start_f = sg->fileoff;
- /* For __TEXT, sections begin after the header area. */
- u64 cur_v = (i == 1) ? (seg_start_v + x->headers_size) : seg_start_v;
- u64 cur_f = (i == 1) ? (seg_start_f + x->headers_size) : seg_start_f;
- u64 first_zerofill_v = 0;
- int seen_zerofill = 0;
- /* Non-zerofill first */
- for (u32 j = 0; j < sg->nsects; ++j) {
- MSec* m = &x->secs[sg->first_sec + j];
- if (m->is_zerofill) continue;
- cur_v = ALIGN_UP(cur_v, (u64)m->align);
- cur_f = ALIGN_UP(cur_f, (u64)m->align);
- m->vaddr = cur_v;
- m->file_offset = cur_f;
- cur_v += m->size;
- cur_f += m->size;
- }
- first_zerofill_v = cur_v;
- /* zerofill last (no file bytes) */
- for (u32 j = 0; j < sg->nsects; ++j) {
- MSec* m = &x->secs[sg->first_sec + j];
- if (!m->is_zerofill) continue;
- cur_v = ALIGN_UP(cur_v, (u64)m->align);
- m->vaddr = cur_v;
- m->file_offset = 0;
- cur_v += m->size;
- seen_zerofill = 1;
- }
- sg->filesize = (i == 1)
- ? (cur_f - seg_start_f)
- : (first_zerofill_v ? (first_zerofill_v - seg_start_v)
- : (cur_v - seg_start_v));
- sg->vmsize = ALIGN_UP(cur_v - seg_start_v, MZ_PAGE);
- if (sg->vmsize == 0 && sg->nsects > 0) sg->vmsize = MZ_PAGE;
- if (i == 1) {
- x->stubs_vaddr = 0;
- for (u32 j = 0; j < sg->nsects; ++j) {
- MSec* m = &x->secs[sg->first_sec + j];
- if (slice_eq_cstr(slice_from_cstr(m->sectname), "__stubs"))
- x->stubs_vaddr = m->vaddr;
- }
- x->text_filesz = sg->filesize;
- }
- if (i == 2) {
- for (u32 j = 0; j < sg->nsects; ++j) {
- MSec* m = &x->secs[sg->first_sec + j];
- if (slice_eq_cstr(slice_from_cstr(m->sectname), "__got"))
- x->got_vaddr = m->vaddr;
- }
- x->data_const_vaddr = sg->vmaddr;
- x->data_const_filesz = sg->filesize;
- }
- if (i == 3) {
- for (u32 j = 0; j < sg->nsects; ++j) {
- MSec* m = &x->secs[sg->first_sec + j];
- if (slice_eq_cstr(slice_from_cstr(m->sectname), "__thread_ptrs"))
- x->tlv_ptrs_vaddr = m->vaddr;
- /* TLS storage image base: min vaddr across __thread_data and
- * __thread_bss sections. __thread_vars is excluded — it holds
- * the descriptors, not the data that maps into the per-thread
- * block. */
- if ((slice_eq_cstr(slice_from_cstr(m->sectname), "__thread_data") ||
- slice_eq_cstr(slice_from_cstr(m->sectname), "__thread_bss")) &&
- (!x->has_tls_image || m->vaddr < x->tls_image_vaddr)) {
- x->tls_image_vaddr = m->vaddr;
- x->has_tls_image = 1;
- }
- }
- x->data_vaddr = sg->vmaddr;
- x->data_filesz = sg->filesize;
- x->data_memsz = sg->vmsize;
- }
- vaddr = sg->vmaddr + sg->vmsize;
- /* Mach-O segments are mapped in page units. If a segment's memory
- * image extends past its initialized file bytes (for example
- * __DATA,__bss), the following segment's fileoff must not reuse those
- * pages or the kernel can map later file contents into the zero-fill
- * tail. */
- fileoff = sg->fileoff + ((sg->vmsize > ALIGN_UP(sg->filesize, MZ_PAGE))
- ? sg->vmsize
- : sg->filesize);
- (void)seen_zerofill;
- }
- /* LINKEDIT placeholder; size is filled after blob assembly. */
- vaddr = ALIGN_UP(vaddr, MZ_PAGE);
- fileoff = ALIGN_UP(fileoff, MZ_PAGE);
- x->segs[4].vmaddr = vaddr;
- x->segs[4].fileoff = fileoff;
- x->linkedit_vaddr = vaddr;
- x->linkedit_fileoff = fileoff;
-
- /* Encode __stubs bytes now that vaddrs are settled. Internal-GOT
- * entries have stub_idx=0 (direct CALL26, no stub) and must be
- * skipped so the (stub_idx - 1u) arithmetic doesn't wrap. */
- for (u32 i = 0; i < x->nimports; ++i) {
- MachImp* mi = &x->imports[i];
- if (!mi->is_func || !mi->stub_idx) continue;
- u64 stub_v =
- x->stubs_vaddr + (mi->stub_idx - 1u) * x->arch->macho_stub_size;
- u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE;
- x->arch->emit_macho_stub(
- x->stubs_bytes + (mi->stub_idx - 1u) * x->arch->macho_stub_size, stub_v,
- got_v);
- }
-
- /* Phase B: build OutSec[] now that all MSec vaddrs are final. Walk
- * MSecs sorted by (segidx, vaddr) and coalesce adjacent same-name
- * runs. Mirrors link_elf.c's OutShdr build at link_elf.c:879. */
- {
- u32* order =
- (u32*)h->alloc(h, sizeof(u32) * (x->nsecs + 1u), _Alignof(u32));
- if (!order && x->nsecs)
- compiler_panic(x->c, no_loc(), "link_macho: oom on outsec sort");
- for (u32 i = 0; i < x->nsecs; ++i) order[i] = i;
- /* Insertion sort — section count is small. */
- for (u32 i = 1; i < x->nsecs; ++i) {
- u32 cur = order[i];
- MSec* a = &x->secs[cur];
- u32 j = i;
- while (j > 0) {
- MSec* b = &x->secs[order[j - 1]];
- if ((b->segidx < a->segidx) ||
- (b->segidx == a->segidx && b->vaddr <= a->vaddr))
- break;
- order[j] = order[j - 1];
- --j;
- }
- order[j] = cur;
- }
- u32 cap = x->nsecs + 1u;
- x->outs = (OutSec*)h->alloc(h, sizeof(OutSec) * cap, _Alignof(OutSec));
- if (!x->outs) compiler_panic(x->c, no_loc(), "link_macho: oom on OutSec");
- memset(x->outs, 0, sizeof(OutSec) * cap);
- x->nouts = 0;
- for (u32 i = 0; i < x->nsecs; ++i) {
- MSec* m = &x->secs[order[i]];
- OutSec* tail = x->nouts ? &x->outs[x->nouts - 1] : NULL;
- int merge = tail && tail->segidx == m->segidx &&
- slice_eq_cstr(slice_from_cstr(tail->sectname),
- m->sectname) &&
- slice_eq_cstr(slice_from_cstr(tail->segname), m->segname);
- if (merge) {
- if (tail->flags != m->flags || tail->is_zerofill != m->is_zerofill)
- compiler_panic(
- x->c, no_loc(),
- "link_macho: coalesce mismatch on %.*s,%.*s (flags/zerofill)",
- SLICE_ARG(slice_from_cstr(m->segname)),
- SLICE_ARG(slice_from_cstr(m->sectname)));
- u64 end = m->vaddr + m->size;
- u64 prev_end = tail->vaddr + tail->size;
- if (end > prev_end) tail->size = end - tail->vaddr;
- if (m->align > tail->align) tail->align = m->align;
- } else {
- OutSec* o = &x->outs[x->nouts++];
- o->segname = m->segname;
- o->sectname = m->sectname;
- o->vaddr = m->vaddr;
- o->file_offset = m->file_offset;
- o->size = m->size;
- o->align = m->align;
- o->flags = m->flags;
- o->reserved1 = m->reserved1;
- o->reserved2 = m->reserved2;
- o->segidx = m->segidx;
- o->is_zerofill = m->is_zerofill;
- }
- }
- h->free(h, order, sizeof(u32) * (x->nsecs + 1u));
- /* Recompute per-segment OutSec span; Phase A's count was for
- * sizeofcmds sizing — recompute it here as the source of truth and
- * assert agreement. */
- for (u32 i = 0; i < x->nsegs; ++i) {
- x->segs[i].first_out = 0;
- }
- u32 prev_nouts[5];
- for (u32 i = 0; i < x->nsegs; ++i) prev_nouts[i] = x->segs[i].nouts;
- for (u32 i = 0; i < x->nsegs; ++i) x->segs[i].nouts = 0;
- for (u32 i = 0; i < x->nouts; ++i) {
- u8 sx = x->outs[i].segidx;
- if (x->segs[sx].nouts == 0) x->segs[sx].first_out = i;
- ++x->segs[sx].nouts;
- }
- for (u32 i = 0; i < x->nsegs; ++i) {
- if (prev_nouts[i] != x->segs[i].nouts)
- compiler_panic(x->c, no_loc(),
- "link_macho: OutSec count drift seg %u (%u vs %u)",
- (u32)i, prev_nouts[i], x->segs[i].nouts);
- }
- }
-}
-
-/* ---- pass: rebase LinkImage onto the final Mach-O layout ----
- *
- * img->sections still carry the coordinates link_layout assigned. For
- * every MSec that is backed by a LinkSection, copy the MSec's final
- * vaddr/file_offset onto that LinkSection, then move the relocation
- * write sites and the defined symbols belonging to it by the same
- * amount so reloc-apply walks final addresses. */
-
-static void shift_sections(MCtx* x) {
-  LinkImage* img = x->img;
-  for (u32 mi = 0; mi < x->nsecs; ++mi) {
-    const MSec* msec = &x->secs[mi];
-    /* MSecs without a link_sec_id have no LinkSection to update. */
-    if (msec->link_sec_id == 0) continue;
-    LinkSection* sec = &img->sections[msec->link_sec_id - 1u];
-    const u64 from_v = sec->vaddr;
-    const u64 from_f = sec->file_offset;
-    const u64 to_v = msec->vaddr;
-    const u64 to_f = msec->file_offset;
-    /* Nothing moved: relocs and symbols are already correct. */
-    if (from_v == to_v && from_f == to_f) continue;
-
-    sec->vaddr = to_v;
-    sec->file_offset = to_f;
-
-    /* Relocation write sites that live inside this section. */
-    for (u32 k = 0; k < LinkRelocs_count(&img->relocs); ++k) {
-      LinkRelocApply* rel = LinkRelocs_at(&img->relocs, k);
-      if (rel->link_section_id == sec->id) {
-        rel->write_vaddr = to_v + (rel->write_vaddr - from_v);
-        rel->write_file_offset = to_f + (rel->write_file_offset - from_f);
-      }
-    }
-
-    /* Defined, non-absolute symbols of this section. Ownership is decided
-     * by section_id, not by vaddr range: several input sections may share
-     * the same pre-shift vaddr (each link_layout bucket starts at 0). */
-    for (u32 k = 0; k < LinkSyms_count(&img->syms); ++k) {
-      LinkSymbol* sym = LinkSyms_at(&img->syms, k);
-      if (sym->defined && sym->kind != SK_ABS && sym->section_id == sec->id)
-        sym->vaddr = to_v + (sym->vaddr - from_v);
-    }
-  }
-}
-
-/* ---- pass: apply relocations + collect chained-fixup sites ----
- *
- * Reloc dispatch:
- * target=imported func + CALL26/JUMP26 -> S = stub vaddr
- * target=import + GOT_LOAD_PAGE21/PAGEOFF12 -> S = got slot vaddr
- * target=import + ABS64 -> write 0; collect bind site
- * target=internal + ABS64 -> write target VA; collect rebase site
- * everything else -> standard apply
- *
- * Patch sites for chained fixups are 8-byte slots. ABS32 sites get no
- * fixups: standard arm64 has no chained-fixup format for 32-bit
- * pointers (that would need DYLD_CHAINED_PTR_32). Internal R_ABS32 is
- * still applied statically; skipping the slide adjustment is technically
- * wrong, but it suffices for compile-time-known offsets.
- */
-
-/* One 8-byte slot that needs a chained fixup (rebase or bind), as
- * collected by apply_relocs. Field order is significant: the call sites
- * construct FixSite values with positional initializers. */
-typedef struct FixSite {
-  u8 segidx; /* 2 = __DATA_CONST, 3 = __DATA */
-  u8 is_bind; /* 0 = rebase, 1 = bind */
-  u8 pad[2];
-  u32 import_idx; /* 1-based import index for binds, 0 for rebases */
-  u64 vaddr; /* absolute VA of the slot */
-  u64 rebase_target; /* unslid target VA; only used for rebases */
-} FixSite;
-
-/* Growable array of FixSite records. Set up with fix_init, appended to
- * with fix_push, released with fix_fini. */
-typedef struct FixList {
-  Heap* heap; /* allocator used to grow and to free `a` */
-  FixSite* a; /* element storage; NULL after fix_init / fix_fini */
-  u32 n; /* number of valid elements in `a` */
-  u32 cap; /* capacity of `a`, counted in elements */
-} FixList;
-
-/* Put `fl` into the empty state and remember `h` as its allocator. */
-static void fix_init(FixList* fl, Heap* h) {
-  fl->a = NULL;
-  fl->n = fl->cap = 0;
-  fl->heap = h;
-}
-/* Release the storage owned by `fl`, if any, and reset it to empty. */
-static void fix_fini(FixList* fl) {
-  if (fl->a != NULL) {
-    Heap* heap = fl->heap;
-    heap->free(heap, fl->a, sizeof(*fl->a) * fl->cap);
-  }
-  fl->a = NULL;
-  fl->cap = 0;
-  fl->n = 0;
-}
-/* Append a copy of `*s` to `fl`. When VEC_GROW yields non-zero
- * (presumably an allocation failure — confirm against its definition)
- * the site is dropped without any diagnostic. */
-static void fix_push(FixList* fl, const FixSite* s) {
-  if (!VEC_GROW(fl->heap, fl->a, fl->cap, fl->n + 1u)) {
-    fl->a[fl->n] = *s;
-    fl->n += 1u;
-  }
-}
-
-/* Return the first MSec whose [vaddr, vaddr + size) range contains `v`,
- * or NULL when no section covers that address. */
-static MSec* msec_for_vaddr(MCtx* x, u64 v) {
-  u32 idx = 0;
-  while (idx < x->nsecs) {
-    MSec* cand = &x->secs[idx++];
-    if (cand->vaddr <= v && v < cand->vaddr + cand->size) return cand;
-  }
-  return NULL;
-}
-
-/* Return the base of the byte buffer backing `m`, or NULL if there is
- * none to hand out.
- *
- * A LinkSection-backed MSec resolves to the buffer of the LinkSegment
- * its section sits in (NULL when the section has no segment). A
- * synthetic MSec resolves only when its data is the __stubs or __got
- * buffer; any other synthetic buffer yields NULL. */
-static u8* bytes_for_section(MCtx* x, MSec* m, LinkImage* img) {
-  if (!m->synth_data) {
-    /* link_layout.c stored the input section bytes in the segment
-     * buffer; segment ids are 1-based. */
-    const LinkSection* sec = &img->sections[m->link_sec_id - 1u];
-    u32 segid = sec->segment_id;
-    return (segid == LINK_SEG_NONE) ? NULL : img->segment_bytes[segid - 1u];
-  }
-  if (m->synth_data == x->stubs_bytes) return x->stubs_bytes;
-  if (m->synth_data == x->got_bytes) return x->got_bytes;
-  return NULL;
-}
-
-/* Resolve the in-memory address of the bytes a relocation patches.
- *
- * The relocation names its LinkSection; the MSec carrying the same
- * link_sec_id supplies the final vaddr and the backing buffer. The
- * segment buffer itself was not reshuffled when sections were shifted,
- * so the byte offset inside it is the section's input_offset plus the
- * site's offset from the start of the section.
- *
- * Returns NULL when the reloc has no section, when no MSec matches, or
- * when no buffer backs the MSec. On success the owning MSec is stored
- * through `out_msec` if that pointer is non-NULL. */
-static u8* patch_ptr(MCtx* x, LinkImage* img, const LinkRelocApply* r,
-                     MSec** out_msec) {
-  if (r->link_section_id == LINK_SEC_NONE) return NULL;
-  LinkSection* ls = &img->sections[r->link_section_id - 1u];
-
-  /* First MSec that mirrors this LinkSection. */
-  MSec* owner = NULL;
-  for (u32 i = 0; i < x->nsecs && !owner; ++i) {
-    if (x->secs[i].link_sec_id == ls->id) owner = &x->secs[i];
-  }
-  if (!owner) return NULL;
-
-  u8* buf = bytes_for_section(x, owner, img);
-  if (!buf) return NULL;
-
-  u32 site_off = (u32)(r->write_vaddr - owner->vaddr);
-  u32 buf_off = (u32)(ls->input_offset + site_off);
-  if (out_msec) *out_msec = owner;
-  return buf + buf_off;
-}
-
-/* Resolve the address S for symbol `id`, taking imports into account.
- *
- * Returns 1 when the symbol is imported: *out_imp_idx holds its import
- * index and *out_S stays 0. Returns 0 otherwise: *out_S is the symbol's
- * vaddr and *out_imp_idx is its import-table index (non-zero only for
- * internal symbols that were given a GOT slot). For LINK_SYM_NONE or an
- * unknown id both outputs are 0 and the result is 0. */
-static int sym_S(MCtx* x, LinkImage* img, LinkSymId id, u64* out_S,
-                 int* out_imp_idx) {
-  *out_S = 0;
-  *out_imp_idx = 0;
-  if (id == LINK_SYM_NONE) return 0;
-  LinkSymbol* sym = sym_at(img, id);
-  if (!sym) return 0;
-
-  /* Import index: direct lookup first, then via the canonical global of
-   * the same name. Covers real imports and the internal-GOT entries the
-   * collect_imports pass materialized. */
-  u32 slot = 0;
-  if (id < x->sym_to_imp_size) slot = x->sym_to_imp[id];
-  if (slot == 0 && sym->name != 0) {
-    LinkSymId canon = symhash_get(&img->globals, sym->name);
-    if (canon != LINK_SYM_NONE && canon < x->sym_to_imp_size)
-      slot = x->sym_to_imp[canon];
-  }
-
-  *out_imp_idx = (int)slot;
-  if (sym->imported) return 1;
-  /* Internal symbol: expose S so non-GOT relocs apply directly, while
-   * the import index above lets GOT_LOAD relocs find the slot. */
-  *out_S = sym->vaddr;
-  return 0;
-}
-
-/* Apply every relocation in the image and record the slots that need a
- * chained fixup.
- *
- * x  - link context; supplies the arch hooks, the import / TLV tables
- *      and the synthetic __got / __thread_ptrs buffers.
- * fl - receives one FixSite per rebase or bind slot.
- *
- * Three steps: (1) walk img->relocs and patch each site, (2) fill every
- * __got slot and queue its fixup, (3) do the same for every
- * __thread_ptrs slot. Relocations with no target, or whose patch bytes
- * cannot be located, are skipped. */
-static void apply_relocs(MCtx* x, FixList* fl) {
-  LinkImage* img = x->img;
-  for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) {
-    LinkRelocApply* r = LinkRelocs_at(&img->relocs, i);
-    if (r->target == LINK_SYM_NONE) continue;
-    MSec* msec = NULL;
-    u8* P_bytes = patch_ptr(x, img, r, &msec);
-    if (!P_bytes) continue;
-    u64 P = r->write_vaddr;
-
-    u64 S;
-    int imp_idx;
-    int is_imp = sym_S(x, img, r->target, &S, &imp_idx);
-
-    /* TLVP relocs route through a __thread_ptrs slot regardless of
-     * whether the descriptor target is in-image or imported. Resolved
-     * before the import / internal split because an imported TLV
-     * descriptor doesn't use the __got slot (its address lives in
-     * __thread_ptrs with its own chained bind). */
-    if (x->arch->is_tlvp_reloc && x->arch->is_tlvp_reloc(r->kind)) {
-      u32 tlv_idx =
-          (r->target < x->sym_to_tlv_size) ? x->sym_to_tlv[r->target] : 0u;
-      if (!tlv_idx)
-        compiler_panic(x->c, no_loc(),
-                       "link_macho: TLVP reloc has no __thread_ptrs slot");
-      u64 slot_v = x->tlv_ptrs_vaddr + (tlv_idx - 1u) * MZ_TLVP_SIZE;
-      link_reloc_apply(x->c, r->kind, P_bytes, slot_v, r->addend, P);
-      continue;
-    }
-
-    if (is_imp) {
-      /* imp_idx is 1-based; 0 means the symbol has no import entry. */
-      MachImp* mi = (imp_idx > 0) ? &x->imports[imp_idx - 1] : NULL;
-      if (x->arch->is_branch_reloc && x->arch->is_branch_reloc(r->kind)) {
-        if (!mi || !mi->stub_idx)
-          compiler_panic(x->c, no_loc(),
-                         "link_macho: import has no stub for branch");
-        u64 stub_v =
-            x->stubs_vaddr + (mi->stub_idx - 1u) * x->arch->macho_stub_size;
-        link_reloc_apply(x->c, r->kind, P_bytes, stub_v, r->addend, P);
-        continue;
-      }
-      if (x->arch->is_got_load_reloc && x->arch->is_got_load_reloc(r->kind)) {
-        if (!mi)
-          compiler_panic(x->c, no_loc(),
-                         "link_macho: GOT reloc for unknown import");
-        u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE;
-        link_reloc_apply(x->c, r->kind, P_bytes, got_v, r->addend, P);
-        continue;
-      }
-      if (x->arch->is_direct_page_reloc &&
-          x->arch->is_direct_page_reloc(r->kind)) {
-        /* Direct page/lo12 against an import: route through __got. */
-        if (!mi)
-          compiler_panic(x->c, no_loc(),
-                         "link_macho: PAGE/LO12 against unknown import");
-        u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE;
-        link_reloc_apply(x->c, r->kind, P_bytes, got_v, r->addend, P);
-        continue;
-      }
-      if (r->kind == R_ABS64) {
-        /* Direct 8-byte absolute against an import: bind the slot. A
-         * bind FixSite carries a 1-based import index, so an import
-         * without a table entry cannot be encoded — fail like the
-         * branch / GOT / page paths above rather than emit index 0. */
-        if (!mi)
-          compiler_panic(x->c, no_loc(),
-                         "link_macho: ABS64 against unknown import");
-        wr_u64_le(P_bytes, 0);
-        FixSite fs = {(u8)msec->segidx, 1, {0}, (u32)imp_idx, P, 0};
-        fix_push(fl, &fs);
-        continue;
-      }
-      compiler_panic(x->c, no_loc(),
-                     "link_macho: unhandled reloc kind %u against imported "
-                     "symbol",
-                     (u32)r->kind);
-    }
-
-    /* Internal relocs. */
-    if (r->kind == R_ABS64) {
-      /* Special case: ABS64 reloc inside a TLV descriptor record
-       * (__thread_vars section) targeting in-image TLS storage. This
-       * is the descriptor's word-2 "offset" field — dyld interprets it
-       * as the per-thread offset of the storage within the TLS image,
-       * NOT as an absolute address. Apple's ld writes the literal
-       * offset and emits no chained-fixup entry; replicate that so the
-       * chain skips over this slot (chained_fixups already does the
-       * right thing: no fixsite -> no chain link). */
-      if (msec && (msec->flags & SECTION_TYPE) == S_THREAD_LOCAL_VARIABLES &&
-          x->has_tls_image) {
-        u64 offset = (S + (u64)r->addend) - x->tls_image_vaddr;
-        wr_u64_le(P_bytes, offset);
-        continue;
-      }
-      /* Rebase site. */
-      wr_u64_le(P_bytes, S + (u64)r->addend);
-      FixSite fs = {(u8)msec->segidx, 0, {0}, 0, P, S + (u64)r->addend};
-      fix_push(fl, &fs);
-      continue;
-    }
-    /* Internal symbol routed through __got (clang emits GOT_LOAD_PAGE21
-     * for any extern global, even if the def is in-image). imp_idx
-     * was populated by collect_imports' internal-GOT pass; redirect
-     * the page/lo12 reloc to the GOT slot's vaddr. */
-    if (imp_idx > 0 && x->arch->is_got_load_reloc &&
-        x->arch->is_got_load_reloc(r->kind)) {
-      MachImp* mi = &x->imports[imp_idx - 1];
-      u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE;
-      link_reloc_apply(x->c, r->kind, P_bytes, got_v, r->addend, P);
-      continue;
-    }
-    /* Generic apply. */
-    link_reloc_apply(x->c, r->kind, P_bytes, S, r->addend, P);
-  }
-
-  /* Per-slot chained fixup. Real imports → bind (dyld resolves at
-   * load). Internal GOT entries → rebase pointing at the symbol's
-   * image-relative vaddr; a target vaddr of 0 (weak undef → NULL) gets
-   * no fixup, just a literal zero slot — chained fixups treat 0 as a
-   * gap and won't disturb it. */
-  for (u32 i = 0; i < x->nimports; ++i) {
-    MachImp* mi = &x->imports[i];
-    u64 slot_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE;
-    if (mi->internal) {
-      /* Re-read the symbol's final vaddr now that shift_sections has
-       * rebased every defined symbol into the Mach-O image layout
-       * (collect_imports snapshotted too early). */
-      LinkSymbol* s = sym_at(img, mi->sym);
-      u64 tgt_v = s ? s->vaddr : 0;
-      u8* slot = x->got_bytes + (mi->got_idx - 1u) * MZ_GOT_SIZE;
-      wr_u64_le(slot, tgt_v);
-      if (tgt_v == 0) continue; /* weak-undef → NULL */
-      FixSite fs = {2u, 0, {0}, 0, slot_v, tgt_v};
-      fix_push(fl, &fs);
-    } else {
-      /* clear slot bytes (already zero) — dyld writes via chain */
-      FixSite fs = {2u, 1, {0}, i + 1u, slot_v, 0};
-      fix_push(fl, &fs);
-    }
-  }
-
-  /* Per-slot TLV pointer fixups. Mirror of the __got loop above: each
-   * __thread_ptrs slot points at the descriptor record. When the
-   * descriptor is in-image (internal) we REBASE to its final vaddr; when
-   * it lives in a dylib we BIND through the descriptor's MachImp. The
-   * slot itself lives in __DATA (segidx=3), distinct from __got's
-   * __DATA_CONST (segidx=2). */
-  for (u32 i = 0; i < x->ntlv; ++i) {
-    MachTlv* ts = &x->tlv_slots[i];
-    u64 slot_v = x->tlv_ptrs_vaddr + (ts->tlv_idx - 1u) * MZ_TLVP_SIZE;
-    u8* slot = x->tlv_ptrs_bytes + (ts->tlv_idx - 1u) * MZ_TLVP_SIZE;
-    if (ts->imported) {
-      if (!ts->import_idx)
-        compiler_panic(x->c, no_loc(),
-                       "link_macho: imported TLV without matching import slot");
-      wr_u64_le(slot, 0);
-      FixSite fs = {3u, 1, {0}, ts->import_idx, slot_v, 0};
-      fix_push(fl, &fs);
-    } else {
-      LinkSymbol* s = sym_at(img, ts->sym);
-      u64 tgt_v = s ? s->vaddr : 0;
-      wr_u64_le(slot, tgt_v);
-      if (tgt_v == 0) continue; /* weak-undef descriptor → NULL */
-      FixSite fs = {3u, 0, {0}, 0, slot_v, tgt_v};
-      fix_push(fl, &fs);
-    }
-  }
-}
-
-/* ---- chained fixups blob assembler ----
- *
- * For each segment that has fixups, build a dyld_chained_starts_in_segment
- * with one chain per page (MZ_PAGE). Within a page, sort sites by
- * offset, encode each as DYLD_CHAINED_PTR_64, and link via the `next`
- * field (4-byte units, 0 = end of chain).
- */
-
-/* Bookkeeping record for the fixup chain of one page.
- * NOTE(review): nothing in the visible part of this file references
- * PageChain — confirm whether it is still used. */
-typedef struct PageChain {
-  u32 first_offset_in_page; /* relative to page start */
-  u32 nsites;
-  u32 first_site_idx; /* into a per-segment site array */
-} PageChain;
-
-/* Three-way comparison of two FixSite records by slot vaddr. Takes
- * untyped pointers; returns -1, 0 or 1. */
-static int site_cmp_by_vaddr(const void* a, const void* b) {
-  u64 lhs = ((const FixSite*)a)->vaddr;
-  u64 rhs = ((const FixSite*)b)->vaddr;
-  return (lhs > rhs) - (lhs < rhs);
-}
-
-/* In-place insertion sort of a[0..n) by ascending vaddr; equal keys keep
- * their relative order. Hand-rolled to avoid pulling in qsort. */
-static void sort_sites(FixSite* a, u32 n) {
-  for (u32 end = 1; end < n; ++end) {
-    FixSite pending = a[end];
-    u32 hole = end;
-    /* Shift larger elements right until `pending` fits. */
-    for (; hole > 0 && site_cmp_by_vaddr(&a[hole - 1], &pending) > 0; --hole)
-      a[hole] = a[hole - 1];
-    a[hole] = pending;
-  }
-}
-
-/* Encode one DYLD_CHAINED_PTR_64 word and store it little-endian at
- * `slot`.
- *
- *   bind  : ordinal:24, addend:8, reserved:19, next:12, bind:1 = 1
- *   rebase: target:36 (vmaddr), high8:8, reserved:7, next:12, bind:1 = 0
- *
- * For a bind, ord_or_target_lo is the import ordinal (low 24 bits kept)
- * and high_or_target_hi is ignored. For a rebase the two u32 arguments
- * are the low and high halves of the target vmaddr, truncated to 36
- * bits. addend and high8 are always written as zero. next4 is the
- * distance to the next fixup in the chain (low 12 bits kept). */
-static void emit_pointer(u8* slot, int is_bind, u32 ord_or_target_lo,
-                         u32 high_or_target_hi, u32 next4) {
-  const u64 next_bits = ((u64)next4 & 0xfffull) << 51;
-  u64 payload;
-  if (is_bind) {
-    payload = ((u64)ord_or_target_lo & 0xffffffull) | ((u64)1 << 63);
-  } else {
-    const u64 full = ((u64)high_or_target_hi << 32) | (u64)ord_or_target_lo;
-    payload = full & (((u64)1 << 36) - 1u);
-  }
-  wr_u64_le(slot, payload | next_bits);
-}
-
-static void build_chained_fixups(MCtx* x, FixList* fl) {
- Heap* h = x->h;
- MByte* out = &x->chained_fixups;
- mbuf_init(out, h);
-
- /* Header (32 B):
- * uint32 fixups_version (=0)
- * uint32 starts_offset
- * uint32 imports_offset
- * uint32 symbols_offset
- * uint32 imports_count
- * uint32 imports_format (=1)
- * uint32 symbols_format (=0)
- */
- u32 hdr_pos = mbuf_u32(out, 0); /* fixups_version */
- (void)hdr_pos;
- u32 starts_offset_pos = mbuf_u32(out, 0);
- u32 imports_offset_pos = mbuf_u32(out, 0);
- u32 symbols_offset_pos = mbuf_u32(out, 0);
- mbuf_u32(out, x->nimports_real);
- mbuf_u32(out, DYLD_CHAINED_IMPORT);
- mbuf_u32(out, 0); /* symbols uncompressed */
- /* dyld expects 8-byte alignment of the starts table. */
- mbuf_align(out, 4);
-
- /* dyld_chained_starts_in_image:
- * uint32 seg_count
- * uint32 seg_info_offset[seg_count]
- *
- * seg_count must equal mach-O segment count (5).
- * seg_info_offset[i] = 0 means no fixups in that segment.
- */
- u32 starts_off = out->len;
- wr_u32_le(out->data + starts_offset_pos, starts_off);
- mbuf_u32(out, x->nsegs);
- /* Reserve seg_info_offset[]. */
- u32 seg_info_offsets_pos = out->len;
- for (u32 i = 0; i < x->nsegs; ++i) mbuf_u32(out, 0);
-
- /* Sort fixsites by vaddr globally. */
- sort_sites(fl->a, fl->n);
-
- /* Per segment, emit dyld_chained_starts_in_segment when fixups present. */
- for (u32 si = 0; si < x->nsegs; ++si) {
- /* count sites in this segment */
- u32 first = (u32)-1, count = 0;
- for (u32 k = 0; k < fl->n; ++k) {
- if (fl->a[k].segidx == si) {
- if (first == (u32)-1) first = k;
- ++count;
- }
- }
- if (!count) continue;
- /* Page-align this struct to 4. */
- mbuf_align(out, 4);
- u32 sis_off = out->len;
- /* Patch seg_info_offset[si] to (sis_off - starts_off). */
- wr_u32_le(out->data + seg_info_offsets_pos + si * 4u, sis_off - starts_off);
-
- /* Compute page count for this segment. */
- u64 seg_va = x->segs[si].vmaddr;
- u64 seg_size = x->segs[si].vmsize ? x->segs[si].vmsize : MZ_PAGE;
- u32 page_count = (u32)((seg_size + MZ_PAGE - 1u) / MZ_PAGE);
-
- /* dyld_chained_starts_in_segment:
- * uint32 size
- * uint16 page_size
- * uint16 pointer_format
- * uint64 segment_offset (offset of segment's first byte from
- * mach_header)
- * uint32 max_valid_pointer (0 for 64-bit)
- * uint16 page_count
- * uint16 page_start[page_count] (0xFFFF = no fixups in page)
- */
- u32 sis_size_pos = mbuf_u32(out, 0); /* fill below */
- mbuf_u16(out, (u16)MZ_PAGE);
- mbuf_u16(out, (u16)DYLD_CHAINED_PTR_64);
- mbuf_u64(out, (u64)x->segs[si].fileoff); /* segment file offset */
- mbuf_u32(out, 0);
- mbuf_u16(out, (u16)page_count);
- u32 page_starts_pos = out->len;
- for (u32 p = 0; p < page_count; ++p) mbuf_u16(out, 0xFFFFu);
- /* size includes the page_start array */
- u32 sis_size = out->len - sis_size_pos + 4u;
- /* Hmm, the `size` field is the size of *this* struct. We measure
- * from sis_off through end of page_starts. */
- sis_size = out->len - sis_off;
- wr_u32_le(out->data + sis_size_pos, sis_size);
-
- /* Now: walk sites in this segment, group by page, write
- * page_start[i] = offset_in_page of first site, and chain via
- * next-field in the actual segment's bytes. */
- /* Sites are sorted globally; collect contiguous run for this seg. */
- u32 cur = first;
- while (cur < first + count) {
- u32 page_idx = (u32)((fl->a[cur].vaddr - seg_va) / MZ_PAGE);
- u32 offset_in_page = (u32)((fl->a[cur].vaddr - seg_va) % MZ_PAGE);
- wr_u16_le(out->data + page_starts_pos + page_idx * 2u,
- (u16)offset_in_page);
- /* Walk this page's chain. */
- u32 next_in_page = cur;
- while (next_in_page + 1 < first + count) {
- u64 nv = fl->a[next_in_page + 1].vaddr;
- if (nv >= seg_va + (u64)(page_idx + 1) * MZ_PAGE) break;
- ++next_in_page;
- }
- /* Encode chain pointers. */
- for (u32 k = cur; k <= next_in_page; ++k) {
- FixSite* s = &fl->a[k];
- u32 next4 = 0;
- if (k < next_in_page) {
- u64 dist = fl->a[k + 1].vaddr - s->vaddr;
- next4 = (u32)(dist / 4u);
- }
- /* Find segment bytes. Synthetic pointer sections have private
- * buffers; file-backed sections can live in any segment, including
- * pointer-bearing read-only constants in __TEXT. */
- u8* slot = NULL;
- if (s->segidx == 2 && x->got_bytes && s->vaddr >= x->got_vaddr &&
- s->vaddr < x->got_vaddr + x->got_size) {
- /* __DATA_CONST: __got slot. */
- slot = x->got_bytes + (s->vaddr - x->got_vaddr);
- } else if (x->tlv_ptrs_bytes && s->vaddr >= x->tlv_ptrs_vaddr &&
- s->vaddr < x->tlv_ptrs_vaddr + x->tlv_ptrs_size) {
- slot = x->tlv_ptrs_bytes + (s->vaddr - x->tlv_ptrs_vaddr);
- } else {
- MSec* m = msec_for_vaddr(x, s->vaddr);
- if (m && m->link_sec_id) {
- u8* base = bytes_for_section(x, m, x->img);
- if (base) {
- LinkSection* ls = &x->img->sections[m->link_sec_id - 1u];
- u32 in_off = (u32)(ls->input_offset + (s->vaddr - m->vaddr));
- slot = base + in_off;
- }
- }
- }
- if (!slot)
- compiler_panic(x->c, no_loc(),
- "link_macho: chained-fixup slot for vaddr 0x%llx not "
- "in any segment buffer",
- (unsigned long long)s->vaddr);
- if (s->is_bind) {
- /* ordinal is import index (1-based) - 1; chained-import format
- * uses 0-based. */
- if (s->import_idx == 0 || s->import_idx > x->nimports_real) {
- compiler_panic(
- x->c, no_loc(),
- "link_macho: chained bind for vaddr 0x%llx uses import index "
- "%u outside real import table size %u",
- (unsigned long long)s->vaddr, (unsigned)s->import_idx,
- (unsigned)x->nimports_real);
- }
- u32 ord = s->import_idx - 1u;
- emit_pointer(slot, 1, ord, 0, next4);
- } else {
- /* rebase target = unslid vmaddr */
- u32 lo = (u32)(s->rebase_target & 0xffffffffu);
- u32 hi = (u32)(s->rebase_target >> 32);
- emit_pointer(slot, 0, lo, hi, next4);
- }
- }
- cur = next_in_page + 1u;
- }
- }
-
- /* Imports table: one dyld_chained_import (4B) per real import.
- * Layout: lib_ordinal:8, weak:1, name_offset:23. Internal-GOT
- * entries are not bound by dyld so they're omitted here. */
- mbuf_align(out, 4);
- u32 imports_off = out->len;
- wr_u32_le(out->data + imports_offset_pos, imports_off);
- /* We need to first build the symbol pool to know name offsets. */
- u32 symbols_off = imports_off + x->nimports_real * 4u;
- /* Reserve imports area. */
- for (u32 i = 0; i < x->nimports_real; ++i) mbuf_u32(out, 0);
- /* Emit symbols (each NUL-terminated). Set name_offset on each import. */
- wr_u32_le(out->data + symbols_offset_pos, out->len);
- /* Leading NUL for offset 0. */
- mbuf_u8(out, 0);
- for (u32 i = 0; i < x->nimports_real; ++i) {
- MachImp* mi = &x->imports[i];
- Slice nm_s = pool_slice(x->c->global, mi->name);
- const char* nm = nm_s.s;
- size_t nl = nm_s.len;
- if (!nm || !nl || mi->dylib_ord == 0 || mi->dylib_ord > x->ndylibs) {
- compiler_panic(x->c, no_loc(),
- "link_macho: invalid chained import %u "
- "(name=%u dylib_ord=%u ndylibs=%u)",
- (unsigned)i, (unsigned)mi->name,
- (unsigned)mi->dylib_ord, (unsigned)x->ndylibs);
- }
- u32 off = out->len - symbols_off;
- mbuf_str(out, nm, (u32)nl);
- /* Patch the import slot. */
- u32 packed = ((u32)mi->dylib_ord & 0xffu) |
- ((u32)(mi->weak ? 1u : 0u) << 8) | ((off & 0x7fffffu) << 9);
- wr_u32_le(out->data + imports_off + i * 4u, packed);
- }
- (void)symbols_off;
-}
-
-/* ---- exports trie ---- *
- *
- * Minimal trie: one node carrying a single export "_main" with the
- * entry symbol's VA-relative offset. This is enough for dyld; binaries
- * with a real exports trie include more data but we don't need it. */
-
-static void uleb128(MByte* out, u64 v) {
- do {
- u8 byte = v & 0x7fu;
- v >>= 7;
- if (v) byte |= 0x80u;
- mbuf_u8(out, byte);
- } while (v);
-}
-
-static void build_exports_trie(MCtx* x) {
- /* Format:
- * node = (terminal_size: uleb128) (export_data)? (children_count: u8)
- * (children: [(label NUL) (offset uleb128)]*)
- *
- * We emit a trie with a single leaf at "_main" with offset
- * entry_offset (from __TEXT base).
- *
- * Easiest: single root node with children_count=1, child label = "_main",
- * child offset points to a leaf node.
- */
- MByte* out = &x->exports_trie;
- mbuf_init(out, x->h);
-
- LinkImage* img = x->img;
- LinkSymbol* esym = sym_at(img, img->entry_sym);
- if (!esym || !esym->defined) {
- /* No entry — emit a single empty terminal trie. */
- mbuf_u8(out, 0); /* terminal_size 0 */
- mbuf_u8(out, 0); /* children 0 */
- return;
- }
- Slice nm_s = pool_slice(x->c->global, esym->name);
- const char* nm = nm_s.s;
- size_t nl = nm_s.len;
- if (!nm || nl == 0) {
- mbuf_u8(out, 0);
- mbuf_u8(out, 0);
- return;
- }
- /* leaf node: terminal_size = sizeof(uleb(flags)+uleb(offset))
- * flags = 0 (regular export); offset = vaddr - __TEXT.vmaddr */
- u64 entry_off = esym->vaddr - x->text_vaddr;
-
- /* Compute leaf-node bytes length: uleb(flags=0) + uleb(offset). */
- u32 flags = 0;
- u32 leaf_payload_len;
- {
- /* count uleb bytes for flags=0 -> 1 byte */
- u32 a = 1;
- /* count uleb bytes for entry_off */
- u32 b = 0;
- u64 v = entry_off;
- do {
- ++b;
- v >>= 7;
- } while (v);
- leaf_payload_len = a + b;
- }
- /* Layout: root node first, then leaf. The root node's child entry
- * carries the absolute offset of the leaf within the trie. */
-
- /* root: terminal_size=0, children_count=1, "_main"\0, child_offset=
- * (leaf-position uleb). */
- /* We'll back-patch child_offset after we know the leaf position. */
- mbuf_u8(out, 0); /* root terminal size */
- mbuf_u8(out, 1); /* children_count */
- mbuf_str(out, nm, (u32)nl);
- /* child offset: 5 bytes max for uleb128(u32). Reserve and patch. */
- u32 child_off_pos = out->len;
- /* Reserve 5 bytes. */
- for (u32 i = 0; i < 5; ++i) mbuf_u8(out, 0);
- /* leaf node */
- u32 leaf_pos = out->len;
- /* terminal_size byte then payload */
- mbuf_u8(out, (u8)leaf_payload_len);
- uleb128(out, flags);
- uleb128(out, entry_off);
- mbuf_u8(out, 0); /* children_count */
-
- /* Patch child_offset uleb. */
- u32 v = leaf_pos;
- for (u32 i = 0; i < 5; ++i) {
- u8 b = (u8)(v & 0x7fu);
- v >>= 7;
- if (v) b |= 0x80u;
- out->data[child_off_pos + i] = b;
- if (!v && i < 4) {
- /* Remaining bytes need to be 0x00 — but we already wrote zeros;
- * we need a continuation-zero so the consumer sees 5 bytes. Set
- * top bit on lower bytes to indicate continuation, last byte = 0. */
- /* Actually: ULEB needs proper termination. Force final byte to
- * 0 with no continuation by setting bit-7=0 on the last
- * non-zero byte and also forcing remaining bytes to be 0x80
- * extension or trim. Simpler: set last byte explicitly. */
- out->data[child_off_pos + i] = (u8)(out->data[child_off_pos + i] & 0x7fu);
- for (u32 j = i + 1; j < 5; ++j) out->data[child_off_pos + j] = 0x80;
- out->data[child_off_pos + 4] = 0x00;
- break;
- }
- }
- /* Pad trie to 8 bytes. */
- mbuf_align(out, 8);
-}
-
-/* ---- symtab + strtab + indirect symtab ---- */
-
-typedef struct NlistRec {
- u32 strx;
- u8 type;
- u8 sect; /* 1-based section index (Mach-O) */
- u16 desc;
- u64 value;
-} NlistRec;
-
-static void build_symtab(MCtx* x) {
- Heap* h = x->h;
- LinkImage* img = x->img;
- mbuf_init(&x->symtab, h);
- mbuf_init(&x->strtab, h);
- mbuf_init(&x->indirect, h);
-
- /* strtab leading NUL */
- mbuf_u8(&x->strtab, 0);
-
- /* Approach:
- * - Add one local nlist per defined LinkSymbol (locals + non-imported
- * externs) — but to keep things simple we only emit external defined
- * syms (mainly _main), plus all imports as N_UNDF|N_EXT.
- *
- * Mach-O dyld requires the symtab order: locals first, ext-defs next,
- * undef last (matched by LC_DYSYMTAB ranges).
- */
-
- /* Pass A: defined externals. */
- u32 n_local = 0;
- u32 n_extdef = 0;
- u32 n_undef = 0;
-
- /* For now we emit only externals + imports. No locals. */
- /* extdef pass */
- for (u32 i = 0; i < LinkSyms_count(&img->syms); ++i) {
- LinkSymbol* s = LinkSyms_at(&img->syms, i);
- if (!s->defined) continue;
- if (s->bind != SB_GLOBAL && s->bind != SB_WEAK) continue;
- if (s->name == 0) continue;
- if (s->kind == SK_ABS) continue; /* skip abs externs */
- /* Locate which OutSec contains this vaddr to figure out n_sect.
- * n_sect is the 1-based index into the flat section_64 table the
- * file actually contains (post-coalesce), matching what we emit
- * in emit_load_command_segment. */
- u8 n_sect = 0;
- for (u32 k = 0; k < x->nouts; ++k) {
- OutSec* o = &x->outs[k];
- if (s->vaddr >= o->vaddr && s->vaddr < o->vaddr + o->size) {
- n_sect = (u8)(k + 1u);
- break;
- }
- if (s->vaddr == o->vaddr + o->size) {
- n_sect = (u8)(k + 1u);
- break;
- }
- }
- Slice nm_s = pool_slice(x->c->global, s->name);
- const char* nm = nm_s.s;
- size_t nl = nm_s.len;
- u32 strx = x->strtab.len;
- if (nm && nl) mbuf_str(&x->strtab, nm, (u32)nl);
-
- u8 t[16];
- u8 nt = N_SECT | N_EXT;
- if (s->bind == SB_WEAK) {
- /* N_WEAK_DEF in n_desc (not a flag in n_type) */
- }
- wr_u32_le(t + 0, strx);
- t[4] = nt;
- t[5] = n_sect;
- wr_u16_le(t + 6, s->bind == SB_WEAK ? N_WEAK_DEF : 0);
- wr_u64_le(t + 8, s->vaddr);
- mbuf_append(&x->symtab, t, 16);
- ++n_extdef;
- }
-
- /* undef imports — real imports only. Internal-GOT entries don't get
- * N_UNDF nlist records since they're defined in the image. */
- u32 imp_first_symtab_idx = n_extdef;
- for (u32 i = 0; i < x->nimports_real; ++i) {
- MachImp* mi = &x->imports[i];
- Slice nm_s = pool_slice(x->c->global, mi->name);
- const char* nm = nm_s.s;
- size_t nl = nm_s.len;
- u32 strx = x->strtab.len;
- if (nm && nl) mbuf_str(&x->strtab, nm, (u32)nl);
-
- u8 t[16];
- wr_u32_le(t + 0, strx);
- t[4] = N_UNDF | N_EXT;
- t[5] = 0;
- /* n_desc carries dylib ordinal in high byte (REFERENCED_DYNAMICALLY etc.)
- */
- u16 desc = (u16)(((u16)mi->dylib_ord & 0xff) << 8);
- if (mi->weak) desc |= N_WEAK_REF;
- wr_u16_le(t + 6, desc);
- wr_u64_le(t + 8, 0);
- mbuf_append(&x->symtab, t, 16);
- ++n_undef;
- }
-
- /* indirect symtab: one entry per __stubs slot, then one per __got
- * slot. Internal-GOT slots use INDIRECT_SYMBOL_LOCAL (0x80000000)
- * since they have no nlist entry. */
- u32 indirect_start = 0;
- /* Patch reserved1 of each synth OutSec. __stubs and __got are each
- * singleton OutSecs (synth sections never coalesce with user input),
- * so a sectname match identifies them unambiguously. */
- for (u32 i = 0; i < x->nouts; ++i) {
- OutSec* o = &x->outs[i];
- if (slice_eq_cstr(slice_from_cstr(o->sectname), "__stubs") && o->size) {
- o->reserved1 = indirect_start;
- for (u32 k = 0; k < x->nimports; ++k) {
- MachImp* mi = &x->imports[k];
- if (!mi->stub_idx) continue;
- u32 sym_idx = imp_first_symtab_idx + k;
- mbuf_u32(&x->indirect, sym_idx);
- ++indirect_start;
- }
- }
- }
- for (u32 i = 0; i < x->nouts; ++i) {
- OutSec* o = &x->outs[i];
- if (slice_eq_cstr(slice_from_cstr(o->sectname), "__got") && o->size) {
- o->reserved1 = indirect_start;
- for (u32 k = 0; k < x->nimports; ++k) {
- MachImp* mi = &x->imports[k];
- u32 sym_idx = mi->internal ? 0x80000000u /* INDIRECT_SYMBOL_LOCAL */
- : (imp_first_symtab_idx + k);
- mbuf_u32(&x->indirect, sym_idx);
- ++indirect_start;
- }
- }
- }
-
- x->nsyms = n_local + n_extdef + n_undef;
- (void)n_local;
- (void)imp_first_symtab_idx;
-}
-
-/* ---- LINKEDIT layout assembly ----
- *
- * Place blobs in the order Apple prefers:
- * chained_fixups, exports_trie, fn_starts, data_in_code,
- * symtab, indirect, strtab, codesig
- */
-
-static void layout_linkedit(MCtx* x) {
- /* fn_starts and data_in_code are both empty. */
- mbuf_init(&x->fn_starts, x->h);
- mbuf_init(&x->data_in_code, x->h);
- mbuf_init(&x->codesig, x->h);
-
- u64 cur = x->linkedit_fileoff;
- /* chained fixups */
- cur = ALIGN_UP(cur, 8u);
- x->chained_fixups_off = (u32)cur;
- cur += x->chained_fixups.len;
- /* exports trie */
- cur = ALIGN_UP(cur, 8u);
- x->exports_trie_off = (u32)cur;
- cur += x->exports_trie.len;
- /* function starts (empty placeholder, but allocate one byte) */
- cur = ALIGN_UP(cur, 8u);
- x->fn_starts_off = (u32)cur;
- /* data in code */
- cur = ALIGN_UP(cur, 8u);
- x->data_in_code_off = (u32)cur;
- /* symtab */
- cur = ALIGN_UP(cur, 8u);
- x->symtab_off = (u32)cur;
- cur += x->symtab.len;
- /* indirect symtab */
- cur = ALIGN_UP(cur, 4u);
- x->indirect_off = (u32)cur;
- cur += x->indirect.len;
- /* strtab */
- cur = ALIGN_UP(cur, 8u);
- x->strtab_off = (u32)cur;
- cur += x->strtab.len;
- /* code signature: end-aligned to 16 */
- cur = ALIGN_UP(cur, 16u);
- x->codesig_off = (u32)cur;
-
- /* Linkedit segment file_size includes everything up to (but not yet
- * including) codesig. Codesig is computed below. */
- u64 le_size = cur - x->linkedit_fileoff;
- /* Set linkedit segment size; will be increased after codesig. */
- x->segs[4].filesize = le_size;
- x->segs[4].vmsize = ALIGN_UP(le_size, MZ_PAGE);
- if (!x->segs[4].vmsize) x->segs[4].vmsize = MZ_PAGE;
-}
-
-/* ---- ad-hoc code signature (CodeDirectory + SuperBlob) ----
- *
- * Produces a minimal embedded SuperBlob with a single CodeDirectory.
- * The CD is sha256-hashed over CS_PAGE_SIZE_LOG2 = 4096-byte pages of
- * the file (excluding the codesig itself). The kernel verifies the
- * CD's hash chain on exec.
- *
- * Output format (in big-endian for SuperBlob/CodeDirectory headers):
- * [SuperBlob]
- * u32 magic (0xfade0cc0)
- * u32 length
- * u32 count (=1)
- * [Slot]
- * u32 type (=0 CSSLOT_CODEDIRECTORY)
- * u32 offset (=20) -- relative to start of SuperBlob
- * [CodeDirectory]
- * u32 magic (0xfade0c02)
- * u32 length (bytes including all hashes)
- * u32 version (>=0x20400 for execSeg fields)
- * u32 flags (=0 ad-hoc — actually flags must include 0x2
- * (kSecCodeSignatureAdhoc)) u32 hashOffset (offset of first slot hash) u32
- * identOffset (offset of identifier string) u32 nSpecialSlots (=0) u32
- * nCodeSlots u32 codeLimit (file bytes covered) u8 hashSize (=32) u8
- * hashType (=2 sha256) u8 platform (=0) u8 pageSize (=12 for 4096) u32
- * spare2 (=0) u32 scatterOffset (=0) u32 teamOffset (=0) u32 spare3 (=0)
- * u64 codeLimit64 (=0)
- * u64 execSegBase (=__TEXT.fileoff)
- * u64 execSegLimit (=__TEXT.filesize)
- * u64 execSegFlags (=1 main binary)
- * [identifier bytes "a.out\0"]
- * [codeslot hashes nCodeSlots * 32 B]
- *
- * Hashes computed AFTER everything else is final — including the codesig
- * blob's own offset in the file (the hash range stops just before
- * codeLimit). */
-
-static void wr_u64_be(u8* p, u64 v) {
- for (u32 i = 0; i < 8; ++i) p[7 - i] = (u8)(v >> (i * 8));
-}
-
-/* Build the codesig blob with placeholder hashes; size is precise so
- * file layout is final after this. */
-static void build_codesig_skeleton(MCtx* x, u32 code_limit, const char* ident) {
- u32 code_page = 1u << CS_PAGE_SIZE_LOG2; /* 4096 */
- u32 nslots = (code_limit + code_page - 1u) / code_page;
-
- /* CodeDirectory size:
- * header 88 bytes through execSegFlags
- * identifier (ident_len + 1)
- * hashes (nslots * 32)
- */
- u32 ident_len = (u32)slice_from_cstr(ident).len + 1u;
- u32 cd_hdr = 88u;
- u32 cd_size = cd_hdr + ident_len + nslots * CS_SHA256_LEN;
- /* SuperBlob: 12 hdr + 8 slot + cd. */
- u32 sb_size = 12u + 8u + cd_size;
-
- MByte* out = &x->codesig;
- mbuf_init(out, x->h);
- mbuf_reserve(out, sb_size);
- memset(out->data, 0, sb_size);
- out->len = sb_size;
-
- u8* sb = out->data;
- /* SuperBlob header */
- wr_u32_be(sb + 0, CS_MAGIC_EMBEDDED_SIGNATURE);
- wr_u32_be(sb + 4, sb_size);
- wr_u32_be(sb + 8, 1); /* count */
- /* slot 0: type=CSSLOT_CODEDIRECTORY, offset=20 */
- wr_u32_be(sb + 12, CSSLOT_CODEDIRECTORY);
- wr_u32_be(sb + 16, 20u);
-
- /* CodeDirectory */
- u8* cd = sb + 20;
- wr_u32_be(cd + 0, CS_MAGIC_CODEDIRECTORY);
- wr_u32_be(cd + 4, cd_size);
- wr_u32_be(cd + 8, 0x20400u); /* version with execSeg */
- wr_u32_be(cd + 12, 0x2u); /* flags = adhoc */
- wr_u32_be(cd + 16, cd_hdr + ident_len); /* hashOffset */
- wr_u32_be(cd + 20, cd_hdr); /* identOffset */
- wr_u32_be(cd + 24, 0); /* nSpecialSlots */
- wr_u32_be(cd + 28, nslots);
- wr_u32_be(cd + 32, code_limit);
- cd[36] = (u8)CS_SHA256_LEN;
- cd[37] = (u8)CS_HASHTYPE_SHA256;
- cd[38] = 0; /* platform */
- cd[39] = (u8)CS_PAGE_SIZE_LOG2;
- wr_u32_be(cd + 40, 0); /* spare2 */
- wr_u32_be(cd + 44, 0); /* scatterOffset */
- wr_u32_be(cd + 48, 0); /* teamOffset */
- wr_u32_be(cd + 52, 0); /* spare3 */
- wr_u64_be(cd + 56, 0); /* codeLimit64 */
- wr_u64_be(cd + 64, x->segs[1].fileoff); /* execSegBase */
- wr_u64_be(cd + 72, x->segs[1].filesize); /* execSegLimit */
- wr_u64_be(cd + 80, CS_EXECSEG_MAIN_BINARY);
-
- /* identifier */
- memcpy(cd + cd_hdr, ident, ident_len);
-
- x->codesig_size = sb_size;
-}
-
-static void compute_codesig(MCtx* x, const u8* full_file, u32 file_len_excl_cs,
- const char* ident) {
- u32 code_page = 1u << CS_PAGE_SIZE_LOG2;
- u32 nslots = (file_len_excl_cs + code_page - 1u) / code_page;
- u32 ident_len = (u32)slice_from_cstr(ident).len + 1u;
- u8* cd = x->codesig.data + 12 + 8;
- u8* hashes = cd + 88u + ident_len;
-
- for (u32 i = 0; i < nslots; ++i) {
- u32 off = i * code_page;
- u32 take = (off + code_page <= file_len_excl_cs) ? code_page
- : (file_len_excl_cs - off);
- Sha256 s;
- sha256_init(&s);
- sha256_update(&s, full_file + off, take);
- /* Pages shorter than code_page get the standard SHA over the
- * partial bytes — Apple's tools do exactly this (no zero padding
- * on the tail). */
- sha256_final(&s, hashes + i * CS_SHA256_LEN);
- }
-}
-
-/* ---- final emission ---- */
-
-static void emit_load_command_segment(MByte* lc, MCtx* x, u32 segidx) {
- MSeg* sg = &x->segs[segidx];
- u32 seg_cmd_size = MACHO_SEGCMD64_SIZE + sg->nouts * MACHO_SECT64_SIZE;
- u32 base = lc->len;
- mbuf_u32(lc, LC_SEGMENT_64);
- mbuf_u32(lc, seg_cmd_size);
- /* segname: 16 bytes zero-padded */
- u8 nm[16];
- memset(nm, 0, 16);
- size_t nlen = slice_from_cstr(sg->name).len;
- if (nlen > 16) nlen = 16;
- memcpy(nm, sg->name, nlen);
- mbuf_append(lc, nm, 16);
- mbuf_u64(lc, sg->vmaddr);
- mbuf_u64(lc, sg->vmsize);
- mbuf_u64(lc, sg->fileoff);
- mbuf_u64(lc, sg->filesize);
- mbuf_u32(lc, sg->maxprot);
- mbuf_u32(lc, sg->initprot);
- mbuf_u32(lc, sg->nouts);
- mbuf_u32(lc, 0); /* flags */
-
- for (u32 j = 0; j < sg->nouts; ++j) {
- OutSec* o = &x->outs[sg->first_out + j];
- u8 sname[16], gname[16];
- memset(sname, 0, 16);
- memset(gname, 0, 16);
- size_t sl = o->sectname ? slice_from_cstr(o->sectname).len : 0;
- if (sl > 16) sl = 16;
- if (sl) memcpy(sname, o->sectname, sl);
- size_t gl = slice_from_cstr(sg->name).len; /* segname must match */
- if (gl > 16) gl = 16;
- memcpy(gname, sg->name, gl);
- mbuf_append(lc, sname, 16);
- mbuf_append(lc, gname, 16);
- mbuf_u64(lc, o->vaddr);
- mbuf_u64(lc, o->size);
- mbuf_u32(lc, (u32)o->file_offset);
- /* align is power of 2; encode as log2. */
- u32 a = o->align ? o->align : 1u;
- u32 al = 0;
- while ((1u << al) < a) ++al;
- mbuf_u32(lc, al);
- mbuf_u32(lc, 0); /* reloff */
- mbuf_u32(lc, 0); /* nreloc */
- mbuf_u32(lc, o->flags);
- mbuf_u32(lc, o->reserved1);
- mbuf_u32(lc, o->reserved2);
- mbuf_u32(lc, 0); /* reserved3 */
- }
- (void)base;
-}
-
-void link_emit_macho(LinkImage* img, Writer* w);
-
-void link_emit_macho(LinkImage* img, Writer* w) {
- MCtx x;
- memset(&x, 0, sizeof(x));
- x.img = img;
- x.c = img->c;
- x.h = img->heap;
- x.w = w;
- x.linker = img->linker;
- x.arch = link_arch_desc_for(img->c);
-
- if (!x.arch || !x.arch->macho_cputype || !x.arch->emit_macho_stub ||
- !x.arch->macho_stub_size)
- compiler_panic(x.c, no_loc(),
- "link_emit_macho: no Mach-O descriptor for target");
- if (img->entry_sym == LINK_SYM_NONE)
- compiler_panic(x.c, no_loc(), "link_emit_macho: no resolved entry");
-
- collect_imports(&x);
- collect_tlv(&x);
- plan_layout(&x);
- shift_sections(&x);
-
- /* entry offset within __TEXT segment. */
- LinkSymbol* esym = sym_at(img, img->entry_sym);
- if (!esym || !esym->defined)
- compiler_panic(x.c, no_loc(), "link_emit_macho: entry symbol undefined");
- if (esym->vaddr < x.text_vaddr)
- compiler_panic(x.c, no_loc(),
- "link_emit_macho: entry symbol below __TEXT base");
- x.entry_offset = (u32)(esym->vaddr - x.text_vaddr);
-
- /* image-id UUID. */
- u8 image_id[LINK_IMAGE_ID_BYTES];
- link_image_id_compute(img, image_id);
- memcpy(x.uuid, image_id, 16);
-
- /* Reloc apply collects fixsites. */
- FixList fl;
- fix_init(&fl, x.h);
- apply_relocs(&x, &fl);
-
- /* Build LINKEDIT contents. */
- build_chained_fixups(&x, &fl);
- build_exports_trie(&x);
- build_symtab(&x);
- layout_linkedit(&x);
-
- /* Compute code-sig skeleton sized to file bytes excluding sig. */
- u32 code_limit = x.codesig_off;
- build_codesig_skeleton(&x, code_limit, "a.out");
- /* Now extend linkedit segment to include codesig. */
- u64 le_size = (u64)x.codesig_off + (u64)x.codesig_size - x.linkedit_fileoff;
- x.segs[4].filesize = le_size;
- x.segs[4].vmsize = ALIGN_UP(le_size, MZ_PAGE);
-
- /* Build load commands buffer. */
- MByte lc;
- mbuf_init(&lc, x.h);
-
- /* LC_SEGMENT_64 for each segment with sections (and PAGEZERO/LINKEDIT). */
- emit_load_command_segment(&lc, &x, 0); /* PAGEZERO */
- emit_load_command_segment(&lc, &x, 1); /* TEXT */
- if (x.segs[2].nsects > 0)
- emit_load_command_segment(&lc, &x, 2); /* DATA_CONST */
- if (x.segs[3].nsects > 0) emit_load_command_segment(&lc, &x, 3); /* DATA */
- emit_load_command_segment(&lc, &x, 4); /* LINKEDIT */
-
- /* LC_DYLD_CHAINED_FIXUPS (linkedit_data_command: 16B) */
- mbuf_u32(&lc, LC_DYLD_CHAINED_FIXUPS);
- mbuf_u32(&lc, 16);
- mbuf_u32(&lc, x.chained_fixups_off);
- mbuf_u32(&lc, x.chained_fixups.len);
-
- /* LC_DYLD_EXPORTS_TRIE */
- mbuf_u32(&lc, LC_DYLD_EXPORTS_TRIE);
- mbuf_u32(&lc, 16);
- mbuf_u32(&lc, x.exports_trie_off);
- mbuf_u32(&lc, x.exports_trie.len);
-
- /* LC_SYMTAB */
- mbuf_u32(&lc, LC_SYMTAB);
- mbuf_u32(&lc, MACHO_SYMTAB_CMD_SIZE);
- mbuf_u32(&lc, x.symtab_off);
- mbuf_u32(&lc, x.nsyms);
- mbuf_u32(&lc, x.strtab_off);
- mbuf_u32(&lc, x.strtab.len);
-
- /* LC_DYSYMTAB */
- /* nlocal=0, nextdef=#defined-globals, nundef=#imports. We tracked
- * those during build_symtab; recompute by inspecting strtab... easier
- * to recount: defined globals are total - imports. */
- u32 nlocal = 0;
- u32 nundef = x.nimports_real;
- u32 nextdef = (x.nsyms > nundef) ? x.nsyms - nundef - nlocal : 0;
- mbuf_u32(&lc, LC_DYSYMTAB);
- mbuf_u32(&lc, MACHO_DYSYMTAB_CMD_SIZE);
- mbuf_u32(&lc, 0); /* ilocalsym */
- mbuf_u32(&lc, nlocal);
- mbuf_u32(&lc, nlocal);
- mbuf_u32(&lc, nextdef);
- mbuf_u32(&lc, nlocal + nextdef);
- mbuf_u32(&lc, nundef);
- mbuf_u32(&lc, 0);
- mbuf_u32(&lc, 0); /* tocoff, ntoc */
- mbuf_u32(&lc, 0);
- mbuf_u32(&lc, 0); /* modtaboff, nmodtab */
- mbuf_u32(&lc, 0);
- mbuf_u32(&lc, 0); /* extrefsymoff, nextrefsyms */
- mbuf_u32(&lc, x.indirect_off);
- mbuf_u32(&lc, x.indirect.len / 4u);
- mbuf_u32(&lc, 0);
- mbuf_u32(&lc, 0); /* extreloff, nextrel */
- mbuf_u32(&lc, 0);
- mbuf_u32(&lc, 0); /* locreloff, nlocrel */
-
- /* LC_LOAD_DYLINKER */
- {
- const char* dyld = "/usr/lib/dyld";
- u32 dyld_len = (u32)slice_from_cstr(dyld).len;
- u32 cmd_size = (u32)ALIGN_UP((u64)(12u + dyld_len + 1u), 8u);
- mbuf_u32(&lc, LC_LOAD_DYLINKER);
- mbuf_u32(&lc, cmd_size);
- mbuf_u32(&lc, 12u); /* name offset within cmd */
- u32 wrote = mbuf_str(&lc, dyld, dyld_len);
- (void)wrote;
- /* Pad to cmd_size. */
- while (lc.len < (u32)((u64)mbuf_align(&lc, 1) + 0)) {
- /* no-op */
- break;
- }
- /* Re-align to cmd_size. */
- u32 want = (u32)(lc.len);
- /* Walk back: lc grew by 12 + (strlen+1). Pad to cmd_size. */
- u32 cmd_start_back = lc.len - (12u + dyld_len + 1u);
- u32 pad_needed = cmd_size - (lc.len - cmd_start_back);
- while (pad_needed-- > 0) mbuf_u8(&lc, 0);
- (void)want;
- }
-
- /* LC_UUID */
- mbuf_u32(&lc, LC_UUID);
- mbuf_u32(&lc, 24);
- mbuf_append(&lc, x.uuid, 16);
-
- /* LC_BUILD_VERSION */
- mbuf_u32(&lc, LC_BUILD_VERSION);
- mbuf_u32(&lc, 24);
- mbuf_u32(&lc, 1); /* PLATFORM_MACOS */
- mbuf_u32(&lc, (12u << 16) | 0); /* minos 12.0.0 */
- mbuf_u32(&lc, (12u << 16) | 0); /* sdk 12.0.0 */
- mbuf_u32(&lc, 0); /* ntools */
-
- /* LC_MAIN — entryoff is offset within __TEXT segment from its file
- * start (0). */
- mbuf_u32(&lc, LC_MAIN);
- mbuf_u32(&lc, 24);
- mbuf_u64(&lc, (u64)x.entry_offset); /* entryoff = vaddr - __TEXT.vmaddr */
- mbuf_u64(&lc, 0); /* stacksize */
-
- /* LC_LOAD_DYLIB per dylib. */
- for (u32 i = 0; i < x.ndylibs; ++i) {
- Slice nm_s = pool_slice(x.c->global, x.dylibs[i].install);
- const char* nm = nm_s.s;
- size_t nl = nm_s.len;
- u32 cmd_size = (u32)ALIGN_UP((u64)(24u + (u32)nl + 1u), 8u);
- u32 cmd_start = lc.len;
- mbuf_u32(&lc, LC_LOAD_DYLIB);
- mbuf_u32(&lc, cmd_size);
- mbuf_u32(&lc, 24u); /* name offset */
- mbuf_u32(&lc, 0); /* timestamp */
- mbuf_u32(&lc, (1u << 16)); /* current_version 1.0 */
- mbuf_u32(&lc, (1u << 16)); /* compat_version 1.0 */
- mbuf_str(&lc, nm ? nm : "", (u32)nl);
- while (lc.len - cmd_start < cmd_size) mbuf_u8(&lc, 0);
- }
-
- /* LC_FUNCTION_STARTS / LC_DATA_IN_CODE — empty. */
- mbuf_u32(&lc, LC_FUNCTION_STARTS_C);
- mbuf_u32(&lc, 16);
- mbuf_u32(&lc, x.fn_starts_off);
- mbuf_u32(&lc, 0);
-
- mbuf_u32(&lc, LC_DATA_IN_CODE_C);
- mbuf_u32(&lc, 16);
- mbuf_u32(&lc, x.data_in_code_off);
- mbuf_u32(&lc, 0);
-
- /* LC_CODE_SIGNATURE */
- mbuf_u32(&lc, LC_CODE_SIGNATURE_C);
- mbuf_u32(&lc, 16);
- mbuf_u32(&lc, x.codesig_off);
- mbuf_u32(&lc, x.codesig_size);
-
- /* Sanity: lc.len + MACHO_HDR64_SIZE must equal headers_size we
- * predicted in plan_layout. If not, we mis-sized — panic. */
- if ((u64)lc.len + MACHO_HDR64_SIZE != x.headers_size) {
- compiler_panic(x.c, no_loc(),
- "link_macho: load-cmd size mismatch: predicted %llu got %u",
- (unsigned long long)(x.headers_size - MACHO_HDR64_SIZE),
- lc.len);
- }
-
- /* ---- now stream the file ---- */
- /* The Writer in cfree allows seek; we'll write a flat buffer first
- * (so we can hash it for codesig) and flush at the end. */
- MByte file;
- mbuf_init(&file, x.h);
-
- /* mach_header_64 */
- u32 ncmds = 0;
- /* Recount: PAGEZERO + TEXT + maybe DATA_CONST + maybe DATA + LINKEDIT
- * + chained + exports_trie + symtab + dysymtab + dyld + uuid +
- * build_version + main + nDylibs + fn_starts + data_in_code +
- * codesig. */
- ncmds += 2; /* PAGEZERO + TEXT */
- if (x.segs[2].nsects > 0) ncmds++;
- if (x.segs[3].nsects > 0) ncmds++;
- ncmds++; /* LINKEDIT */
- ncmds += 11 + x.ndylibs;
- /* (chained, exports_trie, symtab, dysymtab, dyld, uuid, build_version,
- * main, fn_starts, data_in_code, codesig) = 11 */
-
- mbuf_u32(&file, MH_MAGIC_64);
- mbuf_u32(&file, x.arch->macho_cputype);
- mbuf_u32(&file, x.arch->macho_cpusubtype);
- mbuf_u32(&file, MH_EXECUTE);
- mbuf_u32(&file, ncmds);
- mbuf_u32(&file, lc.len);
- {
- u32 mh_flags = MH_DYLDLINK | MH_TWOLEVEL | MH_NOUNDEFS | MH_PIE;
- /* dyld scans __thread_vars and allocates a pthread_key for each
- * descriptor only when this flag is set; without it the descriptor's
- * thunk pointer is silently patched to _tlv_bootstrap_error. Apple's
- * ld sets it whenever the image contains S_THREAD_LOCAL_* sections. */
- if (x.ntlv) mh_flags |= MH_HAS_TLV_DESCRIPTORS;
- mbuf_u32(&file, mh_flags);
- }
- mbuf_u32(&file, 0); /* reserved */
- mbuf_append(&file, lc.data, lc.len);
-
- /* Pad to first section's file offset. */
- /* __TEXT first section begins at headers_size; we wrote header+lc =
- * headers_size, so no pad needed. Then each MSec's file_offset
- * tells us where to write its bytes. */
-
- /* Now emit segment payload bytes per MSec. */
- for (u32 i = 0; i < x.nsecs; ++i) {
- MSec* m = &x.secs[i];
- if (m->is_zerofill || m->size == 0) continue;
- /* Pad up to m->file_offset. */
- while (file.len < m->file_offset) mbuf_u8(&file, 0);
- if (m->synth_data) {
- mbuf_append(&file, m->synth_data, m->synth_size);
- } else {
- LinkSection* ls = &img->sections[m->link_sec_id - 1u];
- u32 segid = ls->segment_id;
- u8* base =
- (segid != LINK_SEG_NONE) ? img->segment_bytes[segid - 1u] : NULL;
- if (base && ls->size) {
- mbuf_append(&file, base + ls->input_offset, (u32)ls->size);
- } else if (ls->size) {
- for (u64 k = 0; k < ls->size; ++k) mbuf_u8(&file, 0);
- }
- }
- }
-
- /* Pad to LINKEDIT start. */
- while (file.len < x.linkedit_fileoff) mbuf_u8(&file, 0);
-
- /* LINKEDIT contents in declared order. */
- while (file.len < x.chained_fixups_off) mbuf_u8(&file, 0);
- mbuf_append(&file, x.chained_fixups.data, x.chained_fixups.len);
- while (file.len < x.exports_trie_off) mbuf_u8(&file, 0);
- mbuf_append(&file, x.exports_trie.data, x.exports_trie.len);
- while (file.len < x.fn_starts_off) mbuf_u8(&file, 0);
- /* fn_starts is empty */
- while (file.len < x.data_in_code_off) mbuf_u8(&file, 0);
- /* empty */
- while (file.len < x.symtab_off) mbuf_u8(&file, 0);
- mbuf_append(&file, x.symtab.data, x.symtab.len);
- while (file.len < x.indirect_off) mbuf_u8(&file, 0);
- mbuf_append(&file, x.indirect.data, x.indirect.len);
- while (file.len < x.strtab_off) mbuf_u8(&file, 0);
- mbuf_append(&file, x.strtab.data, x.strtab.len);
- while (file.len < x.codesig_off) mbuf_u8(&file, 0);
-
- /* Compute codesig hashes over file bytes [0, codesig_off). */
- /* The codesig blob currently has zero hashes; hash now. */
- compute_codesig(&x, file.data, x.codesig_off, "a.out");
- /* Append codesig. */
- mbuf_append(&file, x.codesig.data, x.codesig.len);
-
- /* Stream out. */
- cfree_writer_seek(w, 0);
- cfree_writer_write(w, file.data, file.len);
-
- /* Cleanup. */
- fix_fini(&fl);
- mbuf_fini(&lc);
- mbuf_fini(&file);
- mbuf_fini(&x.chained_fixups);
- mbuf_fini(&x.exports_trie);
- mbuf_fini(&x.symtab);
- mbuf_fini(&x.strtab);
- mbuf_fini(&x.indirect);
- mbuf_fini(&x.fn_starts);
- mbuf_fini(&x.data_in_code);
- mbuf_fini(&x.codesig);
- if (x.imports) x.h->free(x.h, x.imports, 0); /* VEC_GROW: cap unknown */
- if (x.dylibs) x.h->free(x.h, x.dylibs, 0);
- if (x.sym_to_imp)
- x.h->free(x.h, x.sym_to_imp, sizeof(u32) * x.sym_to_imp_size);
- if (x.secs) x.h->free(x.h, x.secs, 0);
- if (x.stubs_bytes) x.h->free(x.h, x.stubs_bytes, x.stubs_size);
- if (x.got_bytes) x.h->free(x.h, x.got_bytes, x.got_size);
- if (x.tlv_ptrs_bytes) x.h->free(x.h, x.tlv_ptrs_bytes, x.tlv_ptrs_size);
- if (x.tlv_slots) x.h->free(x.h, x.tlv_slots, 0);
- if (x.sym_to_tlv)
- x.h->free(x.h, x.sym_to_tlv, sizeof(u32) * x.sym_to_tlv_size);
-}
diff --git a/src/obj/coff_archive.c b/src/obj/coff/archive.c
diff --git a/src/obj/coff.h b/src/obj/coff/coff.h
diff --git a/src/obj/coff/emit.c b/src/obj/coff/emit.c
@@ -0,0 +1,732 @@
+/* PE/COFF relocatable .obj writer. Walks a finalized ObjBuilder and
+ * emits a 64-bit little-endian relocatable object via the supplied
+ * Writer. Counterpart to emit_elf / emit_macho.
+ *
+ * Layout strategy:
+ * 1. plan COFF sections (one per kept obj section), assigning
+ * Characteristics, alignment, raw size, and per-section reloc
+ * counts;
+ * 2. build the symbol table (synthesized per-section static symbols
+ * with section-definition aux records, plus file symbols and
+ * every ObjSym kept after sweep);
+ * 3. build per-section relocation records via the per-arch translator
+ * (ObjCoffArchOps.reloc_to, reached through obj_format_lookup's coff_arch);
+ * 4. assign file offsets:
+ * file header | section headers | (bytes + relocs)* | symtab | strtab
+ * 5. write the file in that order.
+ *
+ * 64-bit little-endian only — IMAGE_FILE_MACHINE_AMD64 (x86_64) and
+ * IMAGE_FILE_MACHINE_ARM64 (aarch64). Big-endian / ptr_size != 8 panic
+ * at entry.
+ *
+ * Section name mapping policy: we pass the cfree Section.name through
+ * verbatim to the COFF Name field. Callers / readers are expected to
+ * have stored COFF-shaped names (".text", ".rdata", ".tls$", etc.) at
+ * the obj layer; emit_coff does not rewrite ELF-style spellings like
+ * ".rodata" -> ".rdata". Names longer than 8 bytes spill into the
+ * string table with the "/<decimal-offset>" encoding.
+ *
+ * Addend handling: COFF stores the addend inline in the patched bytes
+ * (there is no addend field in IMAGE_RELOCATION). The ObjBuilder
+ * caller is responsible for having written the addend into the section
+ * bytes already — matching how MSVC / mingw emit. A nonzero explicit
+ * Reloc::addend is rejected here as a known v1 limitation, except the
+ * x86-64 rel32-family addend of -4 (see coff_rel32_absorbs_minus4). */
+
+#include <string.h>
+
+#include "core/arena.h"
+#include "core/buf.h"
+#include "core/heap.h"
+#include "core/pool.h"
+#include "core/slice.h"
+#include "core/util.h"
+#include "obj/coff/coff.h"
+#include "obj/format.h"
+
+/* All-zero SrcLoc, handed to the compiler_panic calls in this file. */
+static SrcLoc no_loc(void) {
+  return (SrcLoc){0, 0, 0};
+}
+
+/* Returns 1 when an explicit addend of -4 needs no separate encoding:
+ * the target is x86-64 and `kind` is one of the 32-bit relative kinds
+ * enumerated below. Any other arch, addend, or kind yields 0.
+ * emit_coff consults this before rejecting a nonzero explicit addend.
+ * NOTE(review): presumably -4 is implied because the COFF x86-64 REL32
+ * family is measured from the end of the 4-byte field — confirm
+ * against the per-arch reloc translator. */
+static int coff_rel32_absorbs_minus4(CfreeArchKind arch, RelocKind kind,
+                                     i64 addend) {
+  int rel32_like = kind == R_PC32 || kind == R_REL32 || kind == R_PLT32 ||
+                   kind == R_X64_PLT32 || kind == R_X64_GOTPCREL ||
+                   kind == R_X64_GOTPCRELX || kind == R_X64_REX_GOTPCRELX;
+  if (arch != CFREE_ARCH_X86_64) return 0;
+  if (addend != -4) return 0;
+  return rel32_like;
+}
+
+/* ---- per-COFF-section plan record ---- */
+
+typedef struct CSec {
+ /* IMAGE_SECTION_HEADER fields (little-endian-encoded at write time). */
+ char name8[8]; /* Name field bytes; "/<decimal-offset>" form if long name */
+ u32 virtual_size; /* Section.bss_size for NOBITS sections, else 0 */
+ u32 size_of_raw_data; /* Section.bytes.total; zero for NOBITS */
+ u32 pointer_to_raw_data; /* file offset of the raw bytes; 0 if none */
+ u32 pointer_to_relocations; /* file offset of the relocs; 0 if none */
+ u16 number_of_relocations; /* u16 on the wire; no NRELOC_OVFL support */
+ u32 characteristics; /* IMAGE_SCN_* | ALIGN nibble */
+
+ /* Planning state. */
+ u32 align; /* in bytes, power of two; an obj align of 0 is stored as 1 */
+ u32 obj_sec; /* originating ObjSecId */
+ int is_nobits; /* 1 for SSEM_NOBITS / SEC_BSS sections */
+ const Buf* obj_bytes; /* NULL when nobits */
+ u8* reloc_bytes; /* arena-allocated, nreloc * COFF_RELOC_SIZE bytes */
+ ObjGroupId group_id; /* OBJ_GROUP_NONE if not in a group */
+} CSec;
+
+/* ---- emit ---- */
+
+static u32 log2_align(u32 a) { /* ceil(log2(a)); 0 when a <= 1 */
+  u32 r = 0;
+  while (r < 31u && (1u << r) < a) ++r; /* r < 31: `1u << 32` is UB */
+  return r;
+}
+
+/* Translate a cfree Section into IMAGE_SCN_* Characteristics bits.
+ * Inputs are s->flags, s->kind and s->sem; `in_group` nonzero marks the
+ * section as a COMDAT member. The alignment nibble is never set here —
+ * the caller ORs it in afterwards. */
+static u32 sec_characteristics(const Section* s, int in_group) {
+  const int nobits = (s->sem == SSEM_NOBITS) || (s->kind == SEC_BSS);
+  u32 ch = 0;
+
+  /* Content class. Executable wins over bss; after that, any writable
+   * or allocated section counts as initialized data (.data / .rdata).
+   * A section with none of these flags gets no CNT_* bit at all. */
+  if (s->flags & SF_EXEC) {
+    ch |= IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_EXECUTE;
+  } else if (nobits) {
+    ch |= IMAGE_SCN_CNT_UNINITIALIZED_DATA;
+  } else if (s->flags & (SF_WRITE | SF_ALLOC)) {
+    ch |= IMAGE_SCN_CNT_INITIALIZED_DATA;
+  }
+
+  /* Memory permissions and the COMDAT marker. */
+  if (s->flags & SF_ALLOC) ch |= IMAGE_SCN_MEM_READ;
+  if (s->flags & SF_WRITE) ch |= IMAGE_SCN_MEM_WRITE;
+  if (in_group) ch |= IMAGE_SCN_LNK_COMDAT;
+
+  /* Reader-stashed COFF extension data. A nonzero ext_type is the raw
+   * Characteristics value and replaces everything computed above
+   * (minus its alignment bits), which preserves bits such as CNT_INFO /
+   * LNK_REMOVE / MEM_DISCARDABLE on round-trip. ext_flags is ORed in
+   * either way. */
+  if (s->ext_kind != OBJ_EXT_COFF) return ch;
+  if (s->ext_type) ch = s->ext_type & ~IMAGE_SCN_ALIGN_MASK;
+  return ch | s->ext_flags;
+}
+
+/* Intern `len` bytes of `s` (NUL-terminated on disk) in string table
+ * `b` and return the string's offset. An existing occurrence that is
+ * followed by a NUL — a whole string or the tail of one — is reused;
+ * the scan is linear over a flattened copy and skips the leading size
+ * field. Empty strings map to offset 0. If the scratch copy cannot be
+ * allocated the string is simply appended without deduping. */
+static u32 strtab_add(Buf* b, const char* s, u32 len) {
+  u8 on_stack[256];
+  u8* flat = NULL;
+  u32 used, hit = 0;
+
+  if (len == 0) return 0;
+  used = buf_pos(b);
+
+  if (used > len) {
+    flat = used <= sizeof on_stack ? on_stack
+                                   : (u8*)b->heap->alloc(b->heap, used, 1);
+  }
+  if (flat) {
+    buf_flatten(b, flat);
+    /* Offsets below the size field are never candidates, so a hit is
+     * always nonzero and 0 can stand for "not found". */
+    for (u32 i = COFF_STRTAB_SIZE_FIELD_BYTES; i + len < used; ++i) {
+      if (flat[i + len] != 0 || memcmp(flat + i, s, len) != 0) continue;
+      hit = i;
+      break;
+    }
+    if (flat != on_stack) b->heap->free(b->heap, flat, used);
+    if (hit) return hit;
+  }
+
+  /* No reusable copy: append the bytes plus the terminator. */
+  buf_write(b, s, len);
+  buf_write(b, "", 1);
+  return used;
+}
+
+/* Fill the 8-byte COFF Name field `out` for `name` (`nlen` bytes).
+ * Names of at most 8 bytes are stored inline, zero-padded (an 8-byte
+ * name therefore has no terminator). Longer names are interned in
+ * `strtab` and `out` receives "/<decimal offset>", zero-padded.
+ *
+ * Only 8 characters fit: the slash plus 7 digits. An offset needing
+ * more digits is cut to its leading 7 and no longer names the right
+ * string; .obj string tables are assumed to stay far below that. */
+static void encode_name8(char out[8], const char* name, u32 nlen, Buf* strtab) {
+  char text[12]; /* '/' + up to 10 digits of a u32, built right-to-left */
+  int head = (int)sizeof text;
+  int n;
+  u32 rest;
+
+  memset(out, 0, 8);
+  if (nlen <= 8) {
+    if (nlen) memcpy(out, name, nlen);
+    return;
+  }
+
+  rest = strtab_add(strtab, name, nlen);
+  do {
+    text[--head] = (char)('0' + (rest % 10u));
+    rest /= 10u;
+  } while (rest);
+  text[--head] = '/';
+
+  n = (int)sizeof text - head;
+  if (n > 8) n = 8;
+  memcpy(out, text + head, (size_t)n);
+}
+
+/* Serialize one 18-byte IMAGE_SYMBOL record at `dst`. The name slot is
+ * `ShortName` verbatim unless Zeroes == 0 and Offset != 0 (long name). */
+static void wr_sym(u8* dst, const char ShortName[8], u32 Zeroes, u32 Offset,
+                   u32 Value, i16 SectionNumber, u16 Type, u8 StorageClass,
+                   u8 NumberOfAuxSymbols) {
+  if (Zeroes != 0 || Offset == 0) {
+    memcpy(dst, ShortName, 8);
+  } else {
+    memset(dst, 0, 4); /* long-name form: 4 zero bytes + strtab offset */
+    wr_u32_le(dst + 4, Offset);
+  }
+  wr_u32_le(dst + 8, Value);
+  wr_u16_le(dst + 12, (u16)SectionNumber);
+  wr_u16_le(dst + 14, Type);
+  dst[16] = StorageClass;
+  dst[17] = NumberOfAuxSymbols;
+}
+
+/* Serialize an 18-byte section-definition aux record at `dst`. The
+ * three bytes after Selection (offsets 15..17) are written as zero.
+ * Every other field is stored little-endian at its fixed offset. */
+static void wr_aux_secdef(u8* dst, u32 Length, u16 NumberOfRelocations,
+                          u16 NumberOfLinenumbers, u32 CheckSum, u16 Number,
+                          u8 Selection) {
+  memset(dst + 15, 0, 3);
+  dst[14] = Selection;
+  wr_u16_le(dst + 12, Number);
+  wr_u32_le(dst + 8, CheckSum);
+  wr_u16_le(dst + 6, NumberOfLinenumbers);
+  wr_u16_le(dst + 4, NumberOfRelocations);
+  wr_u32_le(dst + 0, Length);
+}
+
+/* Serialize an 18-byte weak-external aux record; the tail is zeroed. */
+static void wr_aux_weak(u8* dst, u32 TagIndex, u32 Characteristics) {
+  memset(dst + 8, 0, 10);
+  wr_u32_le(dst + 4, Characteristics);
+  wr_u32_le(dst + 0, TagIndex);
+}
+
+/* Resolve interned Sym `n` through the compiler's global pool. Stores
+ * the byte length in *len_out; a Sym with no bytes yields "" and 0. */
+static const char* sym_to_str(Compiler* c, Sym n, u32* len_out) {
+  Slice text = pool_slice(c->global, n);
+  if (text.s) {
+    *len_out = (u32)text.len;
+    return text.s;
+  }
+  *len_out = 0;
+  return "";
+}
+
+/* Serialize finalized `ob` as a 64-bit little-endian PE/COFF relocatable
+ * object through `w`. Panics (compiler_panic) on what it cannot encode:
+ * unsupported arch, big-endian, ptr_size != 8, SK_IFUNC, symbol-less
+ * relocs, unsupported reloc kinds/addends, > 65535 relocs per section. */
+void emit_coff(Compiler* c, ObjBuilder* ob, Writer* w) {
+  Heap* h = (Heap*)c->ctx->heap;
+
+  /* Tombstone sweep — see obj_sweep_dead. */
+  obj_sweep_dead(ob);
+
+  /* ---- target validation ----------------------------------------- */
+  const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_COFF);
+  const ObjCoffArchOps* coff =
+      fmt && fmt->coff_arch ? fmt->coff_arch(c->target.arch) : NULL;
+  if (!coff || !coff->reloc_to) {
+    compiler_panic(c, no_loc(), "emit_coff: unsupported target arch %u",
+                   (u32)c->target.arch);
+  }
+  u16 machine = coff->machine;
+  u32 (*reloc_to)(u32) = coff->reloc_to;
+  if (c->target.big_endian) {
+    compiler_panic(c, no_loc(), "emit_coff: big-endian COFF not supported");
+  }
+  if (c->target.ptr_size != 8) {
+    compiler_panic(c, no_loc(), "emit_coff: ptr_size %u (expected 8)",
+                   (u32)c->target.ptr_size);
+  }
+
+  /* ---- pass 1: plan sections ------------------------------------- */
+  u32 nobjsec = obj_section_count(ob);
+  CSec* secs = arena_zarray(c->scratch, CSec, nobjsec ? nobjsec : 1);
+  u32* obj_to_coff = arena_zarray(c->scratch, u32, nobjsec ? nobjsec : 1);
+  u32 nsecs = 0;
+
+  /* String table — leading 4-byte size placeholder. Real strings start
+   * at offset 4. */
+  Buf strtab;
+  buf_init(&strtab, h);
+  {
+    u8 zero4[COFF_STRTAB_SIZE_FIELD_BYTES] = {0, 0, 0, 0};
+    buf_write(&strtab, zero4, COFF_STRTAB_SIZE_FIELD_BYTES);
+  }
+
+  for (u32 i = 1; i < nobjsec; ++i) {
+    const Section* s = obj_section_get(ob, i);
+    if (s->removed) continue;
+    /* Skip ELF-style synthetic sections (a reader from another format
+     * may have surfaced them) — COFF stores symtab/strtab/relocs
+     * out-of-band, not as named sections. */
+    if (s->sem == SSEM_SYMTAB || s->sem == SSEM_STRTAB || s->sem == SSEM_RELA ||
+        s->sem == SSEM_REL || s->sem == SSEM_GROUP) {
+      continue;
+    }
+
+    CSec* cs = &secs[nsecs];
+    u32 nlen;
+    const char* nm = sym_to_str(c, s->name, &nlen);
+    encode_name8(cs->name8, nm, nlen, &strtab);
+
+    cs->obj_sec = i;
+    cs->group_id = s->group_id;
+    cs->align = s->align ? s->align : 1;
+
+    int in_group = (s->group_id != OBJ_GROUP_NONE);
+    u32 ch = sec_characteristics(s, in_group);
+    /* Alignment lives in bits 20..23. Cap at log2(8192)=13 -> nibble
+     * value 14 (IMAGE_SCN_ALIGN_8192BYTES). */
+    u32 lg = log2_align(cs->align);
+    if (lg > 13) lg = 13;
+    ch &= ~IMAGE_SCN_ALIGN_MASK;
+    ch |= IMAGE_SCN_ALIGN_FROM_LOG2(lg);
+    cs->characteristics = ch;
+
+    if (s->sem == SSEM_NOBITS || s->kind == SEC_BSS) {
+      cs->is_nobits = 1;
+      cs->virtual_size = s->bss_size;
+      cs->size_of_raw_data = 0;
+      cs->obj_bytes = NULL;
+    } else {
+      cs->is_nobits = 0;
+      cs->virtual_size = 0;
+      cs->size_of_raw_data = s->bytes.total;
+      cs->obj_bytes = &s->bytes;
+    }
+
+    obj_to_coff[i] = nsecs + 1; /* 1-based; matches SectionNumber. */
+    nsecs++;
+  }
+
+  /* ---- pass 2: count and assign per-section reloc counts --------- */
+  /* COFF stores NumberOfRelocations as u16; sections with > 65535
+   * relocs use the IMAGE_SCN_LNK_NRELOC_OVFL extension which we don't
+   * implement in v1. Panic if any single section exceeds the limit. */
+  u32 total_relocs = obj_reloc_total(ob);
+  for (u32 ci = 0; ci < nsecs; ++ci) {
+    CSec* cs = &secs[ci];
+    u32 nr = obj_reloc_count(ob, cs->obj_sec);
+    if (nr > 0xFFFFu) {
+      compiler_panic(c, no_loc(),
+                     "emit_coff: section %u has %u relocs (max 65535)",
+                     (u32)cs->obj_sec, nr);
+    }
+    cs->number_of_relocations = (u16)nr;
+  }
+
+  /* ---- pass 3: build the symbol table ---------------------------- */
+  /* Count ObjSyms (incl. tombstoned — we'll skip those when emitting)
+   * and an upper bound on the aux records SK_FILE symbols need. */
+  u32 nobjsym = 0;
+  u32 nfile_aux = 0;
+  {
+    ObjSymIter* it = obj_symiter_new(ob);
+    ObjSymEntry e;
+    while (obj_symiter_next(it, &e)) {
+      u32 flen = 0;
+      ++nobjsym;
+      if (e.sym->kind != SK_FILE) continue;
+      (void)sym_to_str(c, e.sym->name, &flen);
+      nfile_aux += flen / COFF_AUX_FILE_SIZE + 1u;
+    }
+    obj_symiter_free(it);
+  }
+
+  /* Upper bound on symbol-table records (including aux slots):
+   *   - 2 records per section symbol (primary + 1 aux secdef)
+   *   - 2 records per ObjSym (primary + up to 1 weak aux)
+   *   - nfile_aux for the path records of SK_FILE symbols, +4 spare
+   * Worst case is generous; we trim by tracking nrecords as we emit. */
+  u32 max_records = 2u * nsecs + 2u * nobjsym + nfile_aux + 4u;
+  u8* symtab =
+      (u8*)arena_zarray(c->scratch, u8, (size_t)COFF_SYMBOL_SIZE * max_records);
+  u32 nrecords = 0;
+
+  /* obj_id -> COFF symbol index (including aux slots). Index 0 is
+   * reserved as "none" in our internal map (a real COFF symbol may
+   * legitimately live at index 0, but no ObjSym ever maps there since
+   * we never put OBJ_SYM_NONE through). */
+  u32* sym_to_coff = arena_zarray(c->scratch, u32, nobjsym + 2);
+
+  /* Section symbols first — one STATIC per kept obj section, each
+   * followed by a SECTION DEFINITION aux. Reloc-against-section in
+   * other tools' output uses these; emitting them unconditionally
+   * matches what clang / mingw emit and gives readers a stable target. */
+  u32* secsym_index = arena_zarray(c->scratch, u32, nsecs + 1);
+  for (u32 ci = 0; ci < nsecs; ++ci) {
+    CSec* cs = &secs[ci];
+    /* The section symbol is named after the section, truncated to 8
+     * bytes (no strtab spill). Re-read the real name: for long names
+     * cs->name8 holds the header's "/<offset>" encoding instead. */
+    char short_name[8] = {0};
+    u32 sec_nlen;
+    const char* sec_nm =
+        sym_to_str(c, obj_section_get(ob, cs->obj_sec)->name, &sec_nlen);
+    memcpy(short_name, sec_nm, sec_nlen < 8 ? sec_nlen : 8);
+
+    u8* slot = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE;
+    wr_sym(slot, short_name, /*Zeroes*/ 1, /*Offset*/ 0, /*Value*/ 0,
+           /*SectionNumber*/ (i16)(ci + 1), /*Type*/ IMAGE_SYM_TYPE_NULL,
+           /*StorageClass*/ IMAGE_SYM_CLASS_STATIC,
+           /*NumberOfAuxSymbols*/ 1);
+    secsym_index[ci] = nrecords;
+    nrecords++;
+
+    /* Section-definition aux. COMDAT members get Selection=SELECT_ANY,
+     * which is what gcc/clang emit unless the user requests a specific
+     * selection mode; the group's flags do not currently pick another
+     * mode. The associated-section Number is left at 0 (cfree does
+     * not produce associative-COMDAT chains today). */
+    u8 selection = 0;
+    if (cs->group_id != OBJ_GROUP_NONE) {
+      const ObjGroup* g = obj_group_get(ob, cs->group_id);
+      if (g && !g->removed) selection = (u8)IMAGE_COMDAT_SELECT_ANY;
+    }
+    u8* aux = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE;
+    wr_aux_secdef(aux, /*Length*/ cs->size_of_raw_data,
+                  /*NumberOfRelocations*/ cs->number_of_relocations,
+                  /*NumberOfLinenumbers*/ 0, /*CheckSum*/ 0, /*Number*/ 0,
+                  /*Selection*/ selection);
+    nrecords++;
+  }
+
+  /* File / regular symbols. */
+  {
+    ObjSymIter* it = obj_symiter_new(ob);
+    ObjSymEntry e;
+    while (obj_symiter_next(it, &e)) {
+      const ObjSym* s = e.sym;
+      if (s->removed) continue;
+      if (s->kind == SK_IFUNC) {
+        compiler_panic(c, no_loc(),
+                       "emit_coff: SK_IFUNC has no PE/COFF representation");
+      }
+      /* Don't re-emit SK_SECTION symbols: map each one onto the section
+       * symbol already synthesized above. */
+      if (s->kind == SK_SECTION) {
+        if (s->section_id && s->section_id < nobjsec) {
+          u32 ci = obj_to_coff[s->section_id];
+          if (ci) sym_to_coff[e.id] = secsym_index[ci - 1];
+        }
+        continue;
+      }
+
+      u32 nlen;
+      const char* nm = sym_to_str(c, s->name, &nlen);
+
+      if (s->kind == SK_FILE) {
+        /* File symbol: name ".file" (short), section IMAGE_SYM_DEBUG,
+         * storage class FILE, followed by aux records carrying the
+         * NUL-padded file path (18 bytes per aux). */
+        u32 file_len = nlen;
+        u32 naux =
+            file_len ? (file_len + COFF_AUX_FILE_SIZE - 1u) / COFF_AUX_FILE_SIZE
+                     : 1u;
+        if (naux > 0xFFu) { /* NumberOfAuxSymbols is a single byte */
+          compiler_panic(c, no_loc(),
+                         "emit_coff: file path needs %u aux records", naux);
+        }
+        char short_name[8] = {'.', 'f', 'i', 'l', 'e', 0, 0, 0};
+        u8* slot = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE;
+        wr_sym(slot, short_name, /*Zeroes*/ 1, /*Offset*/ 0, /*Value*/ 0,
+               (i16)IMAGE_SYM_DEBUG, IMAGE_SYM_TYPE_NULL, IMAGE_SYM_CLASS_FILE,
+               /*NumberOfAuxSymbols*/ (u8)naux);
+        sym_to_coff[e.id] = nrecords;
+        nrecords++;
+        for (u32 a = 0; a < naux; ++a) {
+          u8* aux = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE;
+          memset(aux, 0, COFF_AUX_FILE_SIZE);
+          u32 off = a * COFF_AUX_FILE_SIZE;
+          u32 copy = file_len > off ? file_len - off : 0;
+          if (copy > COFF_AUX_FILE_SIZE) copy = COFF_AUX_FILE_SIZE;
+          if (copy) memcpy(aux, nm + off, copy);
+          nrecords++;
+        }
+        continue;
+      }
+
+      /* Regular symbol. */
+      char short_name[8] = {0};
+      u32 zeroes = 1, offset = 0;
+      if (nlen <= 8) {
+        if (nlen) memcpy(short_name, nm, nlen);
+      } else {
+        zeroes = 0;
+        offset = strtab_add(&strtab, nm, nlen);
+      }
+
+      i16 section_number = 0;
+      u32 value = 0;
+      u8 storage_class = IMAGE_SYM_CLASS_NULL;
+      u16 type = IMAGE_SYM_TYPE_NULL;
+      u8 naux = 0;
+      int emit_weak_aux = 0;
+
+      switch (s->kind) {
+        case SK_ABS:
+          section_number = (i16)IMAGE_SYM_ABSOLUTE;
+          value = (u32)s->value;
+          break;
+        case SK_COMMON:
+          /* COFF lacks a per-common alignment field; encode size in
+           * Value with SectionNumber=UNDEFINED and rely on the linker
+           * to pick a default alignment. (cfree's frontend uses
+           * COMMON only via __attribute__((common)) which is rare on
+           * PE/COFF targets.) */
+          section_number = (i16)IMAGE_SYM_UNDEFINED;
+          value = (u32)s->size;
+          break;
+        default:
+          if (s->section_id != OBJ_SEC_NONE && s->section_id < nobjsec &&
+              obj_to_coff[s->section_id]) {
+            section_number = (i16)obj_to_coff[s->section_id];
+            value = (u32)s->value;
+          } else {
+            section_number = (i16)IMAGE_SYM_UNDEFINED;
+            value = 0;
+          }
+          break;
+      }
+
+      if (s->kind == SK_FUNC) type = (u16)COFF_SYM_TYPE_FUNCTION;
+
+      switch (s->bind) {
+        case SB_LOCAL:
+          storage_class = IMAGE_SYM_CLASS_STATIC;
+          break;
+        case SB_GLOBAL:
+          storage_class = IMAGE_SYM_CLASS_EXTERNAL;
+          break;
+        case SB_WEAK:
+          /* mingw / clang spell weak as WEAK_EXTERNAL plus an aux record
+           * whose TagIndex names the fallback symbol. cfree's obj layer
+           * doesn't carry a fallback symbol today, so TagIndex is 0.
+           * NOTE(review): index 0 is the first record in this table, not
+           * the weak symbol itself — confirm linkers accept this. */
+          storage_class = IMAGE_SYM_CLASS_WEAK_EXTERNAL;
+          emit_weak_aux = 1;
+          naux = 1;
+          break;
+        default:
+          storage_class = IMAGE_SYM_CLASS_STATIC;
+          break;
+      }
+
+      u8* slot = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE;
+      wr_sym(slot, short_name, zeroes, offset, value, section_number, type,
+             storage_class, naux);
+      sym_to_coff[e.id] = nrecords;
+      nrecords++;
+      if (emit_weak_aux) {
+        u8* aux = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE;
+        wr_aux_weak(aux, /*TagIndex*/ 0,
+                    /*Characteristics*/ IMAGE_WEAK_EXTERN_SEARCH_LIBRARY);
+        nrecords++;
+      }
+    }
+    obj_symiter_free(it);
+  }
+
+  /* ---- pass 4: build per-section relocation tables --------------- */
+  for (u32 ci = 0; ci < nsecs; ++ci) {
+    CSec* cs = &secs[ci];
+    u32 nr = cs->number_of_relocations;
+    if (!nr) continue;
+    u8* buf = (u8*)arena_alloc(c->scratch, (size_t)COFF_RELOC_SIZE * nr,
+                               _Alignof(u32));
+    u32 j = 0;
+    for (u32 ri = 0; ri < total_relocs; ++ri) {
+      const Reloc* r = obj_reloc_at(ob, ri);
+      if (r->removed) continue;
+      if (r->section_id != cs->obj_sec) continue;
+      if (r->sym == OBJ_SYM_NONE) {
+        compiler_panic(c, no_loc(),
+                       "emit_coff: reloc without symbol not supported "
+                       "(sec=%u offset=%u kind=%u)",
+                       (u32)r->section_id, (u32)r->offset, (u32)r->kind);
+      }
+      if (r->has_explicit_addend && r->addend != 0 &&
+          !coff_rel32_absorbs_minus4(c->target.arch, (RelocKind)r->kind,
+                                     r->addend)) {
+        /* v1 limitation: COFF carries the addend in the patched bytes,
+         * and we don't currently mutate the obj's section bytes to
+         * encode a separate explicit addend. cfree's MCEmitter writes
+         * the addend inline for COFF targets, so this branch only
+         * fires for inputs synthesized by external tools. */
+        compiler_panic(c, no_loc(),
+                       "emit_coff: explicit nonzero addend not supported "
+                       "(sec=%u offset=%u kind=%u addend=%lld)",
+                       (u32)r->section_id, (u32)r->offset, (u32)r->kind,
+                       (long long)r->addend);
+      }
+      u32 wire = reloc_to(r->kind);
+      /* Translators return 0 (IMAGE_REL_*_ABSOLUTE) for unsupported
+       * input; that is a panic unless the input really is R_NONE. */
+      if (wire == 0 && r->kind != R_NONE) {
+        compiler_panic(c, no_loc(),
+                       "emit_coff: unsupported relocation kind %u for arch %u",
+                       (u32)r->kind, (u32)c->target.arch);
+      }
+      u32 sym_idx = sym_to_coff[r->sym];
+      u8* slot = buf + (size_t)j * COFF_RELOC_SIZE;
+      wr_u32_le(slot + 0, r->offset);
+      wr_u32_le(slot + 4, sym_idx);
+      wr_u16_le(slot + 8, (u16)wire);
+      ++j;
+    }
+    cs->reloc_bytes = buf;
+    /* Tombstoned relocs skipped above leave j < nr: publish the emitted
+     * count and mirror it into the secdef aux record pass 3 wrote. */
+    if (j != nr) {
+      u8* aux = symtab + ((size_t)secsym_index[ci] + 1u) * COFF_SYMBOL_SIZE;
+      cs->number_of_relocations = (u16)j;
+      wr_u16_le(aux + 4, (u16)j);
+    }
+  }
+
+  /* ---- pass 5: assign file offsets ------------------------------- */
+  /* Layout: [file header] [section headers] [per-section: bytes,
+   * relocs]* [symbol table] [string table] */
+  u64 cur =
+      (u64)COFF_FILE_HEADER_SIZE + (u64)COFF_SECTION_HEADER_SIZE * (u64)nsecs;
+
+  for (u32 ci = 0; ci < nsecs; ++ci) {
+    CSec* cs = &secs[ci];
+    /* Raw data offset. NOBITS contributes nothing on disk. */
+    if (cs->is_nobits || cs->size_of_raw_data == 0) {
+      cs->pointer_to_raw_data = 0;
+    } else {
+      cur = ALIGN_UP(cur, (u64)cs->align);
+      cs->pointer_to_raw_data = (u32)cur;
+      cur += cs->size_of_raw_data;
+    }
+    /* Reloc table. COFF mandates no alignment for it (llvm / binutils
+     * byte-pack); we 4-align for tidiness. */
+    if (cs->number_of_relocations) {
+      cur = ALIGN_UP(cur, (u64)4);
+      cs->pointer_to_relocations = (u32)cur;
+      cur += (u64)cs->number_of_relocations * COFF_RELOC_SIZE;
+    } else {
+      cs->pointer_to_relocations = 0;
+    }
+  }
+
+  cur = ALIGN_UP(cur, (u64)4);
+  u64 symtab_off = cur;
+  cur += (u64)nrecords * COFF_SYMBOL_SIZE;
+
+  /* String table starts immediately after the symtab. Its inclusive
+   * 4-byte size prefix is patched into the flattened copy in pass 6
+   * (Buf doesn't expose an in-place patch). */
+  u32 strtab_size = buf_pos(&strtab);
+  u64 strtab_off = cur;
+  cur += strtab_size;
+
+  /* ---- pass 6: write the file ------------------------------------ */
+  cfree_writer_seek(w, 0);
+
+  /* IMAGE_FILE_HEADER */
+  coff_wr_u16(w, machine);
+  coff_wr_u16(w, (u16)nsecs);
+  coff_wr_u32(w, 0); /* TimeDateStamp: reproducible */
+  coff_wr_u32(w, (u32)symtab_off);
+  coff_wr_u32(w, nrecords);
+  coff_wr_u16(w, 0); /* SizeOfOptionalHeader: 0 for .obj */
+  coff_wr_u16(w, IMAGE_FILE_LARGE_ADDRESS_AWARE);
+
+  /* Section headers — 40 bytes each, right after the file header. */
+  for (u32 ci = 0; ci < nsecs; ++ci) {
+    const CSec* cs = &secs[ci];
+    cfree_writer_write(w, cs->name8, 8);
+    coff_wr_u32(w, cs->virtual_size);
+    coff_wr_u32(w, 0); /* VirtualAddress: 0 for .obj */
+    coff_wr_u32(w, cs->size_of_raw_data);
+    coff_wr_u32(w, cs->pointer_to_raw_data);
+    coff_wr_u32(w, cs->pointer_to_relocations);
+    coff_wr_u32(w, 0); /* PointerToLinenumbers: 0 */
+    coff_wr_u16(w, cs->number_of_relocations);
+    coff_wr_u16(w, 0); /* NumberOfLinenumbers: 0 */
+    coff_wr_u32(w, cs->characteristics);
+  }
+
+  /* Section bytes + relocs (interleaved). */
+  for (u32 ci = 0; ci < nsecs; ++ci) {
+    const CSec* cs = &secs[ci];
+    if (!cs->is_nobits && cs->size_of_raw_data && cs->obj_bytes) {
+      cfree_writer_seek(w, cs->pointer_to_raw_data);
+      u32 sz = cs->obj_bytes->total;
+      u8* tmp = (u8*)h->alloc(h, sz ? sz : 1, 1);
+      if (!tmp) {
+        compiler_panic(c, no_loc(), "emit_coff: out of memory");
+      }
+      if (sz) buf_flatten(cs->obj_bytes, tmp);
+      cfree_writer_write(w, tmp, sz);
+      h->free(h, tmp, sz ? sz : 1);
+    }
+    if (cs->number_of_relocations && cs->reloc_bytes) {
+      cfree_writer_seek(w, cs->pointer_to_relocations);
+      cfree_writer_write(w, cs->reloc_bytes,
+                         (size_t)cs->number_of_relocations * COFF_RELOC_SIZE);
+    }
+  }
+
+  /* Symbol table. */
+  cfree_writer_seek(w, symtab_off);
+  cfree_writer_write(w, symtab, (size_t)nrecords * COFF_SYMBOL_SIZE);
+
+  /* String table: 4-byte total size (inclusive) followed by the body.
+   * `strtab` was initialized with 4 placeholder zero bytes; rewrite
+   * them with the real size before flushing. */
+  {
+    u8* flat = (u8*)arena_alloc(c->scratch, strtab_size ? strtab_size : 1, 1);
+    if (strtab_size) buf_flatten(&strtab, flat);
+    if (strtab_size >= COFF_STRTAB_SIZE_FIELD_BYTES) {
+      wr_u32_le(flat, strtab_size);
+    }
+    cfree_writer_seek(w, strtab_off);
+    cfree_writer_write(w, flat, strtab_size);
+  }
+  buf_fini(&strtab);
+}
diff --git a/src/obj/coff/link.c b/src/obj/coff/link.c
@@ -0,0 +1,1731 @@
+/* link_emit_coff: write a PE32+ executable image (the PE counterpart
+ * of Mach-O's MH_EXECUTABLE) to the caller-provided Writer.
+ *
+ * Phase 3.1 deliverable per doc/WINDOWS.md: skeleton + base-reloc
+ * handling for the four standard PE sections. Import-table synthesis
+ * (.idata / IAT) lands in Phase 3.2; per-arch IAT stub bytes in 3.3;
+ * TLS directory in 3.5; debug directory in 3.6 — those code paths
+ * panic loudly here so the strict-by-default posture surfaces them.
+ *
+ * File layout (in write order):
+ *
+ * [DOS stub IMAGE_DOS_HEADER] -- 64 bytes; e_lfanew=0x40
+ * [PE signature "PE\0\0"] -- 4 bytes
+ * [IMAGE_FILE_HEADER] -- 20 bytes
+ * [IMAGE_OPTIONAL_HEADER64] -- 240 bytes (PE32+)
+ * [IMAGE_SECTION_HEADER * nsec] -- 40 bytes each
+ * [pad to FileAlignment]
+ * [.text bytes, padded to FileAlignment]
+ * [.rdata bytes, padded to FileAlignment]
+ * [.data bytes, padded to FileAlignment]
+ * [.reloc bytes, padded to FileAlignment]
+ *
+ * .bss is uninitialized — it has a section header (with VirtualSize)
+ * but no file bytes and PointerToRawData=0.
+ *
+ * RVAs follow SectionAlignment (0x1000); FileAlignment is 0x200; the
+ * first section starts at RVA 0x1000 (right after the headers map).
+ * ImageBase is the Win64 convention 0x140000000.
+ *
+ * Reloc strategy. The link layout pass has already placed every kept
+ * input section into img->sections / img->segments under the ELF/Mach-O
+ * coordinate system (image-relative vaddrs, often packed by permission
+ * bucket). COFF wants a different packing — the four standard
+ * sections at SectionAlignment-aligned RVAs — so this writer re-derives
+ * per-input-section vaddrs from scratch and shifts each LinkSection /
+ * symbol / LinkRelocApply by its section's per-section delta before
+ * applying relocations. link_emit_macho takes the same tack for its
+ * __DATA_CONST splits; the ELF writer leaves vaddrs alone because the
+ * link layout already matches ELF's PT_LOAD shape. */
+
+#include "link/link.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "core/core.h"
+#include "core/heap.h"
+#include "core/pool.h"
+#include "core/slice.h"
+#include "core/util.h"
+#include "core/vec.h"
+#include "link/link_internal.h"
+#include "obj/coff/coff.h"
+#include "obj/format.h"
+
+/* ---- .idata layout constants ----
+ *
+ * Per doc/WINDOWS.md §3.2: the .idata content is a concatenation of an
+ * IMAGE_IMPORT_DESCRIPTOR table (NULL-terminated), one ILT per DLL
+ * (each NULL-terminated u64 array), one IAT per DLL (same shape),
+ * a hint/name table, and a DLL-name string pool. Each block is
+ * pointer-sized aligned within the section. AArch64 import thunks use
+ * PAGEOFFSET_12L for 64-bit ILT/IAT slots, so those sub-blocks must be
+ * 8-byte aligned. */
+#define PE_IDATA_BLOCK_ALIGN 8u
+/* Hint field on IMAGE_IMPORT_BY_NAME records. cfree never has a real
+ * hint (the OS loader doesn't need one to do the bsearch on the DLL's
+ * export name table), so 0 is the canonical "no hint" value. */
+#define PE_IMPORT_HINT_NONE 0u
+
+/* All-zero SrcLoc for diagnostics raised from this file. */
+static SrcLoc no_loc(void) {
+  return (SrcLoc){0, 0, 0};
+}
+
+/* ---- PE/Win64 layout constants ----
+ *
+ * Centralised here so the wire-format numbers in this TU stay named
+ * (and the magic-numbers rule in CLAUDE.md is honoured). Values match
+ * the PE/COFF spec + Win64 conventions; mingw-w64's ld defaults agree. */
+#define PE_IMAGE_BASE LINK_PE_IMAGE_BASE
+#define PE_SECTION_ALIGNMENT 0x1000u
+#define PE_FILE_ALIGNMENT 0x200u
+#define PE_FIRST_SECTION_RVA 0x1000u
+#define PE_DOS_E_LFANEW 0x40u
+#define PE_NUM_DATA_DIRS COFF_NUM_DATA_DIRECTORIES
+#define PE_OPT_HDR_SIZE COFF_OPT_HDR64_SIZE
+#define PE_LINKER_MAJOR 0u
+#define PE_LINKER_MINOR 1u
+#define PE_OS_MAJOR 6u /* Windows Vista+ — mingw default */
+#define PE_OS_MINOR 0u
+#define PE_SUBSYS_MAJOR 6u
+#define PE_SUBSYS_MINOR 0u
+#define PE_STACK_RESERVE 0x100000ULL
+#define PE_STACK_COMMIT 0x1000ULL
+#define PE_HEAP_RESERVE 0x100000ULL
+#define PE_HEAP_COMMIT 0x1000ULL
+#define PE_DLL_CHARS \
+ (IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA | \
+ IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE | \
+ IMAGE_DLLCHARACTERISTICS_NX_COMPAT | \
+ IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE)
+
+/* PE32+ DOS-stub-to-PE-signature offsets (manual, since we marshal
+ * field-by-field rather than memcpy'ing the packed struct). */
+#define PE_DOS_HDR_SIZE COFF_DOS_HEADER_SIZE
+#define PE_SIG_SIZE 4u
+#define PE_FILE_HDR_SIZE COFF_FILE_HEADER_SIZE
+#define PE_SECTION_HDR_SIZE COFF_SECTION_HEADER_SIZE
+
+/* Standard PE output buckets, plus .idata (import directory) and
+ * .reloc — both synthesised here rather than copied from input
+ * sections. Order matters: it's the on-image RVA order. */
+typedef enum CoffBucket {
+ COFF_BUCKET_TEXT = 0, /* SF_EXEC input sections (see coff_bucket_for) */
+ COFF_BUCKET_RDATA = 1, /* fallback when no other input test matches */
+ COFF_BUCKET_IDATA = 2, /* import directory; synthesised, not copied */
+ COFF_BUCKET_DATA = 3, /* SF_WRITE input sections */
+ COFF_BUCKET_TLS = 4, /* SF_TLS input sections */
+ COFF_BUCKET_BSS = 5, /* SSEM_NOBITS input sections */
+ COFF_BUCKET_RELOC = 6, /* .reloc; synthesised, not copied */
+ COFF_NBUCKETS = 7, /* number of buckets above, not a bucket itself */
+} CoffBucket;
+
+/* IMAGE_TLS_DIRECTORY64 wire size: u64*4 + u32*2 = 40 bytes. */
+#define COFF_TLS_DIRECTORY64_SIZE 40u
+/* Byte offsets of the four u64 VA fields within IMAGE_TLS_DIRECTORY64
+ * — they need base relocations so ASLR can fix them up. */
+#define COFF_TLSDIR_OFF_START_ADDR 0u
+#define COFF_TLSDIR_OFF_END_ADDR 8u
+#define COFF_TLSDIR_OFF_INDEX_ADDR 16u
+#define COFF_TLSDIR_OFF_CALLBACKS 24u
+
+typedef struct CoffSection { /* presumably one per CoffBucket — confirm */
+ const char* name; /* short ASCII; <= 8 bytes including NUL pad */
+ u32 characteristics; /* presumably IMAGE_SCN_* bits — TODO confirm */
+ u8* bytes; /* NULL for .bss / .reloc-before-build */
+ u32 size; /* VirtualSize (real bytes; for .bss, mem size) */
+ u32 size_raw; /* SizeOfRawData (file size, FileAlignment-padded) */
+ u32 rva; /* VirtualAddress in image */
+ u32 file_offset; /* PointerToRawData; 0 for .bss */
+ u8 in_image; /* 1 if this bucket is emitted as a section */
+ u8 has_file_bytes; /* 0 for .bss */
+ u8 pad[2]; /* not referenced in the visible code; presumably padding */
+} CoffSection;
+
+/* ---- byte writer helpers ---- */
+
+/* Emit `n` zero bytes through `w`, at most sizeof(pad) per write call. */
+static void coff_write_zeroes(Writer* w, u64 n) {
+  static const u8 pad[256] = {0};
+  for (size_t k; n != 0; n -= k) {
+    k = n < sizeof(pad) ? (size_t)n : sizeof(pad);
+    cfree_writer_write(w, pad, k);
+  }
+}
+
+/* Pick the PE output bucket for a kept LinkSection. Tests run in this
+ * order: SF_EXEC -> .text; SF_TLS -> the dedicated .tls bucket (so
+ * SECREL relocations from TLS access code resolve against the merged
+ * TLS image, not .data); SSEM_NOBITS -> .bss; SF_WRITE -> .data.
+ * Anything left over goes to .rdata. */
+static CoffBucket coff_bucket_for(const LinkSection* ls) {
+  return (ls->flags & SF_EXEC)      ? COFF_BUCKET_TEXT
+         : (ls->flags & SF_TLS)     ? COFF_BUCKET_TLS
+         : (ls->sem == SSEM_NOBITS) ? COFF_BUCKET_BSS
+         : (ls->flags & SF_WRITE)   ? COFF_BUCKET_DATA
+                                    : COFF_BUCKET_RDATA;
+}
+
+/* 1 for the absolute kinds (R_ABS64 / R_ABS32), which need a .reloc
+ * entry so the OS loader can patch the site once ASLR has chosen the
+ * runtime ImageBase; 0 otherwise. PC-relative fixups need none because
+ * their displacement is load-invariant. */
+static int coff_reloc_needs_base_reloc(RelocKind k) {
+  if (k == R_ABS32) return 1;
+  return k == R_ABS64 ? 1 : 0; }
+
+/* Return the first img->sections entry whose inclusive range [vaddr,
+ * vaddr + size] covers image-relative address `v`, or NULL. Used to
+ * attribute symbol vaddrs to a section for the per-section relayout
+ * delta; on a seam between sections the earlier-listed one wins. */
+static const LinkSection* coff_section_at(const LinkImage* img, u64 v) {
+  for (u32 idx = 0; idx != img->nsections; ++idx) {
+    const LinkSection* ls = img->sections + idx;
+    if (v < ls->vaddr || v > ls->vaddr + ls->size) continue;
+    return ls;
+  }
+  return NULL;
+}
+
+/* Per-input-section delta map. Indexed by `LinkSection.id - 1`.
+ * Populated by coff_build_buckets. Consumed by every subsequent pass
+ * that needs to translate input-coordinate offsets (the world that
+ * img->sections / img->relocs live in) into PE-coordinate ones (where
+ * the writer plants bytes). delta is stored explicitly so callers
+ * avoid recomputing (new_rva + bucket.rva - old_vaddr) for every
+ * LinkRelocApply whose link_section_id points at the section.
+ *
+ * NOTE(review): coff_place_section only fills `bucket` and `new_rva`,
+ * and at that point new_rva is the offset inside the bucket rather
+ * than an image RVA (coff_symbol_final_va adds the bucket's RVA on
+ * top of it). new_file_off and delta are presumably finalised by a
+ * later pass — confirm against the caller. */
+typedef struct CoffSecMap {
+ u32 new_rva; /* image-relative RVA after PE relayout */
+ u32 new_file_off; /* file offset of the patched byte */
+ i64 delta; /* new_rva - old_vaddr */
+ u8 bucket; /* CoffBucket the section was placed in */
+ u8 pad[3]; /* explicit padding */
+} CoffSecMap;
+
+/* TLS directory placement state. Populated by coff_plan_tls_layout
+ * when the merged .tls bucket is non-empty; consumed by the
+ * optional-header writer, the .reloc builder (base-relocs for the four
+ * absolute VA fields), and the .rdata emit pass that writes the final
+ * IMAGE_TLS_DIRECTORY64 record. All fields are zero when `present`
+ * is 0. */
+typedef struct CoffTlsLayout {
+ int present; /* 1 iff at least one TLS section was kept */
+ /* byte offset of the IMAGE_TLS_DIRECTORY64 within .rdata */
+ u32 dir_rdata_off;
+ u32 tls_size; /* size of the merged .tls bucket */
+ LinkSymId tls_index_sym; /* resolved _tls_index LinkSymbol */
+ /* __xl_a when defined, else __xl_c, else LINK_SYM_NONE */
+ LinkSymId callbacks_sym;
+ /* 8 for __xl_a (mingw points past the leading NULL sentinel), else 0 */
+ u64 callbacks_addend;
+} CoffTlsLayout;
+
+/* Look up a symbol by C-string name. The name is interned into the
+ * global pool and then compared against each LinkSymbol's interned
+ * name, scanning the table front to back. Returns the 1-based
+ * LinkSymId of the first match, or LINK_SYM_NONE when nothing
+ * matches. */
+static LinkSymId coff_find_sym(LinkImage* img, const char* name) {
+  const Sym wanted = pool_intern_slice(img->c->global, slice_from_cstr(name));
+  const u32 total = LinkSyms_count(&img->syms);
+  LinkSymId found = LINK_SYM_NONE;
+  u32 idx = 0;
+  while (idx < total) {
+    const LinkSymbol* cand = LinkSyms_at(&img->syms, idx);
+    ++idx; /* idx is now the candidate's 1-based id */
+    if (cand->name == wanted) {
+      found = (LinkSymId)idx;
+      break;
+    }
+  }
+  return found;
+}
+
+/* Resolve `_tls_index` by name in the linked symbol table. mingw's
+ * libmingwex defines it (as part of tlsmcrt). Returns LINK_SYM_NONE
+ * when no input provides it; this function itself never fails — the
+ * TLS planner (coff_plan_tls_layout) turns a miss into a clear link
+ * error rather than emitting a TLS directory with a stale address. */
+static LinkSymId coff_find_tls_index_sym(LinkImage* img) {
+  static const char kTlsIndexName[] = "_tls_index";
+  return coff_find_sym(img, kTlsIndexName);
+}
+
+/* Decide which LinkSection a symbol belongs to. Resolution order:
+ *   1. the six mingw CRT boundary symbols (__xd_a/__xd_z/__xl_a/
+ *      __xl_c/__xl_d/__xl_z) are pinned to the first section named
+ *      after the matching .CRT$ subsection, when such a section
+ *      exists;
+ *   2. otherwise the symbol's own section_id, when it is set and in
+ *      range;
+ *   3. otherwise whichever section's address range covers s->vaddr
+ *      (may be NULL). */
+static const LinkSection* coff_symbol_section(const LinkImage* img,
+                                              const LinkSymbol* s) {
+  /* Boundary symbol name -> .CRT$ subsection it marks. All six symbol
+   * names are exactly 6 bytes long. */
+  static const struct {
+    const char* sym;
+    const char* sec;
+  } kCrtMarkers[] = {
+      {"__xd_a", ".CRT$XDA"}, {"__xd_z", ".CRT$XDZ"},
+      {"__xl_a", ".CRT$XLA"}, {"__xl_c", ".CRT$XLC"},
+      {"__xl_d", ".CRT$XLD"}, {"__xl_z", ".CRT$XLZ"},
+  };
+
+  if (s->name) {
+    Slice sym_name = pool_slice(img->c->global, s->name);
+    const char* wanted_sec = NULL;
+    if (sym_name.s && sym_name.len == 6) {
+      size_t m;
+      for (m = 0; m < sizeof(kCrtMarkers) / sizeof(kCrtMarkers[0]); ++m) {
+        if (memcmp(sym_name.s, kCrtMarkers[m].sym, 6) == 0) {
+          wanted_sec = kCrtMarkers[m].sec;
+          break;
+        }
+      }
+    }
+    if (wanted_sec) {
+      u32 k;
+      for (k = 0; k < img->nsections; ++k) {
+        const LinkSection* cand = &img->sections[k];
+        if (!cand->name) continue;
+        if (slice_eq_cstr(pool_slice(img->c->global, cand->name), wanted_sec))
+          return cand;
+      }
+      /* No such section: fall through to the generic lookups. */
+    }
+  }
+  if (s->section_id != LINK_SEC_NONE && s->section_id <= img->nsections)
+    return &img->sections[s->section_id - 1];
+  return coff_section_at(img, s->vaddr);
+}
+
+/* Compute the final virtual address of symbol `id` in the PE image:
+ * PE_IMAGE_BASE + RVA of the bucket holding the symbol's section +
+ * the section's recorded map[].new_rva + the symbol's offset inside
+ * its input section. `what` is the symbol's display name, used only in
+ * diagnostics. Panics when the symbol is undefined or absolute, or
+ * when no containing section can be found. */
+static u64 coff_symbol_final_va(const LinkImage* img,
+                                const CoffSection out[COFF_NBUCKETS],
+                                const CoffSecMap* map, LinkSymId id,
+                                const char* what) {
+  const LinkSymbol* sym = LinkSyms_at(&img->syms, id - 1);
+  const LinkSection* home;
+  const CoffSecMap* placed;
+  u64 in_section_off;
+
+  if (sym->kind == SK_ABS || !sym->defined) {
+    compiler_panic(img->c, no_loc(),
+                   "link_emit_coff: `%.*s` is not a defined section-bound "
+                   "symbol",
+                   SLICE_ARG(slice_from_cstr(what)));
+  }
+  home = coff_symbol_section(img, sym);
+  if (home == NULL) {
+    compiler_panic(img->c, no_loc(),
+                   "link_emit_coff: `%.*s` has no containing section",
+                   SLICE_ARG(slice_from_cstr(what)));
+  }
+  placed = &map[home->id - 1];
+  in_section_off = sym->vaddr - home->vaddr;
+  return PE_IMAGE_BASE + (u64)out[placed->bucket].rva + (u64)placed->new_rva +
+         in_section_off;
+}
+
+/* Reserve room for the IMAGE_TLS_DIRECTORY64 record at the tail of the
+ * .rdata bucket and record where it lives.
+ *
+ * Leaves *tls zeroed and returns when the .tls bucket is empty.
+ * Otherwise it:
+ *   - resolves `_tls_index`, panicking if no input defines it;
+ *   - picks the TLS callback array symbol: `__xl_a` with addend 8
+ *     (past the leading NULL sentinel) when present, else `__xl_c`
+ *     with addend 0, which may itself be LINK_SYM_NONE;
+ *   - rounds the .rdata size up to 8 bytes and appends a zeroed
+ *     COFF_TLS_DIRECTORY64_SIZE slot, growing the buffer if needed
+ *     and panicking if that growth fails.
+ * coff_emit_tls_dir fills the slot in once final RVAs are known.
+ *
+ *   img        link image; supplies the heap and the symbol table.
+ *   out        bucket array; out[COFF_BUCKET_RDATA] is extended.
+ *   rdata_cap  in/out: allocated capacity of the .rdata buffer.
+ *   tls        out: TLS placement state. */
+static void coff_plan_tls_layout(LinkImage* img, CoffSection out[COFF_NBUCKETS],
+                                 u32* rdata_cap, CoffTlsLayout* tls) {
+  memset(tls, 0, sizeof(*tls));
+  if (out[COFF_BUCKET_TLS].size == 0) return;
+  tls->present = 1;
+  tls->tls_size = out[COFF_BUCKET_TLS].size;
+  tls->tls_index_sym = coff_find_tls_index_sym(img);
+  if (tls->tls_index_sym == LINK_SYM_NONE) {
+    compiler_panic(img->c, no_loc(),
+                   "link_emit_coff: .tls section requires `_tls_index` "
+                   "(provided by mingw libmingwex / tlsmcrt.o) — none of "
+                   "the linked inputs define it");
+  }
+  /* Prefer __xl_a (skipping its leading NULL sentinel); fall back to
+   * __xl_c, which starts directly at the first callback. */
+  tls->callbacks_sym = coff_find_sym(img, "__xl_a");
+  if (tls->callbacks_sym != LINK_SYM_NONE) {
+    tls->callbacks_addend = 8;
+  } else {
+    tls->callbacks_sym = coff_find_sym(img, "__xl_c");
+    tls->callbacks_addend = 0;
+  }
+  /* IMAGE_TLS_DIRECTORY64 needs 8-byte alignment for its u64 fields;
+   * round the .rdata size up before reserving the record. */
+  u32 old_size = out[COFF_BUCKET_RDATA].size;
+  u32 rdata_size = (u32)ALIGN_UP((u64)old_size, 8ull);
+  u32 need = rdata_size + COFF_TLS_DIRECTORY64_SIZE;
+  if (need > *rdata_cap) {
+    /* VEC_GROW reports failure with a nonzero result; writing into the
+     * buffer after a failed grow would run past the allocation. */
+    if (VEC_GROW(img->heap, out[COFF_BUCKET_RDATA].bytes, *rdata_cap, need)) {
+      compiler_panic(img->c, no_loc(),
+                     "link_emit_coff: oom growing .rdata for TLS directory");
+    }
+  }
+  /* Zero the alignment padding and the directory slot in one go. */
+  memset(out[COFF_BUCKET_RDATA].bytes + old_size, 0, (size_t)(need - old_size));
+  tls->dir_rdata_off = rdata_size;
+  out[COFF_BUCKET_RDATA].size = need;
+}
+
+/* Fill in the IMAGE_TLS_DIRECTORY64 record reserved inside .rdata.
+ * Must run after every bucket RVA is final. The four u64 fields hold
+ * absolute VAs (PE_IMAGE_BASE + RVA): start and end of the merged .tls
+ * bucket, the address of `_tls_index`, and the callback array (0 when
+ * no callback symbol was found). The base-reloc pass will emit
+ * IMAGE_REL_BASED_DIR64 entries so ASLR keeps them valid. No-op when
+ * the image has no TLS. */
+static void coff_emit_tls_dir(const LinkImage* img,
+                              const CoffSection out[COFF_NBUCKETS],
+                              const CoffSecMap* map, const CoffTlsLayout* tls) {
+  u64 raw_start;
+  u64 raw_end;
+  u64 index_va;
+  u64 callbacks_va = 0;
+  u8* rec;
+
+  if (!tls->present) return;
+
+  raw_start = PE_IMAGE_BASE + (u64)out[COFF_BUCKET_TLS].rva;
+  raw_end = raw_start + (u64)tls->tls_size;
+  index_va =
+      coff_symbol_final_va(img, out, map, tls->tls_index_sym, "_tls_index");
+  if (tls->callbacks_sym) {
+    /* A nonzero addend means the planner picked __xl_a. */
+    const char* label = tls->callbacks_addend ? "__xl_a" : "__xl_c";
+    callbacks_va =
+        coff_symbol_final_va(img, out, map, tls->callbacks_sym, label) +
+        tls->callbacks_addend;
+  }
+
+  rec = out[COFF_BUCKET_RDATA].bytes + tls->dir_rdata_off;
+  wr_u64_le(rec + COFF_TLSDIR_OFF_START_ADDR, raw_start);
+  wr_u64_le(rec + COFF_TLSDIR_OFF_END_ADDR, raw_end);
+  wr_u64_le(rec + COFF_TLSDIR_OFF_INDEX_ADDR, index_va);
+  wr_u64_le(rec + COFF_TLSDIR_OFF_CALLBACKS, callbacks_va);
+  wr_u32_le(rec + 32, 0); /* SizeOfZeroFill */
+  wr_u32_le(rec + 36, 0); /* Characteristics */
+}
+
+/* Define the boundary symbol `_tls_used` at the VA of the TLS
+ * directory record inside .rdata. Does nothing when the image has no
+ * TLS or when the image has no linker attached. */
+static void coff_define_tls_used(LinkImage* img,
+                                 const CoffSection out[COFF_NBUCKETS],
+                                 const CoffTlsLayout* tls) {
+  if (!tls->present || !img->linker) return;
+  link_emit_boundary_sym(
+      img->linker, img, "_tls_used",
+      PE_IMAGE_BASE + (u64)out[COFF_BUCKET_RDATA].rva + (u64)tls->dir_rdata_off);
+}
+
+/* ---- import-table synthesis (Phase 3.2) ---------------------------
+ *
+ * Per doc/WINDOWS.md §3.2: every LinkSymbol with `imported = 1` gets
+ * routed through an IAT slot synthesized in `.idata`. Function
+ * imports additionally receive a small per-arch stub in `.text`
+ * (`ff 25 disp32` on x64 / `adrp;ldr;br` on aa64) so a direct CALL26
+ * or PC32 against the symbol lands on a stub that indirects through
+ * the IAT. Data imports skip the stub — the symbol's final vaddr is
+ * just the IAT slot vaddr, and code-gen emits a `mov rax, [slot]`
+ * sequence the same way it would for any other GOT-style load.
+ *
+ * cfree's COFF code-gen uses direct symbol references; there is no
+ * separate `__imp_<name>` LinkSymbol consulted at link time. The
+ * IAT-slot rewrite happens entirely by overriding the imported
+ * symbol's vaddr in apply_all_relocs. */
+
+/* One imported symbol (deduplicated by name in coff_collect_imports)
+ * together with the bucket-local offsets of everything synthesized
+ * for it. The .idata offsets are assigned by coff_plan_idata_layout;
+ * stub_off is assigned by coff_append_stubs and is only meaningful
+ * when is_func is set. */
+typedef struct CoffImport {
+ LinkSymId sym; /* canonical LinkSymId from img->syms */
+ u32 dll_idx; /* index into CoffImportTable.dlls */
+ u32 stub_off; /* offset in .text bucket (functions only) */
+ u32 iat_off; /* offset in .idata IAT block */
+ u32 ilt_off; /* offset in .idata ILT block */
+ u32 hint_off; /* offset in .idata hint/name table */
+ u8 is_func; /* 1 when the import gets a .text stub */
+ u8 pad[3]; /* explicit padding */
+} CoffImport;
+
+/* One providing DLL. After coff_collect_imports has sorted the import
+ * array, the DLL's imports occupy the contiguous run
+ * imports[first .. first + count). The *_off fields are offsets from
+ * the start of .idata, assigned by coff_plan_idata_layout. */
+typedef struct CoffImportDll {
+ Sym soname; /* interned DLL name, taken from the providing LinkInput */
+ u32 first; /* index of first import in CoffImportTable.imports */
+ u32 count; /* number of imports provided by this DLL */
+ u32 ilt_off; /* offset of this DLL's ILT block in .idata */
+ u32 iat_off; /* offset of this DLL's IAT block in .idata */
+ u32 name_off; /* offset of DLL name string in .idata */
+} CoffImportDll;
+
+/* Everything needed to synthesize the import machinery: the sorted
+ * import list, the DLL list, the layout of the five .idata sub-blocks
+ * and the stub region appended to .text. Built by
+ * coff_collect_imports, laid out by coff_plan_idata_layout and
+ * coff_append_stubs, released by coff_imports_free. */
+typedef struct CoffImportTable {
+ CoffImport* imports; /* heap array, sorted by (dll_idx, sym) */
+ u32 nimports;
+ u32 imports_cap; /* heap-allocation size for cleanup */
+ u32 nfunc_imports; /* subset of nimports that needs a .text stub */
+ CoffImportDll* dlls; /* heap array, in first-seen order */
+ u32 ndlls;
+ u32 dlls_cap; /* heap-allocation size for cleanup */
+ /* Offsets within .idata of the five sub-blocks. Filled in by
+ * coff_plan_idata_layout once nimports / ndlls is known. */
+ u32 desc_off; /* always 0 — descriptors come first */
+ u32 desc_size;
+ u32 ilt_base;
+ u32 ilt_total;
+ u32 iat_base;
+ u32 iat_total;
+ u32 hint_base;
+ u32 hint_total;
+ u32 name_base;
+ u32 name_total;
+ u32 idata_size; /* total bytes of .idata (end of the name block) */
+ /* Stub region in .text bucket. Stubs are appended after every
+ * input .text section has been bucketed. stub_text_off is the
+ * bucket-local offset of the first stub; per-import stub offsets
+ * are stored in CoffImport.stub_off. */
+ u32 stub_text_off;
+ u32 stub_total;
+} CoffImportTable;
+
+/* qsort comparator for CoffImport records. The primary key is the DLL
+ * slot, so each DLL's imports end up contiguous; the secondary key is
+ * the LinkSymId, which makes the resulting order fully determined and
+ * therefore reproducible regardless of qsort's stability. */
+static int coff_import_cmp(const void* a, const void* b) {
+  const CoffImport* lhs = (const CoffImport*)a;
+  const CoffImport* rhs = (const CoffImport*)b;
+  if (lhs->dll_idx != rhs->dll_idx) return lhs->dll_idx < rhs->dll_idx ? -1 : 1;
+  if (lhs->sym != rhs->sym) return lhs->sym < rhs->sym ? -1 : 1;
+  return 0;
+}
+
+/* Return the name under which an import is looked up in its DLL: the
+ * symbol's own name with a leading "__imp_" removed when the name is
+ * strictly longer than that prefix. The result points into the
+ * interned name (not separately NUL-terminated here); its length goes
+ * to *nlen_out when nlen_out is non-NULL. A nameless symbol yields
+ * whatever SLICE_NULL holds. */
+static const char* coff_import_lookup_name(Compiler* c, const LinkSymbol* s,
+                                           size_t* nlen_out) {
+  static const char kImpPrefix[] = "__imp_";
+  const size_t prefix_len = sizeof(kImpPrefix) - 1u;
+  Slice full = s->name ? pool_slice(c->global, s->name) : SLICE_NULL;
+  const char* text = full.s;
+  size_t len = full.len;
+
+  if (text != NULL && len > prefix_len &&
+      memcmp(text, kImpPrefix, prefix_len) == 0) {
+    text += prefix_len;
+    len -= prefix_len;
+  }
+  if (nlen_out != NULL) *nlen_out = len;
+  return text;
+}
+
+/* Classify an import as function-like (1) or data-like (0).
+ *   - A name that starts with "__imp_" (and is longer than the prefix)
+ *     is always data.
+ *   - Otherwise the symbol kind decides: SK_FUNC / SK_IFUNC are
+ *     functions, SK_OBJ is data.
+ *   - Any other kind (e.g. SK_UNDEF / SK_NOTYPE) defaults to function.
+ *     This mirrors the ELF `sym_is_func_import` heuristic: direct
+ *     calls are by far the common case, and a data import wrongly
+ *     stubbed would still fail loudly via the IAT-routed call. */
+static int coff_import_is_func(Compiler* c, const LinkSymbol* s) {
+  if (s->name) {
+    Slice nm = pool_slice(c->global, s->name);
+    if (nm.s && nm.len > 6u && memcmp(nm.s, "__imp_", 6u) == 0) return 0;
+  }
+  switch (s->kind) {
+    case SK_FUNC:
+    case SK_IFUNC:
+      return 1;
+    case SK_OBJ:
+      return 0;
+    default:
+      /* Unknown kind: assume function (the common case). */
+      return 1;
+  }
+}
+
+/* Walk LinkSyms, collect imports, group by DLL soname. Returns 1 if
+ * any imports were collected, 0 otherwise (caller skips the entire
+ * .idata path).
+ *
+ * `*it` is zeroed first. On success it->imports is sorted by
+ * (dll_idx, sym), every CoffImportDll has `first`/`count` describing
+ * its contiguous run, and imports_cap / dlls_cap hold the allocation
+ * capacities that coff_imports_free needs. Panics on an import with no
+ * providing DSO, an out-of-range dso_input_id, a DSO without a soname,
+ * or allocation failure. */
+static int coff_collect_imports(LinkImage* img, CoffImportTable* it) {
+ Heap* heap = img->heap;
+ Compiler* c = img->c;
+ Linker* l = img->linker;
+ u32 nsyms = LinkSyms_count(&img->syms);
+ u32 imp_cap = 0;
+ u32 dll_cap = 0;
+ u32 i;
+
+ memset(it, 0, sizeof(*it));
+ /* The providing DSOs are looked up through the linker's inputs. */
+ if (!l) return 0;
+ for (i = 0; i < nsyms; ++i) {
+ LinkSymbol* s = LinkSyms_at(&img->syms, i);
+ LinkInput* in;
+ u32 dll_idx = (u32)-1; /* sentinel: DLL slot not found yet */
+ u32 d;
+ /* Only named, imported symbols take part. */
+ if (!s->imported) continue;
+ if (s->name == 0) continue;
+ if (s->dso_input_id == LINK_INPUT_NONE) {
+ compiler_panic(c, no_loc(),
+ "link_emit_coff: imported symbol has no providing DSO");
+ }
+ /* img->globals only carries defined globals/weaks; imported undefs
+ * never land there. Dedup by name: skip if any earlier slot
+ * already collected this name. (Linear scan per symbol, so the
+ * whole pass is quadratic in the number of imports.) */
+ {
+ int dup = 0;
+ for (u32 k = 0; k < it->nimports; ++k) {
+ LinkSymbol* prev = LinkSyms_at(&img->syms, it->imports[k].sym - 1);
+ if (prev->name == s->name) {
+ dup = 1;
+ break;
+ }
+ }
+ if (dup) continue;
+ }
+ /* dso_input_id is 1-based; the unsigned subtraction also rejects 0. */
+ if (s->dso_input_id - 1u >= LinkInputs_count(&l->inputs)) {
+ compiler_panic(c, no_loc(),
+ "link_emit_coff: import dso_input_id out of range");
+ }
+ in = LinkInputs_at(&l->inputs, s->dso_input_id - 1u);
+ if (in->soname == 0) {
+ compiler_panic(c, no_loc(),
+ "link_emit_coff: providing DSO has no soname; cannot "
+ "emit IMAGE_IMPORT_DESCRIPTOR.Name");
+ }
+ /* Find-or-add the DLL slot. */
+ for (d = 0; d < it->ndlls; ++d) {
+ if (it->dlls[d].soname == in->soname) {
+ dll_idx = d;
+ break;
+ }
+ }
+ if (dll_idx == (u32)-1) {
+ /* VEC_GROW yields nonzero on allocation failure. */
+ if (VEC_GROW(heap, it->dlls, dll_cap, it->ndlls + 1u))
+ compiler_panic(c, no_loc(), "link_emit_coff: oom on import dlls");
+ dll_idx = it->ndlls++;
+ memset(&it->dlls[dll_idx], 0, sizeof(it->dlls[dll_idx]));
+ it->dlls[dll_idx].soname = in->soname;
+ }
+ if (VEC_GROW(heap, it->imports, imp_cap, it->nimports + 1u))
+ compiler_panic(c, no_loc(), "link_emit_coff: oom on imports");
+ /* Append the new import record; offsets stay 0 until layout. */
+ memset(&it->imports[it->nimports], 0, sizeof(it->imports[it->nimports]));
+ it->imports[it->nimports].sym = s->id;
+ it->imports[it->nimports].dll_idx = dll_idx;
+ it->imports[it->nimports].is_func = (u8)coff_import_is_func(c, s);
+ if (it->imports[it->nimports].is_func) ++it->nfunc_imports;
+ ++it->nimports;
+ it->dlls[dll_idx].count++;
+ }
+ /* Nothing was allocated on this path: a DLL slot is only added
+ * together with an import. */
+ if (it->nimports == 0) return 0;
+ /* Re-bucket the imports array so each DLL's run is contiguous. */
+ qsort(it->imports, it->nimports, sizeof(*it->imports), coff_import_cmp);
+ /* Fix up CoffImportDll.first now that imports[] is sorted. DLL slots
+ * were numbered in first-seen order and the sort key is that same
+ * number, so a running sum of the counts gives each run's start. */
+ {
+ u32 cur = 0;
+ for (u32 d = 0; d < it->ndlls; ++d) {
+ it->dlls[d].first = cur;
+ cur += it->dlls[d].count;
+ }
+ }
+ /* Remember the capacities so coff_imports_free can pass the right
+ * sizes back to the heap. */
+ it->imports_cap = imp_cap;
+ it->dlls_cap = dll_cap;
+ return 1;
+}
+
+/* Release the two heap arrays owned by an import table. Each one is
+ * handed back with the byte size derived from its recorded capacity.
+ * Arrays that were never allocated (NULL) are skipped. */
+static void coff_imports_free(LinkImage* img, CoffImportTable* it) {
+  Heap* h = img->heap;
+  const size_t imports_bytes = (size_t)it->imports_cap * sizeof(*it->imports);
+  const size_t dlls_bytes = (size_t)it->dlls_cap * sizeof(*it->dlls);
+  if (it->imports != NULL) h->free(h, it->imports, imports_bytes);
+  if (it->dlls != NULL) h->free(h, it->dlls, dlls_bytes);
+}
+
+/* Compute every per-block / per-import offset inside .idata and the
+ * total .idata size in bytes. Also assigns per-import hint/name and
+ * dll-name offsets so the descriptor table can reference them by RVA
+ * later (RVAs need the bucket's final RVA, added in coff_emit_idata).
+ *
+ * Blocks are laid out in order — descriptors, ILTs, IATs, hint/name
+ * records, DLL names — and the running offset is rounded up to
+ * PE_IDATA_BLOCK_ALIGN between blocks. All offsets are relative to
+ * the start of .idata. Requires it->dlls[].first/count to be final
+ * (i.e. coff_collect_imports has run). Panics on an empty import or
+ * DLL name. */
+static void coff_plan_idata_layout(LinkImage* img, CoffImportTable* it) {
+ Compiler* c = img->c;
+ u32 off; /* running offset from the start of .idata */
+
+ /* Block 1: import descriptors (one per DLL + zero terminator). */
+ it->desc_off = 0;
+ it->desc_size = (it->ndlls + 1u) * COFF_IMPORT_DESCRIPTOR_SIZE;
+ off = (u32)ALIGN_UP((u64)it->desc_size, (u64)PE_IDATA_BLOCK_ALIGN);
+
+ /* Block 2: ILTs. Per DLL: count entries + 1 (terminator), 8 B each. */
+ it->ilt_base = off;
+ for (u32 d = 0; d < it->ndlls; ++d) {
+ it->dlls[d].ilt_off = off;
+ /* Per-import: assign ilt_off within this DLL's block. */
+ for (u32 k = 0; k < it->dlls[d].count; ++k) {
+ it->imports[it->dlls[d].first + k].ilt_off =
+ off + k * (u32)COFF_THUNK_DATA64_SIZE;
+ }
+ off += (it->dlls[d].count + 1u) * (u32)COFF_THUNK_DATA64_SIZE;
+ }
+ it->ilt_total = off - it->ilt_base;
+ off = (u32)ALIGN_UP((u64)off, (u64)PE_IDATA_BLOCK_ALIGN);
+
+ /* Block 3: IATs (same shape as ILTs). */
+ it->iat_base = off;
+ for (u32 d = 0; d < it->ndlls; ++d) {
+ it->dlls[d].iat_off = off;
+ for (u32 k = 0; k < it->dlls[d].count; ++k) {
+ it->imports[it->dlls[d].first + k].iat_off =
+ off + k * (u32)COFF_THUNK_DATA64_SIZE;
+ }
+ off += (it->dlls[d].count + 1u) * (u32)COFF_THUNK_DATA64_SIZE;
+ }
+ it->iat_total = off - it->iat_base;
+ off = (u32)ALIGN_UP((u64)off, (u64)PE_IDATA_BLOCK_ALIGN);
+
+ /* Block 4: hint/name records. Each: u16 hint + NUL-term name +
+ * 1-byte pad if the resulting size is odd (PE/COFF spec). */
+ it->hint_base = off;
+ for (u32 i = 0; i < it->nimports; ++i) {
+ LinkSymbol* s = LinkSyms_at(&img->syms, it->imports[i].sym - 1);
+ size_t nlen = 0;
+ /* Lookup name = symbol name with any "__imp_" prefix removed. */
+ const char* nm = coff_import_lookup_name(c, s, &nlen);
+ if (!nm || nlen == 0)
+ compiler_panic(c, no_loc(),
+ "link_emit_coff: imported symbol has empty name");
+ it->imports[i].hint_off = off;
+ /* hint (2 B) + name (nlen + 1) + optional pad to even. */
+ u32 rec = 2u + (u32)nlen + 1u;
+ if (rec & 1u) ++rec;
+ off += rec;
+ }
+ it->hint_total = off - it->hint_base;
+ off = (u32)ALIGN_UP((u64)off, (u64)PE_IDATA_BLOCK_ALIGN);
+
+ /* Block 5: DLL name strings (NUL-terminated). */
+ it->name_base = off;
+ for (u32 d = 0; d < it->ndlls; ++d) {
+ Slice nm_s = pool_slice(c->global, it->dlls[d].soname);
+ const char* nm = nm_s.s;
+ size_t nlen = nm_s.len;
+ if (!nm || nlen == 0)
+ compiler_panic(c, no_loc(),
+ "link_emit_coff: providing DSO has empty soname");
+ it->dlls[d].name_off = off;
+ off += (u32)nlen + 1u;
+ }
+ it->name_total = off - it->name_base;
+ /* The total is the end of the last name; it is not rounded up to
+ * PE_IDATA_BLOCK_ALIGN here. */
+ it->idata_size = off;
+}
+
+/* Reserve space for the function-import stubs at the end of the .text
+ * bucket. Every stub occupies the arch descriptor's stub_size bytes.
+ * Each function import's bucket-local stub offset is recorded on its
+ * CoffImport so the stub VA can be computed once .text's RVA is final;
+ * the stub bytes themselves are written later by coff_emit_stubs.
+ *
+ *   img              link image; supplies heap and compiler.
+ *   it               import table; stub_text_off, stub_total and each
+ *                    function import's stub_off are filled in.
+ *   text_bucket      the .text bucket; grown and zero-extended in
+ *                    place when at least one stub is needed.
+ *   text_bucket_cap  in/out: allocated capacity of the .text buffer.
+ *
+ * Panics when the target arch has no COFF IAT stub emitter or when
+ * growing the bucket fails. Leaves the bucket untouched when there are
+ * no function imports. */
+static void coff_append_stubs(LinkImage* img, CoffImportTable* it,
+                              CoffSection* text_bucket, u32* text_bucket_cap) {
+  Heap* heap = img->heap;
+  Compiler* c = img->c;
+  const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_COFF);
+  const ObjCoffArchOps* arch =
+      fmt && fmt->coff_arch ? fmt->coff_arch(c->target.arch) : NULL;
+  u32 stub_size;
+  u32 stub_align;
+  u64 cur;
+  if (!arch || arch->stub_size == 0 || !arch->emit_iat_stub) {
+    compiler_panic(c, no_loc(),
+                   "link_emit_coff: arch has no COFF IAT stub emitter");
+  }
+  stub_size = arch->stub_size;
+  /* Stubs are pure code; aligning to instruction alignment is enough.
+   * x64 wants byte-granular, aa64 wants 4 B; align to stub size as a
+   * convenient upper bound.
+   * NOTE(review): if ALIGN_UP assumes a power-of-two alignment, a
+   * stub_size that is not a power of two would not produce a multiple
+   * of stub_size here — confirm against the macro and each arch's
+   * stub_size. */
+  stub_align = stub_size;
+  cur = (u64)text_bucket->size;
+  cur = ALIGN_UP(cur, (u64)stub_align);
+  it->stub_text_off = (u32)cur;
+  for (u32 i = 0; i < it->nimports; ++i) {
+    if (!it->imports[i].is_func) continue;
+    it->imports[i].stub_off = (u32)cur;
+    cur += stub_size;
+  }
+  it->stub_total = (u32)cur - it->stub_text_off;
+  if (it->stub_total == 0) return;
+  /* Grow the .text bucket buffer to hold the new region. VEC_GROW
+   * reports failure with a nonzero result; the memset below must not
+   * run on a buffer that failed to grow. */
+  u32 need = (u32)cur;
+  if (need > *text_bucket_cap) {
+    if (VEC_GROW(heap, text_bucket->bytes, *text_bucket_cap, need)) {
+      compiler_panic(c, no_loc(),
+                     "link_emit_coff: oom growing .text for IAT stubs");
+    }
+  }
+  /* Zero the alignment pad and the stub slots; stub bytes are written
+   * later by coff_emit_stubs once vaddrs are known. */
+  if (need > text_bucket->size) {
+    memset(text_bucket->bytes + text_bucket->size, 0,
+           (size_t)(need - text_bucket->size));
+  }
+  text_bucket->size = need;
+}
+
+/* Write the IAT stub of every function import into the slot that was
+ * reserved for it in the .text bucket. Must run after
+ * coff_assign_layout has fixed both .text's and .idata's RVA, because
+ * the arch emitter is handed the final VA of the stub and of its IAT
+ * slot. Panics when the target arch has no COFF IAT stub emitter. */
+static void coff_emit_stubs(LinkImage* img, const CoffImportTable* it,
+                            const CoffSection out[COFF_NBUCKETS]) {
+  Compiler* c = img->c;
+  const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_COFF);
+  const ObjCoffArchOps* arch = NULL;
+  const u64 text_va = PE_IMAGE_BASE + (u64)out[COFF_BUCKET_TEXT].rva;
+  const u64 idata_va = PE_IMAGE_BASE + (u64)out[COFF_BUCKET_IDATA].rva;
+  u32 idx;
+
+  if (fmt && fmt->coff_arch) arch = fmt->coff_arch(c->target.arch);
+  if (!arch || !arch->emit_iat_stub) {
+    compiler_panic(c, no_loc(),
+                   "link_emit_coff: arch has no COFF IAT stub emitter");
+  }
+  for (idx = 0; idx < it->nimports; ++idx) {
+    const CoffImport* imp = &it->imports[idx];
+    if (imp->is_func) {
+      const u64 stub_va = text_va + (u64)imp->stub_off;
+      const u64 slot_va = idata_va + (u64)imp->iat_off;
+      arch->emit_iat_stub(out[COFF_BUCKET_TEXT].bytes + imp->stub_off, stub_va,
+                          slot_va);
+    }
+  }
+}
+
+/* Emit .idata content into the bucket buffer. Allocates the buffer
+ * here (size is already known from coff_plan_idata_layout).
+ *
+ * Reads the .idata bucket's RVA on entry, so that RVA must already be
+ * final. On return out[COFF_BUCKET_IDATA].bytes/size describe the new
+ * zero-initialised buffer and *idata_bucket_cap holds its allocation
+ * size. Panics on allocation failure. */
+static void coff_emit_idata(LinkImage* img, const CoffImportTable* it,
+ CoffSection out[COFF_NBUCKETS],
+ u32* idata_bucket_cap) {
+ Heap* heap = img->heap;
+ Compiler* c = img->c;
+ CoffSection* idata = &out[COFF_BUCKET_IDATA];
+ u32 idata_rva = idata->rva;
+ u8* buf;
+ /* Allocate the bucket buffer, exactly idata_size bytes. The planner
+ * stores the end of the last DLL name there without rounding it up.
+ * Zero-filling up front supplies every terminator and pad byte. */
+ buf = (u8*)heap->alloc(heap, it->idata_size, _Alignof(u64));
+ if (!buf) compiler_panic(c, no_loc(), "link_emit_coff: oom on .idata buffer");
+ memset(buf, 0, it->idata_size);
+ idata->bytes = buf;
+ idata->size = it->idata_size;
+ *idata_bucket_cap = it->idata_size;
+
+ /* Block 1: IMAGE_IMPORT_DESCRIPTOR table. */
+ for (u32 d = 0; d < it->ndlls; ++d) {
+ u8* p = buf + d * (u32)COFF_IMPORT_DESCRIPTOR_SIZE;
+ u32 ilt_rva = idata_rva + it->dlls[d].ilt_off;
+ u32 iat_rva = idata_rva + it->dlls[d].iat_off;
+ u32 name_rva = idata_rva + it->dlls[d].name_off;
+ wr_u32_le(p + 0, ilt_rva); /* OriginalFirstThunk */
+ wr_u32_le(p + 4, 0u); /* TimeDateStamp */
+ wr_u32_le(p + 8, 0u); /* ForwarderChain */
+ wr_u32_le(p + 12, name_rva); /* Name */
+ wr_u32_le(p + 16, iat_rva); /* FirstThunk */
+ }
+ /* Trailing zero descriptor already zero-filled by memset. */
+
+ /* Blocks 2+3: ILT + IAT. Both initially point at the same hint/name
+ * record for each import; the OS loader rewrites IAT entries at
+ * load time. */
+ for (u32 i = 0; i < it->nimports; ++i) {
+ u64 hint_rva = (u64)idata_rva + (u64)it->imports[i].hint_off;
+ wr_u64_le(buf + it->imports[i].ilt_off, hint_rva);
+ wr_u64_le(buf + it->imports[i].iat_off, hint_rva);
+ }
+ /* Per-DLL ILT/IAT terminators are u64 0, already zero-filled. */
+
+ /* Block 4: hint/name records. */
+ for (u32 i = 0; i < it->nimports; ++i) {
+ LinkSymbol* s = LinkSyms_at(&img->syms, it->imports[i].sym - 1);
+ size_t nlen = 0;
+ /* Same lookup name the planner sized the record for. */
+ const char* nm = coff_import_lookup_name(c, s, &nlen);
+ u8* p = buf + it->imports[i].hint_off;
+ wr_u16_le(p, PE_IMPORT_HINT_NONE);
+ memcpy(p + 2, nm, nlen);
+ /* NUL terminator + optional pad already zero. */
+ }
+
+ /* Block 5: DLL name strings. */
+ for (u32 d = 0; d < it->ndlls; ++d) {
+ Slice nm_s = pool_slice(c->global, it->dlls[d].soname);
+ const char* nm = nm_s.s;
+ size_t nlen = nm_s.len;
+ memcpy(buf + it->dlls[d].name_off, nm, nlen);
+ /* NUL already zero. */
+ }
+}
+
+/* Per-LinkSymId vaddr override table for imports. Indexed by
+ * LinkSymId-1; 0 means "not an import". Built once after the .idata
+ * bucket RVA is final. Consumed by coff_apply_all_relocs in lieu of
+ * the symbol's own vaddr field (which is 0 for imports). Function
+ * imports map to their .text stub, data imports to their IAT slot. */
+typedef struct CoffImportVaddr {
+ /* Allocated with nsyms + 1 entries; 0 entries mean "not imported". */
+ u64* by_sym;
+ u32 nsyms; /* LinkSyms_count at build time; also sizes the free */
+} CoffImportVaddr;
+
+/* Build the import vaddr override table. The table is allocated with
+ * nsyms + 1 zeroed entries. For every collected import the target VA
+ * is its .text stub (function imports) or its IAT slot (data imports);
+ * that VA is stored for the canonical symbol and for every other
+ * imported LinkSymbol carrying the same name, so a per-input undef
+ * reference resolves to the same import slot. Panics on allocation
+ * failure. */
+static void coff_import_vaddr_build(LinkImage* img, const CoffImportTable* it,
+                                    const CoffSection out[COFF_NBUCKETS],
+                                    CoffImportVaddr* iv) {
+  Heap* heap = img->heap;
+  const u64 stub_base = PE_IMAGE_BASE + (u64)out[COFF_BUCKET_TEXT].rva;
+  const u64 iat_base = PE_IMAGE_BASE + (u64)out[COFF_BUCKET_IDATA].rva;
+  size_t table_bytes;
+  u32 imp_idx;
+
+  iv->nsyms = LinkSyms_count(&img->syms);
+  table_bytes = sizeof(u64) * (size_t)(iv->nsyms + 1u);
+  iv->by_sym = (u64*)heap->alloc(heap, table_bytes, _Alignof(u64));
+  if (iv->by_sym == NULL) {
+    compiler_panic(img->c, no_loc(),
+                   "link_emit_coff: oom on import vaddr table");
+  }
+  memset(iv->by_sym, 0, table_bytes);
+
+  for (imp_idx = 0; imp_idx < it->nimports; ++imp_idx) {
+    const CoffImport* imp = &it->imports[imp_idx];
+    const LinkSymbol* canon = LinkSyms_at(&img->syms, imp->sym - 1u);
+    const u64 target = imp->is_func ? stub_base + (u64)imp->stub_off
+                                    : iat_base + (u64)imp->iat_off;
+    u32 sym_idx;
+
+    iv->by_sym[imp->sym - 1u] = target;
+    /* Propagate to every shadow symbol that shares the name. */
+    for (sym_idx = 0; sym_idx < iv->nsyms; ++sym_idx) {
+      const LinkSymbol* other = LinkSyms_at(&img->syms, sym_idx);
+      if (!other->imported) continue;
+      if (other->name != canon->name) continue;
+      iv->by_sym[other->id - 1u] = target;
+    }
+  }
+}
+
+/* Release the override table, passing back the same nsyms + 1 entry
+ * size it was allocated with. A table that was never built (NULL) is
+ * left alone. */
+static void coff_import_vaddr_free(LinkImage* img, CoffImportVaddr* iv) {
+  Heap* h;
+  if (iv->by_sym == NULL) return;
+  h = img->heap;
+  h->free(h, iv->by_sym, sizeof(u64) * (size_t)(iv->nsyms + 1u));
+}
+
+/* Translate Compiler.target.arch into an IMAGE_FILE_MACHINE_* value by
+ * way of the per-arch COFF ops table. Panics when the arch has no COFF
+ * descriptor, or when the descriptor's machine is anything other than
+ * AMD64 or ARM64 (the only two Phase 1 supports). */
+static u16 coff_machine_or_panic(Compiler* c) {
+  const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_COFF);
+  const ObjCoffArchOps* arch = NULL;
+  u16 machine;
+
+  if (fmt && fmt->coff_arch) arch = fmt->coff_arch(c->target.arch);
+  if (arch == NULL) {
+    compiler_panic(c, no_loc(), "link_emit_coff: no COFF arch descriptor");
+  }
+  machine = arch->machine;
+  switch (machine) {
+    case IMAGE_FILE_MACHINE_AMD64:
+    case IMAGE_FILE_MACHINE_ARM64:
+      break;
+    default:
+      compiler_panic(c, no_loc(), "link_emit_coff: unsupported machine 0x%x",
+                     (unsigned)machine);
+  }
+  return machine;
+}
+
+/* Nonzero when the section's name begins with `prefix`. A section
+ * whose name slice has no backing bytes never matches. */
+static int coff_section_name_starts(Compiler* c, const LinkSection* ls,
+                                    const char* prefix) {
+  const size_t prefix_len = slice_from_cstr(prefix).len;
+  const Slice name = ls->name ? pool_slice(c->global, ls->name) : SLICE_NULL;
+  if (name.s == NULL) return 0;
+  if (name.len < prefix_len) return 0;
+  return memcmp(name.s, prefix, prefix_len) == 0;
+}
+
+/* Three-way ordering of two sections: byte-wise by name first, with
+ * the shorter name ordered first when one is a prefix of the other,
+ * and section id as the final tie-break so equal names keep a
+ * deterministic order. A section without a name compares as the empty
+ * string. */
+static int coff_section_name_cmp(Compiler* c, const LinkSection* a,
+                                 const LinkSection* b) {
+  const Slice lhs = a->name ? pool_slice(c->global, a->name) : SLICE_NULL;
+  const Slice rhs = b->name ? pool_slice(c->global, b->name) : SLICE_NULL;
+  const char* lp = lhs.s ? lhs.s : "";
+  const char* rp = rhs.s ? rhs.s : "";
+  const size_t common = lhs.len < rhs.len ? lhs.len : rhs.len;
+
+  if (common != 0) {
+    int byte_order = memcmp(lp, rp, common);
+    if (byte_order != 0) return byte_order;
+  }
+  if (lhs.len != rhs.len) return lhs.len < rhs.len ? -1 : 1;
+  if (a->id != b->id) return a->id < b->id ? -1 : 1;
+  return 0;
+}
+
+/* Append one input section to its output bucket.
+ *
+ * The bucket cursor is rounded up to the section's alignment, and
+ * map[ls->id - 1] receives the bucket and the bucket-local offset (the
+ * absolute RVA / file offset are filled in after bucket placement:
+ * RVAs need SectionAlignment, file offsets need FileAlignment). For
+ * every bucket except .bss the bucket buffer is grown to cover the
+ * padding and the section, the padding is zeroed, and the section's
+ * bytes are copied from its segment buffer (or zero-filled for
+ * SSEM_NOBITS sections). .bss only advances the cursor.
+ *
+ *   img         link image; supplies heap, segments and segment bytes.
+ *   out         bucket array; out[b].bytes / out[b].size are updated.
+ *   map         per-section placement records, indexed by id - 1.
+ *   bucket_cur  in/out per-bucket write cursors.
+ *   bucket_cap  in/out per-bucket allocation capacities.
+ *   ls          the section to place.
+ *
+ * Panics if the bucket would exceed the 32-bit size a PE section can
+ * describe, or if growing the bucket buffer fails. */
+static void coff_place_section(LinkImage* img, CoffSection out[COFF_NBUCKETS],
+                               CoffSecMap* map, u64 bucket_cur[COFF_NBUCKETS],
+                               u32 bucket_cap[COFF_NBUCKETS],
+                               const LinkSection* ls) {
+  Heap* heap = img->heap;
+  CoffBucket b2 = coff_bucket_for(ls);
+  u32 align = ls->align ? ls->align : 1u;
+  u64 prev = bucket_cur[b2];
+  u64 cur = ALIGN_UP(prev, (u64)align);
+  u64 end = cur + ls->size;
+
+  /* Offsets and sizes are stored as u32 below; refuse to truncate. */
+  if (end > 0xffffffffull) {
+    compiler_panic(img->c, no_loc(),
+                   "link_emit_coff: output section exceeds the 4 GiB PE limit");
+  }
+  map[ls->id - 1].bucket = (u8)b2;
+  map[ls->id - 1].new_rva = (u32)cur;
+  /* Materialise bytes whenever the cursor moves — including the case
+   * of a zero-size section that only contributes alignment padding,
+   * which would otherwise leave out[b2].size covering bytes that were
+   * never allocated or zeroed. */
+  if (b2 != COFF_BUCKET_BSS && end > prev) {
+    u32 need = (u32)end;
+    if (need > bucket_cap[b2]) {
+      /* VEC_GROW reports failure with a nonzero result. */
+      if (VEC_GROW(heap, out[b2].bytes, bucket_cap[b2], need)) {
+        compiler_panic(img->c, no_loc(),
+                       "link_emit_coff: oom growing section bucket");
+      }
+    }
+    /* Zero the alignment padding in front of the section. */
+    memset(out[b2].bytes + prev, 0, (size_t)(cur - prev));
+    if (ls->size) {
+      if (ls->sem != SSEM_NOBITS) {
+        /* Copy bytes from the source segment buffer into the bucket. */
+        const LinkSegment* seg = &img->segments[ls->segment_id - 1];
+        const u8* src = img->segment_bytes[seg->id - 1] +
+                        (size_t)(ls->file_offset - seg->file_offset);
+        memcpy(out[b2].bytes + cur, src, (size_t)ls->size);
+      } else {
+        /* No file bytes to copy: the section is all zeroes. */
+        memset(out[b2].bytes + cur, 0, (size_t)ls->size);
+      }
+    }
+  }
+  bucket_cur[b2] = end;
+  out[b2].size = (u32)end;
+}
+
+/* Insert `ls` into the array `a` of *n sections, which is kept sorted
+ * by coff_section_name_cmp. Entries that order after `ls` are shifted
+ * up one slot; entries that compare equal stay in front of it. The
+ * caller guarantees room for one more element. *n is incremented. */
+static void coff_insert_sorted_section(Compiler* c, const LinkSection** a,
+                                       u32* n, const LinkSection* ls) {
+  u32 slot;
+  for (slot = *n; slot > 0; --slot) {
+    if (coff_section_name_cmp(c, ls, a[slot - 1u]) >= 0) break;
+    a[slot] = a[slot - 1u];
+  }
+  a[slot] = ls;
+  *n = *n + 1u;
+}
+
+/* ---- pass 1: bucket input sections, assemble bytes, assign deltas ----
+ * CoffSecMap is defined above (alongside CoffTlsLayout) because the
+ * TLS planning helpers need to consume one. */
+
+/* Initialise all COFF_NBUCKETS bucket descriptors and fill the payload
+ * buckets from the input sections.
+ *
+ * Names, characteristics and has_file_bytes are set for .text, .rdata,
+ * .idata, .data, .tls, .bss and .reloc. Every SF_ALLOC LinkSection is
+ * then placed via coff_place_section: ordinary sections in input
+ * order, followed by the SF_TLS sections and then the `.CRT$`
+ * sections, each of those two groups sorted by name (ties broken by
+ * section id).
+ *
+ * `map[secid-1]` receives, for every placed section, the bucket it
+ * landed in and its offset within that bucket.
+ * NOTE(review): the remaining CoffSecMap fields (file offset, delta)
+ * are not written here — presumably a later pass fills them in.
+ *
+ * Bucket buffers are heap-allocated; the caller frees them after emit.
+ * On return out[b].size_raw temporarily holds each buffer's allocation
+ * capacity. */
+static void coff_build_buckets(LinkImage* img, CoffSection out[COFF_NBUCKETS],
+ CoffSecMap* map) {
+ Heap* heap = img->heap;
+ Compiler* c = img->c;
+ const LinkSection** tls_sorted = NULL;
+ const LinkSection** crt_sorted = NULL;
+ u32 ntls_sorted = 0;
+ u32 ncrt_sorted = 0;
+ u32 i, b;
+
+ for (b = 0; b < COFF_NBUCKETS; ++b) {
+ memset(&out[b], 0, sizeof(out[b]));
+ }
+ out[COFF_BUCKET_TEXT].name = ".text";
+ out[COFF_BUCKET_TEXT].characteristics =
+ IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_MEM_READ;
+ out[COFF_BUCKET_TEXT].has_file_bytes = 1;
+ out[COFF_BUCKET_RDATA].name = ".rdata";
+ out[COFF_BUCKET_RDATA].characteristics =
+ IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ;
+ out[COFF_BUCKET_RDATA].has_file_bytes = 1;
+ out[COFF_BUCKET_IDATA].name = ".idata";
+ out[COFF_BUCKET_IDATA].characteristics =
+ IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ;
+ out[COFF_BUCKET_IDATA].has_file_bytes = 1;
+ out[COFF_BUCKET_DATA].name = ".data";
+ out[COFF_BUCKET_DATA].characteristics =
+ IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE;
+ out[COFF_BUCKET_DATA].has_file_bytes = 1;
+ /* The Windows loader uses .tls as a *template*: the bytes on disk
+ * seed each thread's per-TLS copy at thread creation, and threads
+ * write to their copies, not the template. The PE section is still
+ * marked writable because that's what mingw and link.exe emit; the
+ * loader special-cases it via the TLS directory. */
+ out[COFF_BUCKET_TLS].name = ".tls";
+ out[COFF_BUCKET_TLS].characteristics =
+ IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE;
+ out[COFF_BUCKET_TLS].has_file_bytes = 1;
+ out[COFF_BUCKET_BSS].name = ".bss";
+ out[COFF_BUCKET_BSS].characteristics = IMAGE_SCN_CNT_UNINITIALIZED_DATA |
+ IMAGE_SCN_MEM_READ |
+ IMAGE_SCN_MEM_WRITE;
+ out[COFF_BUCKET_BSS].has_file_bytes = 0;
+ out[COFF_BUCKET_RELOC].name = ".reloc";
+ out[COFF_BUCKET_RELOC].characteristics = IMAGE_SCN_CNT_INITIALIZED_DATA |
+ IMAGE_SCN_MEM_READ |
+ IMAGE_SCN_MEM_DISCARDABLE;
+ out[COFF_BUCKET_RELOC].has_file_bytes = 1;
+
+ /* Track per-bucket cursors. Bucket sizes are bounded by the sum of
+ * input section sizes plus per-section alignment padding; we grow
+ * lazily via VEC_GROW. */
+ u64 bucket_cur[COFF_NBUCKETS];
+ u32 bucket_cap[COFF_NBUCKETS];
+ for (b = 0; b < COFF_NBUCKETS; ++b) {
+ bucket_cur[b] = 0;
+ bucket_cap[b] = 0;
+ }
+
+ /* Scratch arrays for the two deferred groups; each is sized for the
+ * worst case of every section landing in it. */
+ tls_sorted = img->nsections ? (const LinkSection**)heap->alloc(
+ heap, sizeof(*tls_sorted) * img->nsections,
+ _Alignof(const LinkSection*))
+ : NULL;
+ crt_sorted = img->nsections ? (const LinkSection**)heap->alloc(
+ heap, sizeof(*crt_sorted) * img->nsections,
+ _Alignof(const LinkSection*))
+ : NULL;
+ if (img->nsections && (!tls_sorted || !crt_sorted))
+ compiler_panic(c, no_loc(), "link_emit_coff: oom sorting sections");
+
+ /* First pass: place ordinary sections immediately; set TLS and
+ * .CRT$ sections aside in name-sorted order. Sections without
+ * SF_ALLOC are not part of the image. */
+ for (i = 0; i < img->nsections; ++i) {
+ const LinkSection* ls = &img->sections[i];
+ if (!(ls->flags & SF_ALLOC)) continue;
+ if (ls->flags & SF_TLS) {
+ coff_insert_sorted_section(c, tls_sorted, &ntls_sorted, ls);
+ continue;
+ }
+ if (coff_section_name_starts(c, ls, ".CRT$")) {
+ coff_insert_sorted_section(c, crt_sorted, &ncrt_sorted, ls);
+ continue;
+ }
+ coff_place_section(img, out, map, bucket_cur, bucket_cap, ls);
+ }
+
+ /* Second pass: append the deferred groups in sorted order. */
+ for (i = 0; i < ntls_sorted; ++i) {
+ coff_place_section(img, out, map, bucket_cur, bucket_cap, tls_sorted[i]);
+ }
+ for (i = 0; i < ncrt_sorted; ++i) {
+ coff_place_section(img, out, map, bucket_cur, bucket_cap, crt_sorted[i]);
+ }
+
+ /* Track caps so we can free with the right size later (heap->free
+ * needs the original allocation size). Stash into size_raw
+ * temporarily — coff_assign_layout later overwrites it with the
+ * proper PE value. */
+ for (b = 0; b < COFF_NBUCKETS; ++b) out[b].size_raw = bucket_cap[b];
+ if (tls_sorted)
+ heap->free(heap, tls_sorted, sizeof(*tls_sorted) * img->nsections);
+ if (crt_sorted)
+ heap->free(heap, crt_sorted, sizeof(*crt_sorted) * img->nsections);
+}
+
+/* Lay the buckets out in index order. A non-empty bucket gets a
+ * section-aligned RVA and, when it has file bytes, a file offset plus a
+ * file-aligned raw size; an empty bucket is dropped from the image with
+ * its layout fields zeroed. Returns the offset just past the last body. */
+static u64 coff_assign_layout(CoffSection out[COFF_NBUCKETS],
+                              u32 headers_file_size, u32 first_section_rva) {
+  u64 file_cursor = ALIGN_UP((u64)headers_file_size, (u64)PE_FILE_ALIGNMENT);
+  u32 rva_cursor = first_section_rva;
+  u32 idx;
+  for (idx = 0; idx < COFF_NBUCKETS; ++idx) {
+    CoffSection* sec = &out[idx];
+    /* Defaults shared by empty buckets and buckets without file bytes. */
+    sec->file_offset = 0;
+    sec->size_raw = 0;
+    if (sec->size == 0) {
+      sec->in_image = 0;
+      sec->rva = 0;
+      continue;
+    }
+    sec->in_image = 1;
+    sec->rva = (u32)ALIGN_UP((u64)rva_cursor, (u64)PE_SECTION_ALIGNMENT);
+    rva_cursor = sec->rva + sec->size;
+    if (sec->has_file_bytes) {
+      sec->size_raw = (u32)ALIGN_UP((u64)sec->size, (u64)PE_FILE_ALIGNMENT);
+      sec->file_offset = (u32)file_cursor;
+      file_cursor += sec->size_raw;
+    }
+  }
+  return file_cursor;
+}
+
+/* Build the .reloc bytes by grouping absolute relocs by 4-KiB page.
+ * map[LinkSectionId-1] records the bucket an input section landed in and
+ * its offset within that bucket (new_rva), so each reloc's
+ * site_rva = bucket_rva + new_rva + (orig write_vaddr - orig section_vaddr).
+ *
+ * Layout per page:
+ * u32 page_rva
+ * u32 size_of_block (8 + n_entries*2, padded to a multiple of 4)
+ * u16 entries[]: (type << 12) | (offset & 0xfff)
+ * optional trailing u16 = 0 (IMAGE_REL_BASED_ABSOLUTE) for u32 alignment */
+typedef struct CoffRelocEntry {
+ u32 site_rva; /* image RVA of the patch site */
+ u16 type; /* IMAGE_REL_BASED_* kind; emitted in the entry's top 4 bits */
+ u16 pad; /* explicit padding; the producers seen here set it to 0 */
+} CoffRelocEntry;
+
+/* qsort comparator: orders CoffRelocEntry records by ascending site_rva. */
+static int coff_reloc_entry_cmp(const void* a, const void* b) {
+  u32 lhs = ((const CoffRelocEntry*)a)->site_rva;
+  u32 rhs = ((const CoffRelocEntry*)b)->site_rva;
+  /* (lhs > rhs) - (lhs < rhs) is -1, 0 or 1 and cannot overflow. */
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+/* Index one past the last entry sharing a 4-KiB page with entries[start].
+ * Expects `entries` sorted by site_rva and start < nentries. */
+static u32 coff_reloc_run_end(const CoffRelocEntry* entries, u32 nentries,
+                              u32 start) {
+  u32 page = entries[start].site_rva & ~0xfffu;
+  u32 end = start;
+  while (end < nentries && (entries[end].site_rva & ~0xfffu) == page) ++end;
+  return end;
+}
+
+/* Populate `reloc` (bytes, size, size_raw) with the base-relocation blob.
+ *
+ * Entries are gathered from every img->relocs record that needs a base
+ * reloc and belongs to a link section, followed by the caller's
+ * ready-made `extras`. For a non-PIE image, or when no entry results,
+ * reloc->bytes is NULL and reloc->size is 0. Otherwise size_raw
+ * temporarily holds the allocation size so the caller can free the
+ * blob with the right length. */
+static void coff_build_reloc_section(LinkImage* img,
+                                     const CoffSection out[COFF_NBUCKETS],
+                                     const CoffSecMap* map, CoffSection* reloc,
+                                     const CoffRelocEntry* extras,
+                                     u32 n_extras) {
+  Heap* heap = img->heap;
+  Compiler* c = img->c;
+  u32 total = LinkRelocs_count(&img->relocs);
+  CoffRelocEntry* list = NULL;
+  u32 count = 0;
+  u32 capacity = 0;
+  u32 total_bytes = 0;
+  u32 write_pos = 0;
+  u32 first;
+  u32 idx;
+
+  /* Both early exits below hand back an empty bucket. */
+  reloc->bytes = NULL;
+  reloc->size = 0;
+  if (!img->pie) return;
+
+  /* Gather one entry per absolute relocation that lives in a section. */
+  for (idx = 0; idx < total; ++idx) {
+    const LinkRelocApply* rel = LinkRelocs_at(&img->relocs, idx);
+    const LinkSection* sec;
+    const CoffSecMap* slot;
+    u8 bucket;
+    if (!coff_reloc_needs_base_reloc(rel->kind)) continue;
+    if (rel->link_section_id == LINK_SEC_NONE) continue;
+    sec = &img->sections[rel->link_section_id - 1];
+    slot = &map[sec->id - 1];
+    bucket = slot->bucket;
+    if (count == capacity) {
+      (void)VEC_GROW(heap, list, capacity, count + 1u);
+    }
+    /* write_vaddr and sec->vaddr are both pre-relayout addresses, so
+     * their difference is the offset of the site within the section;
+     * bucket RVA + slot->new_rva rebases it into the final image. */
+    list[count].site_rva = out[bucket].rva + slot->new_rva +
+                           (u32)(rel->write_vaddr - sec->vaddr);
+    list[count].type = (u16)(rel->kind == R_ABS64 ? IMAGE_REL_BASED_DIR64
+                                                  : IMAGE_REL_BASED_HIGHLOW);
+    list[count].pad = 0;
+    ++count;
+  }
+
+  /* Caller-supplied extras already carry final site RVAs; copy verbatim. */
+  for (idx = 0; idx < n_extras; ++idx) {
+    if (count == capacity) {
+      (void)VEC_GROW(heap, list, capacity, count + 1u);
+    }
+    list[count] = extras[idx];
+    ++count;
+  }
+  if (count == 0) {
+    if (list) heap->free(heap, list, capacity * sizeof(*list));
+    return;
+  }
+
+  /* Sorting by RVA makes entries of the same 4-KiB page contiguous. */
+  qsort(list, count, sizeof(*list), coff_reloc_entry_cmp);
+
+  /* Sizing pass: one block per page run, each padded to 4 bytes. */
+  for (first = 0; first < count;) {
+    u32 last = coff_reloc_run_end(list, count, first);
+    u32 unpadded = COFF_BASE_RELOCATION_SIZE + (last - first) * 2u;
+    total_bytes += (u32)ALIGN_UP((u64)unpadded, 4ull);
+    first = last;
+  }
+
+  reloc->bytes = (u8*)heap->alloc(heap, total_bytes, 4);
+  if (!reloc->bytes && total_bytes)
+    compiler_panic(c, no_loc(), "link_emit_coff: oom on .reloc blob");
+  memset(reloc->bytes, 0, total_bytes);
+  reloc->size = total_bytes;
+  /* Remember the allocation size; the caller needs it to free. */
+  reloc->size_raw = total_bytes;
+
+  /* Emission pass: header (page RVA, block size) then packed entries. */
+  for (first = 0; first < count;) {
+    u32 last = coff_reloc_run_end(list, count, first);
+    u32 in_run = last - first;
+    u32 unpadded = COFF_BASE_RELOCATION_SIZE + in_run * 2u;
+    u32 padded = (u32)ALIGN_UP((u64)unpadded, 4ull);
+    u8* blk = reloc->bytes + write_pos;
+    u32 j;
+    wr_u32_le(blk, list[first].site_rva & ~0xfffu);
+    wr_u32_le(blk + 4, padded);
+    for (j = 0; j < in_run; ++j) {
+      const CoffRelocEntry* e = &list[first + j];
+      u16 packed = (u16)(((u16)e->type << 12) | (e->site_rva & 0xfffu));
+      wr_u16_le(blk + 8 + j * 2u, packed);
+    }
+    /* When padding was added, fill it with IMAGE_REL_BASED_ABSOLUTE (0). */
+    if (padded > unpadded) {
+      wr_u16_le(blk + 8 + in_run * 2u, 0);
+    }
+    write_pos += padded;
+    first = last;
+  }
+
+  heap->free(heap, list, capacity * sizeof(*list));
+}
+
+/* Patch each LinkRelocApply against the PE-relayout coordinates and
+ * apply. out[bucket].bytes is the writable buffer for that bucket; the
+ * per-section offset in map[] turns the old in-section offsets into
+ * bucket-local offsets.
+ *
+ * Imported targets (LinkSymbol.imported == 1) have no vaddr of their
+ * own — instead the .idata pass populated `iv->by_sym[id-1]` with the
+ * function stub's vaddr (for callable imports) or the IAT slot's
+ * vaddr (for data imports). This is the spot where that table is
+ * consulted in lieu of the symbol's own zero vaddr. */
+static void coff_apply_all_relocs(LinkImage* img,
+                                  const CoffSection out[COFF_NBUCKETS],
+                                  const CoffSecMap* map,
+                                  const CoffImportVaddr* iv) {
+  Compiler* c = img->c;
+  u32 i;
+  u64 img_base = PE_IMAGE_BASE;
+  u32 nrel = LinkRelocs_count(&img->relocs);
+  for (i = 0; i < nrel; ++i) {
+    LinkRelocApply* r = LinkRelocs_at(&img->relocs, i);
+    const LinkSymbol* tgt = LinkSyms_at(&img->syms, r->target - 1);
+    const LinkSection* sec;
+    /* Assigned only for defined, section-bound targets; stays NULL for
+     * imported / absolute ones so the SECREL path can reject them. */
+    const LinkSection* tgt_sec = NULL;
+    u64 S, P;
+    u8* P_bytes;
+    u8 bucket;
+    u32 site_off_in_sec;
+    u32 site_bucket_off;
+    if (r->link_section_id == LINK_SEC_NONE) continue;
+    sec = &img->sections[r->link_section_id - 1];
+    bucket = map[sec->id - 1].bucket;
+    if (!out[bucket].has_file_bytes || !out[bucket].bytes) {
+      /* Shouldn't happen — .bss has no relocations. */
+      continue;
+    }
+    site_off_in_sec = (u32)(r->write_vaddr - sec->vaddr);
+    site_bucket_off = map[sec->id - 1].new_rva + site_off_in_sec;
+    P_bytes = out[bucket].bytes + site_bucket_off;
+    /* P = ImageBase + bucket_rva + map[].new_rva + site_off_in_sec
+     * — i.e. the final runtime address of the patch site. */
+    P = img_base + (u64)out[bucket].rva + (u64)map[sec->id - 1].new_rva +
+        site_off_in_sec;
+
+    /* Resolve S, the target's final address: find the LinkSection holding
+     * the symbol's original vaddr, then rebase it via that section's map. */
+    if (tgt->imported) {
+      /* IAT-routed: stub vaddr (functions) / slot vaddr (data). */
+      if (!iv || iv->by_sym[r->target - 1u] == 0)
+        compiler_panic(c, no_loc(),
+                       "link_emit_coff: imported target lacks IAT slot");
+      S = iv->by_sym[r->target - 1u];
+    } else if (tgt->kind == SK_ABS) {
+      S = tgt->vaddr;
+    } else if (tgt->defined) {
+      tgt_sec = coff_symbol_section(img, tgt);
+      if (!tgt_sec) {
+        compiler_panic(c, no_loc(),
+                       "link_emit_coff: symbol vaddr 0x%llx has no "
+                       "containing section",
+                       (unsigned long long)tgt->vaddr);
+      }
+      u8 tb = map[tgt_sec->id - 1].bucket;
+      u64 sym_off = tgt->vaddr - tgt_sec->vaddr;
+      S = img_base + (u64)out[tb].rva + (u64)map[tgt_sec->id - 1].new_rva +
+          sym_off;
+    } else {
+      /* Undef and not imported — shouldn't survive resolve_undefs. */
+      compiler_panic(c, no_loc(),
+                     "link_emit_coff: unresolved non-imported symbol");
+    }
+    /* COFF-only section-relative kinds: the SECREL value is the
+     * symbol's offset from the start of its containing output section
+     * (PE bucket), and SECTION is the 1-based PE section index.
+     * link_reloc_apply only sees S and P, so we patch these inline
+     * before delegating common kinds. */
+    if (r->kind == R_COFF_SECREL || r->kind == R_COFF_SECTION ||
+        r->kind == R_COFF_AARCH64_SECREL_LOW12A ||
+        r->kind == R_COFF_AARCH64_SECREL_HIGH12A) {
+      if (!tgt_sec || !tgt->defined || tgt->kind == SK_ABS) {
+        compiler_panic(c, no_loc(),
+                       "link_emit_coff: COFF SECREL/SECTION requires a "
+                       "defined section-bound target symbol");
+      }
+      u8 tb = map[tgt_sec->id - 1].bucket;
+      u64 sym_off_in_bucket =
+          (u64)map[tgt_sec->id - 1].new_rva + (tgt->vaddr - tgt_sec->vaddr);
+      if (r->kind == R_COFF_SECREL) {
+        u64 v = sym_off_in_bucket + (u64)r->addend;
+        wr_u32_le(P_bytes, (u32)(v & 0xffffffffu));
+      } else if (r->kind == R_COFF_SECTION) {
+        /* PE section indices are 1-based; buckets are 0-based, so add 1. */
+        wr_u16_le(P_bytes, (u16)((tb + 1u) & 0xffffu));
+      } else {
+        /* AArch64 SECREL_{LOW,HIGH}12A: rewrite bits [21:10] of the
+         * existing instruction word with SECREL bits [11:0] (LOW12A) or
+         * [23:12] (HIGH12A); every other instruction bit is preserved. */
+        u64 v = sym_off_in_bucket + (u64)r->addend;
+        u32 imm12 = (r->kind == R_COFF_AARCH64_SECREL_HIGH12A)
+                        ? (u32)((v >> 12) & 0xfffu)
+                        : (u32)(v & 0xfffu);
+        u32 instr = rd_u32_le(P_bytes);
+        instr = (instr & ~(0xfffu << 10)) | (imm12 << 10);
+        wr_u32_le(P_bytes, instr);
+      }
+      continue;
+    }
+    link_reloc_apply(c, r->kind, P_bytes, S, r->addend, P);
+  }
+}
+
+/* ---- header marshalling ----
+ *
+ * Each helper streams its on-disk shape to the writer field-by-field;
+ * we avoid sizeof(struct) on the packed PE wire types since they carry
+ * implicit-padding hazards on hosts that disagree with #pragma pack(1)
+ * defaults. */
+
+/* Emit the fixed-size DOS header. Only e_magic ("MZ", little-endian at
+ * offset 0) and e_lfanew (offset of the PE signature, stored at 0x3c)
+ * are filled in; every other legacy field stays zero. */
+static void coff_write_dos_stub(Writer* w) {
+  u8 hdr[PE_DOS_HDR_SIZE];
+  memset(hdr, 0, sizeof(hdr));
+  wr_u16_le(hdr, (u16)IMAGE_DOS_SIGNATURE);
+  wr_u32_le(hdr + 0x3c, PE_DOS_E_LFANEW);
+  cfree_writer_write(w, hdr, sizeof(hdr));
+}
+
+/* Stream IMAGE_FILE_HEADER field by field. TimeDateStamp,
+ * PointerToSymbolTable and NumberOfSymbols are always written as 0. */
+static void coff_write_file_header(Writer* w, u16 machine, u16 nsec,
+                                   u16 characteristics) {
+  coff_wr_u16(w, machine);
+  coff_wr_u16(w, nsec);
+  for (u32 zeroed = 0; zeroed < 3u; ++zeroed) coff_wr_u32(w, 0u);
+  coff_wr_u16(w, (u16)PE_OPT_HDR_SIZE); /* SizeOfOptionalHeader */
+  coff_wr_u16(w, characteristics);
+}
+
+/* Per-section meta used by both the data-directory fill and the
+ * IMAGE_SECTION_HEADER emit. NOTE(review): the fields parallel the
+ * arguments of coff_write_section_header; no user is visible nearby. */
+typedef struct CoffOutHdr {
+ const char* name; /* presumably the section name, e.g. ".tls" */
+ u32 vsize; /* presumably the in-memory (virtual) size — confirm */
+ u32 rva; /* presumably the section's RVA in the image — confirm */
+ u32 size_raw; /* presumably the file-aligned on-disk size — confirm */
+ u32 file_offset; /* presumably the offset of the body in the file */
+ u32 characteristics; /* presumably IMAGE_SCN_* flags — confirm */
+} CoffOutHdr;
+
+/* Stream the PE32+ optional header: standard fields, Windows-specific
+ * fields, then the data directories. `it` and `tls` may be NULL. */
+static void coff_write_optional_header(Writer* w, u32 entry_rva,
+                                       const CoffSection out[COFF_NBUCKETS],
+                                       u32 headers_size_padded, u32 image_size,
+                                       int pie, u16 subsystem,
+                                       const CoffImportTable* it,
+                                       const CoffTlsLayout* tls) {
+  const CoffSection* text = &out[COFF_BUCKET_TEXT];
+  coff_wr_u16(w, IMAGE_NT_OPTIONAL_HDR64_MAGIC); /* first standard field */
+  coff_wr_u8(w, PE_LINKER_MAJOR);
+  coff_wr_u8(w, PE_LINKER_MINOR);
+  /* SizeOfCode / SizeOfInitializedData / SizeOfUninitializedData.
+   * Initialized data is summed over every in-image bucket flagged
+   * IMAGE_SCN_CNT_INITIALIZED_DATA (so .tls and .reloc count too). */
+  u32 size_code = text->in_image ? text->size_raw : 0;
+  u32 size_init = 0;
+  u32 size_uninit =
+      out[COFF_BUCKET_BSS].in_image ? out[COFF_BUCKET_BSS].size : 0;
+  for (u32 b = 0; b < COFF_NBUCKETS; ++b) {
+    if (b != COFF_BUCKET_TEXT && out[b].in_image &&
+        (out[b].characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA))
+      size_init += out[b].size_raw;
+  }
+  coff_wr_u32(w, size_code);
+  coff_wr_u32(w, size_init);
+  coff_wr_u32(w, size_uninit);
+  coff_wr_u32(w, entry_rva);
+  coff_wr_u32(w, text->in_image ? text->rva : 0); /* BaseOfCode */
+  coff_wr_u64(w, PE_IMAGE_BASE); /* first Windows-specific field */
+  coff_wr_u32(w, PE_SECTION_ALIGNMENT);
+  coff_wr_u32(w, PE_FILE_ALIGNMENT);
+  coff_wr_u16(w, PE_OS_MAJOR);
+  coff_wr_u16(w, PE_OS_MINOR);
+  coff_wr_u16(w, 0u); /* MajorImageVersion */
+  coff_wr_u16(w, 0u); /* MinorImageVersion */
+  coff_wr_u16(w, PE_SUBSYS_MAJOR);
+  coff_wr_u16(w, PE_SUBSYS_MINOR);
+  coff_wr_u32(w, 0u); /* Win32VersionValue */
+  coff_wr_u32(w, image_size);
+  coff_wr_u32(w, headers_size_padded);
+  coff_wr_u32(w, 0u); /* CheckSum */
+  coff_wr_u16(w, subsystem ? subsystem : IMAGE_SUBSYSTEM_WINDOWS_CUI);
+  coff_wr_u16(w, PE_DLL_CHARS);
+  coff_wr_u64(w, PE_STACK_RESERVE);
+  coff_wr_u64(w, PE_STACK_COMMIT);
+  coff_wr_u64(w, PE_HEAP_RESERVE);
+  coff_wr_u64(w, PE_HEAP_COMMIT);
+  coff_wr_u32(w, 0u); /* LoaderFlags */
+  coff_wr_u32(w, (u32)PE_NUM_DATA_DIRS);
+  /* Data directories: IMPORT/IAT with imports, BASERELOC for PIE with
+   * .reloc in the image, TLS when present; all other entries are zero. */
+  int has_idata = it && it->nimports > 0 && out[COFF_BUCKET_IDATA].in_image;
+  for (u32 i = 0; i < PE_NUM_DATA_DIRS; ++i) {
+    if (i == IMAGE_DIRECTORY_ENTRY_IMPORT && has_idata) {
+      coff_wr_u32(w, out[COFF_BUCKET_IDATA].rva + it->desc_off);
+      coff_wr_u32(w, it->desc_size);
+    } else if (i == IMAGE_DIRECTORY_ENTRY_IAT && has_idata) {
+      coff_wr_u32(w, out[COFF_BUCKET_IDATA].rva + it->iat_base);
+      coff_wr_u32(w, it->iat_total);
+    } else if (i == IMAGE_DIRECTORY_ENTRY_BASERELOC && pie &&
+               out[COFF_BUCKET_RELOC].in_image) {
+      coff_wr_u32(w, out[COFF_BUCKET_RELOC].rva);
+      coff_wr_u32(w, out[COFF_BUCKET_RELOC].size);
+    } else if (i == IMAGE_DIRECTORY_ENTRY_TLS && tls && tls->present) {
+      coff_wr_u32(w, out[COFF_BUCKET_RDATA].rva + tls->dir_rdata_off);
+      coff_wr_u32(w, COFF_TLS_DIRECTORY64_SIZE);
+    } else {
+      coff_wr_u32(w, 0u);
+      coff_wr_u32(w, 0u);
+    }
+  }
+}
+
+/* Emit one IMAGE_SECTION_HEADER; `name` is cut to 8 bytes and NUL-padded. */
+static void coff_write_section_header(Writer* w, const char* name, u32 vsize,
+                                      u32 rva, u32 size_raw, u32 file_offset,
+                                      u32 characteristics) {
+  u8 name_field[8] = {0};
+  size_t name_len = slice_from_cstr(name).len;
+  memcpy(name_field, name, name_len < 8 ? name_len : 8);
+  cfree_writer_write(w, name_field, sizeof(name_field));
+  coff_wr_u32(w, vsize);
+  coff_wr_u32(w, rva);
+  coff_wr_u32(w, size_raw);
+  coff_wr_u32(w, file_offset);
+  coff_wr_u32(w, 0u); /* PointerToRelocations */
+  coff_wr_u32(w, 0u); /* PointerToLinenumbers */
+  coff_wr_u16(w, 0u); /* NumberOfRelocations */
+  coff_wr_u16(w, 0u); /* NumberOfLinenumbers */
+  coff_wr_u32(w, characteristics);
+}
+
+/* ---- main entry ---- */
+
+/* Write `img` to `w` as a PE32+ image: lay out the buckets, emit .idata,
+ * import stubs, the TLS directory and .reloc as needed, apply relocations,
+ * then stream the headers and section bodies. Panics on OOM / no entry. */
+void link_emit_coff(LinkImage* img, Writer* w) {
+  Heap* heap = img->heap;
+  Compiler* c = img->c;
+  u16 machine = coff_machine_or_panic(c);
+  if (img->entry_sym == LINK_SYM_NONE)
+    compiler_panic(c, no_loc(), "link_emit_coff: no resolved entry symbol");
+
+  /* ---- pass 1: build buckets + per-section delta map ---- */
+  CoffSection out[COFF_NBUCKETS];
+  CoffSecMap* map = (CoffSecMap*)heap->alloc(
+      heap, sizeof(CoffSecMap) * (img->nsections + 1u), _Alignof(CoffSecMap));
+  /* nsections + 1 entries are requested, so NULL always means OOM. */
+  if (!map)
+    compiler_panic(c, no_loc(), "link_emit_coff: oom on section map");
+  memset(map, 0, sizeof(CoffSecMap) * (img->nsections + 1u));
+
+  coff_build_buckets(img, out, map);
+  /* coff_build_buckets stashes per-bucket allocation caps in size_raw
+   * (the only bucket field we own for the duration of layout); read
+   * them out before coff_assign_layout overwrites the field. .reloc
+   * and .idata aren't touched by coff_build_buckets — their caps are
+   * filled in below once coff_build_reloc_section / coff_emit_idata
+   * run. */
+  u32 bucket_caps[COFF_NBUCKETS];
+  u32 b;
+  for (b = 0; b < COFF_NBUCKETS; ++b) bucket_caps[b] = out[b].size_raw;
+
+  /* ---- pass 1b: collect imports and reserve .idata + .text stubs ----
+   *
+   * Builds the per-DLL / per-import layout and appends one IAT-routing
+   * stub per imported function to the .text bucket. The .idata bucket
+   * size is set here (so it counts in nsec); the stub vaddrs and
+   * IAT-slot vaddrs are finalised after coff_assign_layout. */
+  CoffImportTable imports;
+  int have_imports = coff_collect_imports(img, &imports);
+  if (have_imports) {
+    coff_plan_idata_layout(img, &imports);
+    coff_append_stubs(img, &imports, &out[COFF_BUCKET_TEXT],
+                      &bucket_caps[COFF_BUCKET_TEXT]);
+    /* Reserve the .idata bucket size so coff_assign_layout / nsec
+     * accounting sees it. Actual bytes are written by coff_emit_idata
+     * once the bucket RVA is known. */
+    out[COFF_BUCKET_IDATA].size = imports.idata_size;
+  }
+
+  /* ---- pass 1c: plan the TLS directory record ----
+   *
+   * If any SF_TLS sections survived, reserve 40 bytes at the tail of
+   * .rdata for the IMAGE_TLS_DIRECTORY64. Bytes are zeroed now and
+   * filled in by coff_emit_tls_dir once the bucket RVAs are final. */
+  CoffTlsLayout tls;
+  coff_plan_tls_layout(img, out, &bucket_caps[COFF_BUCKET_RDATA], &tls);
+
+  /* ---- pass 2: decide whether .reloc will be in the image ----
+   *
+   * The headers' file size (and therefore every section's file
+   * offset) depends on the section-table entry count, so we need to
+   * commit to "is .reloc emitted?" before laying out file offsets.
+   * .reloc lights up iff PIE and at least one absolute reloc points
+   * into a kept section, OR a TLS directory is emitted (its four u64
+   * VA fields all need base-relocs). */
+  int emit_reloc = 0;
+  if (img->pie) {
+    u32 i;
+    u32 nrel = LinkRelocs_count(&img->relocs);
+    for (i = 0; i < nrel; ++i) {
+      const LinkRelocApply* r = LinkRelocs_at(&img->relocs, i);
+      if (!coff_reloc_needs_base_reloc(r->kind)) continue;
+      if (r->link_section_id == LINK_SEC_NONE) continue;
+      emit_reloc = 1;
+      break;
+    }
+    if (!emit_reloc && tls.present) emit_reloc = 1;
+  }
+
+  u32 nsec = 0;
+  for (b = 0; b < COFF_NBUCKETS; ++b) {
+    if (b == COFF_BUCKET_RELOC) {
+      if (emit_reloc) ++nsec; /* tentative; size set below */
+      continue;
+    }
+    if (out[b].size) ++nsec;
+  }
+  u32 headers_size_unpadded = PE_DOS_HDR_SIZE + PE_SIG_SIZE + PE_FILE_HDR_SIZE +
+                              PE_OPT_HDR_SIZE + nsec * PE_SECTION_HDR_SIZE;
+  u32 headers_size_padded =
+      (u32)ALIGN_UP((u64)headers_size_unpadded, (u64)PE_FILE_ALIGNMENT);
+
+  /* First layout pass: fixes RVAs / file offsets for buckets that
+   * already have a finalised size (.text, .rdata, .idata, .data, .bss).
+   * .reloc's RVA is provisional — it depends on .reloc's own size,
+   * which is still 0 at this point. */
+  (void)coff_assign_layout(out, headers_size_padded, PE_FIRST_SECTION_RVA);
+
+  /* ---- pass 2b: emit .idata bytes + per-arch IAT stubs ----
+   *
+   * The .idata bucket's RVA is final after the first assign_layout;
+   * stubs need it (the indirect-jump displacement targets an IAT slot)
+   * and .idata's own descriptor / ILT / IAT records all carry RVAs.
+   * coff_import_vaddr_build builds the per-LinkSymId override table
+   * that apply_all_relocs consults in place of the (zero) symbol
+   * vaddr for imported targets. */
+  CoffImportVaddr import_vaddr;
+  memset(&import_vaddr, 0, sizeof(import_vaddr));
+  if (have_imports) {
+    coff_emit_idata(img, &imports, out, &bucket_caps[COFF_BUCKET_IDATA]);
+    coff_emit_stubs(img, &imports, out);
+    coff_import_vaddr_build(img, &imports, out, &import_vaddr);
+  }
+
+  /* Write the TLS directory bytes now that bucket RVAs are final. */
+  coff_emit_tls_dir(img, out, map, &tls);
+
+  /* ---- pass 3: build .reloc using the now-final bucket RVAs ----
+   *
+   * coff_build_reloc_section reads out[bucket].rva indirectly via
+   * map[].new_rva + (write_vaddr - sec->vaddr) → site offset within
+   * the bucket; the absolute site_rva is bucket.rva + that offset.
+   * Patch site RVAs are page-quantised in the emitted blob, so this
+   * is the spot where the bucket RVAs need to be already final.
+   *
+   * TLS directory's four absolute-VA fields ride into the entries via
+   * the `extras` array — they aren't ordinary symbol relocations, so
+   * they don't show up in img->relocs. */
+  if (emit_reloc) {
+    CoffRelocEntry tls_extras[4];
+    u32 n_tls_extras = 0;
+    if (tls.present) {
+      u32 dir_rva = out[COFF_BUCKET_RDATA].rva + tls.dir_rdata_off;
+      static const u32 field_offs[4] = {
+          COFF_TLSDIR_OFF_START_ADDR,
+          COFF_TLSDIR_OFF_END_ADDR,
+          COFF_TLSDIR_OFF_INDEX_ADDR,
+          COFF_TLSDIR_OFF_CALLBACKS,
+      };
+      u32 k;
+      for (k = 0; k < 4; ++k) {
+        if (field_offs[k] == COFF_TLSDIR_OFF_CALLBACKS && !tls.callbacks_sym)
+          continue;
+        tls_extras[n_tls_extras].site_rva = dir_rva + field_offs[k];
+        tls_extras[n_tls_extras].type = (u16)IMAGE_REL_BASED_DIR64;
+        tls_extras[n_tls_extras].pad = 0;
+        ++n_tls_extras;
+      }
+    }
+    coff_build_reloc_section(img, out, map, &out[COFF_BUCKET_RELOC], tls_extras,
+                             n_tls_extras);
+    bucket_caps[COFF_BUCKET_RELOC] = out[COFF_BUCKET_RELOC].size_raw;
+    /* size_raw was stashed by build; assign_layout below recomputes it
+     * as the FileAlignment-padded length. */
+    (void)coff_assign_layout(out, headers_size_padded, PE_FIRST_SECTION_RVA);
+  }
+
+  /* `_tls_used` is the public mingw/PE name for the TLS directory
+   * record. Keep it in lockstep with the optional-header TLS data
+   * directory, rather than leaving references bound to mingw's tlssup.o
+   * placeholder record. */
+  coff_define_tls_used(img, out, &tls);
+
+  /* ---- pass 4: resolve entry symbol's PE RVA ----
+   *
+   * Done before apply so the optional-header field has its final
+   * value. */
+  const LinkSymbol* entry_sym = LinkSyms_at(&img->syms, img->entry_sym - 1);
+  if (!entry_sym->defined || entry_sym->kind == SK_ABS)
+    compiler_panic(c, no_loc(),
+                   "link_emit_coff: entry symbol is not a defined "
+                   "image-relative function");
+  const LinkSection* entry_sec = coff_section_at(img, entry_sym->vaddr);
+  if (!entry_sec)
+    compiler_panic(c, no_loc(),
+                   "link_emit_coff: entry symbol has no containing "
+                   "section");
+  u8 entry_bucket = map[entry_sec->id - 1].bucket;
+  u32 entry_rva = out[entry_bucket].rva + map[entry_sec->id - 1].new_rva +
+                  (u32)(entry_sym->vaddr - entry_sec->vaddr);
+
+  /* ---- pass 5: apply all relocations into bucket bytes ---- */
+  coff_apply_all_relocs(img, out, map, have_imports ? &import_vaddr : NULL);
+
+  /* ---- pass 6: compute SizeOfImage (in-memory size) ---- */
+  u32 image_size = 0;
+  for (b = 0; b < COFF_NBUCKETS; ++b) {
+    if (!out[b].in_image) continue;
+    u32 end = out[b].rva + out[b].size;
+    if (end > image_size) image_size = end;
+  }
+  image_size = (u32)ALIGN_UP((u64)image_size, (u64)PE_SECTION_ALIGNMENT);
+
+  /* ---- pass 7: write everything ---- */
+  u16 file_chars = IMAGE_FILE_EXECUTABLE_IMAGE | IMAGE_FILE_LARGE_ADDRESS_AWARE;
+  if (!img->pie || !out[COFF_BUCKET_RELOC].in_image) {
+    file_chars |= IMAGE_FILE_RELOCS_STRIPPED;
+  }
+
+  coff_write_dos_stub(w);
+  /* PE signature. */
+  coff_wr_u32(w, IMAGE_NT_SIGNATURE);
+  coff_write_file_header(w, machine, (u16)nsec, file_chars);
+  u16 subsystem = img->linker ? img->linker->pe_subsystem : 0;
+  coff_write_optional_header(w, entry_rva, out, headers_size_padded, image_size,
+                             img->pie, subsystem,
+                             have_imports ? &imports : NULL, &tls);
+
+  /* Section table. */
+  for (b = 0; b < COFF_NBUCKETS; ++b) {
+    if (!out[b].in_image) continue;
+    coff_write_section_header(w, out[b].name, out[b].size, out[b].rva,
+                              out[b].size_raw, out[b].file_offset,
+                              out[b].characteristics);
+  }
+
+  /* Pad to first section's file offset. */
+  u64 cur = (u64)headers_size_unpadded;
+  u64 first_file_off = headers_size_padded;
+  if (cur < first_file_off) {
+    coff_write_zeroes(w, first_file_off - cur);
+    cur = first_file_off;
+  }
+
+  /* Section bodies. */
+  for (b = 0; b < COFF_NBUCKETS; ++b) {
+    if (!out[b].in_image) continue;
+    if (!out[b].has_file_bytes) continue;
+    if (cur < out[b].file_offset) {
+      coff_write_zeroes(w, out[b].file_offset - cur);
+      cur = out[b].file_offset;
+    }
+    cfree_writer_write(w, out[b].bytes, out[b].size);
+    cur += out[b].size;
+    if (out[b].size_raw > out[b].size) {
+      coff_write_zeroes(w, out[b].size_raw - out[b].size);
+      cur += out[b].size_raw - out[b].size;
+    }
+  }
+
+  /* ---- cleanup ---- */
+  for (b = 0; b < COFF_NBUCKETS; ++b) {
+    if (out[b].bytes) heap->free(heap, out[b].bytes, bucket_caps[b]);
+  }
+  heap->free(heap, map, sizeof(CoffSecMap) * (img->nsections + 1u));
+  if (have_imports) {
+    coff_import_vaddr_free(img, &import_vaddr);
+    coff_imports_free(img, &imports);
+  }
+}
diff --git a/src/obj/coff/read.c b/src/obj/coff/read.c
@@ -0,0 +1,739 @@
+/* PE/COFF .obj (IMAGE_FILE_HEADER + sections) reader. Parses a 64-bit
+ * little-endian relocatable object back into a fresh ObjBuilder. Peer
+ * of read_elf / read_macho; the post-finalize ObjBuilder shape is the
+ * canonical superset doc/DESIGN.md §5.5 promises: read_coff of an
+ * emit_coff output produces an ObjBuilder shape-equivalent to the
+ * writer's input, modulo synthesized SECTION symbols and the COMDAT
+ * section-definition aux records.
+ *
+ * Scope: IMAGE_FILE_MACHINE_AMD64 and IMAGE_FILE_MACHINE_ARM64;
+ * long-form objects may also be IMAGE_FILE_MACHINE_ARM64EC (the
+ * short-import path below accepts only AMD64 and ARM64). PE
+ * executables (with a non-zero SizeOfOptionalHeader) are rejected — a
+ * future read_coff_pe would handle those. Microsoft "short import"
+ * records (Sig1=0, Sig2=0xFFFF) found inside .lib archive members are
+ * detected at entry and dispatched to read_coff_short_import, which
+ * synthesizes a DSO-shaped ObjBuilder annotated with the providing
+ * DLL name via obj_set_coff_import_dll. */
+
+#include <string.h>
+
+#include "core/arena.h"
+#include "core/heap.h"
+#include "core/pool.h"
+#include "core/slice.h"
+#include "obj/coff/coff.h"
+#include "obj/format.h"
+
+/* Returns an all-zero SrcLoc. Every compiler_panic call in this reader
+ * passes it as the location argument. */
+static SrcLoc no_loc(void) {
+  return (SrcLoc){0, 0, 0};
+}
+
+/* ---- section-header scratch ---- */
+
+/* Decoded subset of one on-disk section header, plus the ObjBuilder
+ * section created for it. Filled by parse_shdr; the byte offsets noted
+ * below are the ones parse_shdr reads from. */
+typedef struct CSecRec {
+ char raw_name[8]; /* header bytes 0..7: inline name or "/<decimal>" ref */
+ u32 virtual_size; /* offset 8; read_coff prefers it as the BSS size */
+ u32 size_of_raw_data; /* offset 16; byte count of contents in the file */
+ u32 pointer_to_raw_data; /* offset 20; file offset of the contents */
+ u32 pointer_to_relocations; /* offset 24; file offset of reloc records */
+ u16 number_of_relocations; /* offset 32 */
+ u32 characteristics; /* offset 36; IMAGE_SCN_* bits */
+ ObjSecId obj_sec; /* OBJ_SEC_NONE until read_coff stores the section id */
+} CSecRec;
+
+/* Decode the section-header fields read_coff needs from the raw header
+ * at `p` into `*out`. `out->obj_sec` starts as OBJ_SEC_NONE; the caller
+ * overwrites it once the matching ObjBuilder section exists. */
+static void parse_shdr(const u8* p, CSecRec* out) {
+  /* Byte offsets of the decoded fields within the raw header. */
+  enum {
+    kOffVirtualSize = 8,
+    kOffSizeOfRawData = 16,
+    kOffPtrToRawData = 20,
+    kOffPtrToRelocs = 24,
+    kOffNumRelocs = 32,
+    kOffCharacteristics = 36
+  };
+
+  out->obj_sec = OBJ_SEC_NONE;
+  memcpy(out->raw_name, p, sizeof out->raw_name);
+  out->virtual_size = coff_rd_u32(p + kOffVirtualSize);
+  out->size_of_raw_data = coff_rd_u32(p + kOffSizeOfRawData);
+  out->pointer_to_raw_data = coff_rd_u32(p + kOffPtrToRawData);
+  out->pointer_to_relocations = coff_rd_u32(p + kOffPtrToRelocs);
+  out->number_of_relocations = coff_rd_u16(p + kOffNumRelocs);
+  out->characteristics = coff_rd_u32(p + kOffCharacteristics);
+}
+
+/* ---- string-table lookup (4-byte size prefix, NUL-terminated entries) ---- */
+
+/* Look up the entry at byte offset `off` in a `tab_size`-byte string
+ * table. Stores the entry length in *len_out and returns a pointer to
+ * its first byte (into `tab`, not a copy). An offset at or past the end
+ * of the table yields "" with length 0; an entry with no NUL before
+ * the end of the table is cut off at the table boundary. */
+static const char* strtab_lookup(const u8* tab, u32 tab_size, u32 off,
+                                 u32* len_out) {
+  if (off >= tab_size) {
+    *len_out = 0;
+    return "";
+  }
+  const char* entry = (const char*)tab + off;
+  u32 avail = tab_size - off;
+  const char* nul = memchr(entry, '\0', avail);
+  *len_out = nul ? (u32)(nul - entry) : avail;
+  return entry;
+}
+
+/* Resolve the 8-byte name field of a section header into (ptr, len).
+ * A leading '/' marks the long-name form "/<decimal>", where the digits
+ * give a string-table offset; anything else is an inline name of up to
+ * 8 bytes, NUL-padded but not necessarily NUL-terminated. Symbol names
+ * use a different long form and go through resolve_sym_name instead.
+ * *name_out points into `raw` or `strtab`; nothing is copied. */
+static void resolve_section_name(const char raw[8], const u8* strtab,
+                                 u32 strtab_size, const char** name_out,
+                                 u32* len_out) {
+  if (raw[0] != '/') {
+    /* Inline form: the name ends at the first NUL or after 8 bytes. */
+    u32 inline_len = 0;
+    while (inline_len < 8 && raw[inline_len] != '\0') ++inline_len;
+    *name_out = raw;
+    *len_out = inline_len;
+    return;
+  }
+
+  /* Accumulate the decimal digits after '/', stopping at the first
+   * non-digit or at the end of the field (at most 7 digits). */
+  u32 strtab_off = 0;
+  u32 pos = 1;
+  while (pos < 8 && raw[pos] >= '0' && raw[pos] <= '9') {
+    strtab_off = strtab_off * 10u + (u32)(raw[pos] - '0');
+    ++pos;
+  }
+  *name_out = strtab_lookup(strtab, strtab_size, strtab_off, len_out);
+}
+
+/* ---- characteristics -> SecKind / SecFlag / SecSem ---- */
+
+/* Classify a section into a SecKind from its Characteristics bits and
+ * name. Checks run in priority order: uninitialized data -> SEC_BSS;
+ * code or executable -> SEC_TEXT; a ".debug_" or ".debug$" name prefix
+ * -> SEC_DEBUG; initialized data -> SEC_DATA when writable, otherwise
+ * SEC_RODATA; anything else -> SEC_OTHER. */
+static u16 coff_sec_kind(const char* name, u32 nlen, u32 ch) {
+  if (ch & IMAGE_SCN_CNT_UNINITIALIZED_DATA) return SEC_BSS;
+  if (ch & (IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_EXECUTE)) return SEC_TEXT;
+
+  /* Both the ELF-style ".debug_" spelling and the MS-style ".debug$"
+   * spelling count as debug sections; they differ only in the 7th
+   * byte. */
+  if (nlen >= 7 && memcmp(name, ".debug", 6) == 0 &&
+      (name[6] == '_' || name[6] == '$'))
+    return SEC_DEBUG;
+
+  if (!(ch & IMAGE_SCN_CNT_INITIALIZED_DATA)) return SEC_OTHER;
+  return (ch & IMAGE_SCN_MEM_WRITE) ? SEC_DATA : SEC_RODATA;
+}
+
+/* Translate Characteristics bits (and, for TLS, the section name) into
+ * SF_* flags: MEM_READ -> SF_ALLOC, MEM_EXECUTE -> SF_EXEC, MEM_WRITE ->
+ * SF_WRITE, LNK_COMDAT -> SF_GROUP. SF_TLS is set for a section named
+ * exactly ".tls" or starting with ".tls$" (e.g. ".tls$", ".tls$ZZZ");
+ * there is no characteristics bit for TLS, so detection is name-based. */
+static u16 coff_sec_flags(const char* name, u32 nlen, u32 ch) {
+  u16 flags = 0;
+
+  flags |= (ch & IMAGE_SCN_MEM_READ) ? SF_ALLOC : 0;
+  flags |= (ch & IMAGE_SCN_MEM_EXECUTE) ? SF_EXEC : 0;
+  flags |= (ch & IMAGE_SCN_MEM_WRITE) ? SF_WRITE : 0;
+  flags |= (ch & IMAGE_SCN_LNK_COMDAT) ? SF_GROUP : 0;
+
+  /* ".tls" must be the whole name or be followed directly by '$'. */
+  if (nlen >= 4 && memcmp(name, ".tls", 4) == 0 &&
+      (nlen == 4 || name[4] == '$'))
+    flags |= SF_TLS;
+
+  return flags;
+}
+
+/* Decode the section alignment from Characteristics. Bits 20..23 hold
+ * log2(align)+1; an encoded 0 ("default") is reported as 1-byte
+ * alignment for round-trip purposes. */
+static u32 coff_sec_align(u32 ch) {
+  u32 encoded = (ch & IMAGE_SCN_ALIGN_MASK) >> 20;
+  return encoded ? (1u << (encoded - 1u)) : 1;
+}
+
+/* ---- symbol-name resolution ---- */
+
+/* Resolve the 8-byte name field at the start of symbol record `rec`
+ * into (ptr, len). When the first four bytes read as zero, the next
+ * four are a string-table offset (long-name form); otherwise the name
+ * is stored inline, up to 8 bytes and NUL-padded. *name_out points
+ * into `rec` or `strtab`; nothing is copied. */
+static void resolve_sym_name(const u8* rec, const u8* strtab, u32 strtab_size,
+                             const char** name_out, u32* len_out) {
+  if (coff_rd_u32(rec + 0) != 0) {
+    /* Inline form: the name ends at the first NUL or after 8 bytes. */
+    const u8* nul = memchr(rec, '\0', 8);
+    *name_out = (const char*)rec;
+    *len_out = nul ? (u32)(nul - rec) : 8u;
+    return;
+  }
+  u32 strtab_off = coff_rd_u32(rec + 4);
+  *name_out = strtab_lookup(strtab, strtab_size, strtab_off, len_out);
+}
+
+/* ---- short-import record handler ----
+ * Handles a Microsoft "short import" archive member: a 20-byte
+ * ImportObjectHeader followed by SizeOfData bytes holding two
+ * NUL-terminated strings, the imported symbol name and then the DLL
+ * name. Such records live in .lib archives (mingw's
+ * libkernel32.dll.a etc.) in place of a long-form import object.
+ *
+ * Produces a finalized ObjBuilder with no sections and two global
+ * symbols at OBJ_SEC_NONE (value 0, size 0):
+ *   - `<name>`: SK_FUNC for IMPORT_OBJECT_CODE, SK_OBJ otherwise;
+ *   - `__imp_<name>`: always SK_OBJ, the mingw spelling for explicit
+ *     IAT-slot access.
+ * The DLL name is recorded on the builder with obj_set_coff_import_dll
+ * so the archive-ingestion layer can route the result as a DSO.
+ *
+ * Panics via compiler_panic on a truncated record, a machine other
+ * than AMD64/ARM64, an import by ordinal, or a missing or unterminated
+ * name string. The caller has already matched Sig1/Sig2; `name` is
+ * only used in the by-ordinal diagnostic. */
+static ObjBuilder* read_coff_short_import(Compiler* c, const char* name,
+                                          const u8* data, size_t len) {
+  static const char kImpPrefix[] = "__imp_";
+  const u32 imp_prefix_len = (u32)(sizeof kImpPrefix - 1u);
+
+  if (len < COFF_IMPORT_OBJECT_HEADER_SIZE)
+    compiler_panic(c, no_loc(),
+                   "read_coff: short-import record shorter than header");
+
+  /* Header fields in use: Machine @6, SizeOfData @12, Ordinal/Hint @16,
+   * TypeFlags @18. Version @4 and TimeDateStamp @8 are ignored. */
+  u16 machine = coff_rd_u16(data + 6);
+  u32 size_of_data = coff_rd_u32(data + 12);
+  u16 ordinal_or_hint = coff_rd_u16(data + 16);
+  u16 type_flags = coff_rd_u16(data + 18);
+
+  u64 record_end = (u64)COFF_IMPORT_OBJECT_HEADER_SIZE + (u64)size_of_data;
+  if (record_end > (u64)len)
+    compiler_panic(c, no_loc(),
+                   "read_coff: short-import SizeOfData=%u extends past input "
+                   "(len=%zu)",
+                   size_of_data, len);
+
+  int machine_ok = machine == IMAGE_FILE_MACHINE_AMD64 ||
+                   machine == IMAGE_FILE_MACHINE_ARM64;
+  if (!machine_ok)
+    compiler_panic(c, no_loc(),
+                   "read_coff: short-import unsupported machine %#x",
+                   (u32)machine);
+
+  /* TypeFlags bitfield: Type in bits 0..1, NameType in bits 2..4, the
+   * remaining 11 bits reserved. */
+  u32 import_type = (u32)(type_flags & 0x3u);
+  u32 name_type = (u32)((type_flags >> 2) & 0x7u);
+
+  /* Ordinal-only imports (NameType=IMPORT_OBJECT_ORDINAL) are not yet
+   * implemented in cfree. None of the mingw / llvm-mingw system import
+   * archives use this shape, so the record is refused with a message
+   * telling the user what to do. Supporting it would mean threading
+   * the ordinal through link_resolve and into the PE import directory
+   * hint/name tables. */
+  if (name_type == IMPORT_OBJECT_ORDINAL)
+    compiler_panic(
+        c, no_loc(),
+        "read_coff: short-import by ordinal not implemented "
+        "(archive member \"%.*s\", ordinal %u). cfree links "
+        "imports by name only; rebuild the consumer to import "
+        "by name, or omit this archive from the link.",
+        SLICE_ARG(name ? slice_from_cstr(name) : SLICE_LIT("<unnamed>")),
+        (unsigned)ordinal_or_hint);
+
+  /* First string: the symbol name, starting right after the header. */
+  const u8* body = data + COFF_IMPORT_OBJECT_HEADER_SIZE;
+  const u8* sym_nul = memchr(body, '\0', size_of_data);
+  if (!sym_nul)
+    compiler_panic(c, no_loc(),
+                   "read_coff: short-import symbol name not NUL-terminated");
+  u32 sym_name_len = (u32)(sym_nul - body);
+
+  /* Second string: the DLL name, starting after the symbol name's NUL. */
+  u32 dll_name_off = sym_name_len + 1u;
+  if (dll_name_off >= size_of_data)
+    compiler_panic(c, no_loc(), "read_coff: short-import missing DLL name");
+  const u8* dll_p = body + dll_name_off;
+  const u8* dll_nul = memchr(dll_p, '\0', size_of_data - dll_name_off);
+  if (!dll_nul)
+    compiler_panic(c, no_loc(),
+                   "read_coff: short-import DLL name not NUL-terminated");
+  u32 dll_name_len = (u32)(dll_nul - dll_p);
+
+  ObjBuilder* ob = obj_new(c);
+  if (!ob) compiler_panic(c, no_loc(), "read_coff: obj_new failed");
+
+  /* The plain symbol: a function for CODE imports, an object for
+   * everything else (DATA/CONST). */
+  SymKind plain_kind = SK_OBJ;
+  if (import_type == IMPORT_OBJECT_CODE) plain_kind = SK_FUNC;
+
+  Slice plain_name = {.s = (const char*)body, .len = sym_name_len};
+  Sym sn = pool_intern_slice(c->global, plain_name);
+  ObjSymId id = obj_symbol_ex(ob, sn, SB_GLOBAL, SV_DEFAULT, plain_kind,
+                              OBJ_SEC_NONE, 0, 0, 0);
+  obj_sym_mark_referenced(ob, id);
+
+  /* The `__imp_<name>` alias. It is object-like even for code imports
+   * because references to it want the IAT data slot, not the callable
+   * import stub. The name is assembled in scratch memory, then
+   * interned. */
+  u32 imp_len = imp_prefix_len + sym_name_len;
+  char* imp_buf = arena_array(c->scratch, char, imp_len);
+  memcpy(imp_buf, kImpPrefix, imp_prefix_len);
+  memcpy(imp_buf + imp_prefix_len, body, sym_name_len);
+  Slice imp_name = {.s = imp_buf, .len = imp_len};
+  Sym imp_sn = pool_intern_slice(c->global, imp_name);
+  ObjSymId imp_id = obj_symbol_ex(ob, imp_sn, SB_GLOBAL, SV_DEFAULT, SK_OBJ,
+                                  OBJ_SEC_NONE, 0, 0, 0);
+  obj_sym_mark_referenced(ob, imp_id);
+
+  /* Stash the DLL name so the archive-ingestion layer (Phase 4.3) can
+   * route this builder as a DSO with the DLL as soname. */
+  Slice dll_name = {.s = (const char*)dll_p, .len = dll_name_len};
+  Sym dll_sn = pool_intern_slice(c->global, dll_name);
+  obj_set_coff_import_dll(ob, dll_sn);
+
+  obj_finalize(ob);
+  return ob;
+}
+
+/* Parse a relocatable PE/COFF object in `data[0..len)` into a new,
+ * finalized ObjBuilder.
+ *
+ * Input that starts with the short-import signature is handed to
+ * read_coff_short_import unchanged (`name` is only forwarded there).
+ * Otherwise the steps are: validate the file header, locate the string
+ * table, create one ObjBuilder section per section header, ingest the
+ * symbol table (aux records included), stitch COMDAT groups, then
+ * translate relocations.
+ *
+ * Malformed or unsupported input (truncated tables, an optional
+ * header, an unknown machine or relocation type, an out-of-range
+ * section number, aux records running past the symbol table) ends in
+ * compiler_panic. Scratch arrays are taken from c->scratch. */
+ObjBuilder* read_coff(Compiler* c, const char* name, const u8* data,
+                      size_t len) {
+  /* ---- Step 0: header validation ---- */
+  if (len < COFF_FILE_HEADER_SIZE)
+    compiler_panic(c, no_loc(), "read_coff: input shorter than COFF header");
+
+  /* Microsoft short-import record? (Sig1=0, Sig2=0xFFFF.) These live
+   * as members of .lib archives and stand in for a long-form import
+   * object. Detect at entry; the rest of read_coff assumes the
+   * input is a real IMAGE_FILE_HEADER. */
+  if (len >= 4 && coff_rd_u16(data + 0) == IMPORT_OBJECT_HDR_SIG1 &&
+      coff_rd_u16(data + 2) == IMPORT_OBJECT_HDR_SIG2) {
+    return read_coff_short_import(c, name, data, len);
+  }
+
+  u16 machine = coff_rd_u16(data + 0);
+  u16 nsections = coff_rd_u16(data + 2);
+  /* data + 4: TimeDateStamp (4 bytes, ignored). */
+  u32 ptr_to_symtab = coff_rd_u32(data + 8);
+  u32 nsymbols = coff_rd_u32(data + 12);
+  u16 size_opt_hdr = coff_rd_u16(data + 16);
+  /* data + 18: Characteristics (2 bytes, currently ignored). */
+
+  if (size_opt_hdr != 0)
+    compiler_panic(c, no_loc(),
+                   "read_coff: input has optional header (size=%u); "
+                   "use read_coff_pe for executables",
+                   (u32)size_opt_hdr);
+
+  if (machine != IMAGE_FILE_MACHINE_AMD64 &&
+      machine != IMAGE_FILE_MACHINE_ARM64 &&
+      machine != IMAGE_FILE_MACHINE_ARM64EC)
+    compiler_panic(c, no_loc(), "read_coff: unsupported machine %#x",
+                   (u32)machine);
+
+  /* The per-machine ops supply reloc_from, which maps a wire relocation
+   * type to cfree's relocation kind ((u32)-1 for unsupported types). */
+  const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_COFF);
+  const ObjCoffArchOps* coff =
+      fmt && fmt->coff_machine ? fmt->coff_machine(machine) : NULL;
+  if (!coff || !coff->reloc_from)
+    compiler_panic(c, no_loc(), "read_coff: no arch impl for machine %#x",
+                   (u32)machine);
+  u32 (*reloc_from)(u32) = coff->reloc_from;
+
+  if ((u64)COFF_FILE_HEADER_SIZE +
+          (u64)nsections * (u64)COFF_SECTION_HEADER_SIZE >
+      (u64)len)
+    compiler_panic(c, no_loc(), "read_coff: section header table out of range");
+
+  /* ---- Step 1: bootstrap, locate strtab ---- */
+  /* Strtab is at PointerToSymbolTable + NumberOfSymbols * 18. When the
+   * file has no symbol table (ptr=0, n=0) we treat strtab as empty.
+   * `strtab` points at the 4-byte size prefix, so offsets stored in
+   * names index from that prefix. */
+  const u8* strtab = NULL;
+  u32 strtab_size = 0;
+  if (ptr_to_symtab && nsymbols) {
+    u64 symtab_end = (u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYMBOL_SIZE;
+    if (symtab_end + COFF_STRTAB_SIZE_FIELD_BYTES > (u64)len)
+      compiler_panic(c, no_loc(),
+                     "read_coff: symbol table / strtab header out of range");
+    u32 declared = coff_rd_u32(data + symtab_end);
+    /* The size field is inclusive of the 4-byte prefix; treat <4 as
+     * "empty" (some tools write 0). */
+    if (declared < COFF_STRTAB_SIZE_FIELD_BYTES) declared = 0;
+    if (declared) {
+      if (symtab_end + (u64)declared > (u64)len)
+        compiler_panic(c, no_loc(), "read_coff: strtab body out of range");
+      strtab = data + symtab_end;
+      strtab_size = declared;
+    } else {
+      strtab = data + symtab_end;
+      strtab_size = COFF_STRTAB_SIZE_FIELD_BYTES;
+    }
+  }
+
+  ObjBuilder* ob = obj_new(c);
+  if (!ob) compiler_panic(c, no_loc(), "read_coff: obj_new failed");
+
+  /* ---- Step 2: ingest sections ---- */
+  CSecRec* secs = arena_array(c->scratch, CSecRec, nsections ? nsections : 1);
+  const u8* shdr_base = data + COFF_FILE_HEADER_SIZE;
+  for (u32 i = 0; i < nsections; ++i) {
+    CSecRec* s = &secs[i];
+    parse_shdr(shdr_base + (u64)i * COFF_SECTION_HEADER_SIZE, s);
+
+    const char* nm;
+    u32 nlen;
+    resolve_section_name(s->raw_name, strtab, strtab_size, &nm, &nlen);
+    Sym sn = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen});
+
+    u16 kind = coff_sec_kind(nm, nlen, s->characteristics);
+    u16 flags = coff_sec_flags(nm, nlen, s->characteristics);
+    u32 align = coff_sec_align(s->characteristics);
+
+    int is_bss = (s->characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA) != 0;
+    u16 sem = is_bss ? SSEM_NOBITS : SSEM_PROGBITS;
+
+    ObjSecId id = obj_section_ex(ob, sn, (SecKind)kind, (SecSem)sem, flags,
+                                 align, 0u, 0u, 0u);
+    if (id == OBJ_SEC_NONE)
+      compiler_panic(c, no_loc(),
+                     "read_coff: obj_section_ex failed for section %u", i);
+    s->obj_sec = id;
+
+    /* Preserve raw Characteristics so emit_coff can write back any bits
+     * the canonical SecFlag/SecSem mapping doesn't model (LNK_INFO,
+     * LNK_REMOVE, MEM_DISCARDABLE, MEM_SHARED, GPREL, alignment nibble). */
+    obj_section_set_ext(ob, id, OBJ_EXT_COFF, s->characteristics, 0);
+
+    if (is_bss) {
+      /* No file bytes: size comes from VirtualSize, or SizeOfRawData
+       * when VirtualSize is 0. */
+      u32 bss_size = s->virtual_size ? s->virtual_size : s->size_of_raw_data;
+      obj_reserve_bss(ob, id, bss_size, align);
+    } else if (s->size_of_raw_data) {
+      u64 end = (u64)s->pointer_to_raw_data + (u64)s->size_of_raw_data;
+      if (end > (u64)len)
+        compiler_panic(c, no_loc(), "read_coff: section %u bytes out of range",
+                       i);
+      u8* dst = obj_reserve(ob, id, s->size_of_raw_data);
+      memcpy(dst, data + s->pointer_to_raw_data, s->size_of_raw_data);
+    }
+  }
+
+  /* ---- Step 3: ingest symbols (with aux-record awareness) ----
+   * sym_to_obj is indexed by RAW symbol-table index (including aux
+   * slots), so reloc.SymbolTableIndex resolves directly without
+   * adjusting for skipped aux records. Aux slots map to OBJ_SYM_NONE. */
+  ObjSymId* sym_to_obj =
+      arena_zarray(c->scratch, ObjSymId, nsymbols ? nsymbols : 1);
+
+  /* Track section-symbol primary symtab index per section, stored as
+   * (raw_index + 1) so 0 can mean "not seen yet" without colliding
+   * with the (legitimate) first symbol-table slot — emit_coff always
+   * lays the first section's section-symbol at index 0. The array is
+   * indexed by the 1-based COFF section number. */
+  u32* sec_sym_primary = arena_zarray(c->scratch, u32, nsections + 1u);
+
+  const u8* sym_base = data + ptr_to_symtab;
+  if (nsymbols) {
+    if ((u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYMBOL_SIZE > (u64)len)
+      compiler_panic(c, no_loc(), "read_coff: symbol table body out of range");
+  }
+
+  /* No increment in the for header: each iteration advances `i` past
+   * the primary record and its aux records. */
+  for (u32 i = 0; i < nsymbols;) {
+    const u8* p = sym_base + (u64)i * COFF_SYMBOL_SIZE;
+    const char* nm;
+    u32 nlen;
+    resolve_sym_name(p, strtab, strtab_size, &nm, &nlen);
+
+    u32 value = coff_rd_u32(p + 8);
+    i16 sec_num = (i16)coff_rd_u16(p + 12);
+    u16 type = coff_rd_u16(p + 14);
+    u8 sclass = p[16];
+    u8 naux = p[17];
+
+    /* FILE storage class: concatenate aux records' raw bytes (each
+     * 18 bytes, NUL-padded) for the source-file name. */
+    if (sclass == IMAGE_SYM_CLASS_FILE) {
+      /* Build name from aux records (up to naux*18 bytes); fall back
+       * to the primary record's name if naux==0. */
+      const char* fnm = nm;
+      u32 fnlen = nlen;
+      if (naux) {
+        /* Each aux record's 18 bytes are interpreted as raw file-name
+         * bytes; the name ends at the first NUL or after naux*18
+         * bytes. */
+        u32 total = (u32)naux * COFF_SYMBOL_SIZE;
+        if ((u64)i + 1u + (u64)naux > (u64)nsymbols)
+          compiler_panic(c, no_loc(),
+                         "read_coff: FILE aux records extend past symbol "
+                         "table");
+        const u8* aux = p + COFF_SYMBOL_SIZE;
+        u32 n = 0;
+        while (n < total && aux[n] != '\0') ++n;
+        fnm = (const char*)aux;
+        fnlen = n;
+      }
+      Sym fsn =
+          fnlen ? pool_intern_slice(c->global, (Slice){.s = fnm, .len = fnlen})
+                : 0;
+      ObjSymId id = obj_symbol_ex(ob, fsn, SB_LOCAL, SV_DEFAULT, SK_FILE,
+                                  OBJ_SEC_NONE, 0, 0, 0);
+      obj_sym_mark_referenced(ob, id);
+      sym_to_obj[i] = id;
+      i += 1u + naux;
+      continue;
+    }
+
+    /* Step 4 re-reads the section-definition aux record that follows a
+     * section symbol. Reject an aux count that runs off the end of the
+     * symbol table here, so those later reads stay inside the range
+     * already bounds-checked against `len`. */
+    if ((u64)i + 1u + (u64)naux > (u64)nsymbols)
+      compiler_panic(c, no_loc(),
+                     "read_coff: aux records of symbol %u extend past "
+                     "symbol table",
+                     i);
+
+    /* Skip .bf/.ef debug pair primaries (FUNCTION storage class) and
+     * the END_OF_FUNCTION marker: they carry no symbol cfree models. */
+    if (sclass == IMAGE_SYM_CLASS_FUNCTION ||
+        sclass == IMAGE_SYM_CLASS_END_OF_FUNCTION) {
+      sym_to_obj[i] = OBJ_SYM_NONE;
+      i += 1u + naux;
+      continue;
+    }
+
+    /* Resolve (bind, vis, kind, section_id, value, size, cmnalign). */
+    SymBind bind = SB_LOCAL;
+    SymVis vis = SV_DEFAULT;
+    SymKind kind = SK_NOTYPE;
+    ObjSecId target_sec = OBJ_SEC_NONE;
+    u64 sym_value = 0;
+    u64 sym_size = 0;
+    u64 cmnalign = 0;
+
+    if (sec_num == IMAGE_SYM_UNDEFINED) {
+      /* Undef or common. EXTERNAL with Value > 0 is a common whose
+       * size is carried in Value. */
+      if (sclass == IMAGE_SYM_CLASS_EXTERNAL && value > 0) {
+        bind = SB_GLOBAL;
+        kind = SK_COMMON;
+        sym_size = value;
+        cmnalign = 1; /* COFF doesn't carry per-common alignment */
+      } else {
+        bind = (sclass == IMAGE_SYM_CLASS_WEAK_EXTERNAL) ? SB_WEAK
+               : (sclass == IMAGE_SYM_CLASS_EXTERNAL)    ? SB_GLOBAL
+                                                         : SB_LOCAL;
+        kind = SK_UNDEF;
+      }
+    } else if (sec_num == IMAGE_SYM_ABSOLUTE) {
+      kind = SK_ABS;
+      sym_value = value;
+      bind = (sclass == IMAGE_SYM_CLASS_EXTERNAL) ? SB_GLOBAL : SB_LOCAL;
+    } else if (sec_num == IMAGE_SYM_DEBUG) {
+      /* Defined-in-debug — cfree has no model for it. Skip with an
+       * OBJ_SYM_NONE entry; relocations against this slot will resolve
+       * to OBJ_SYM_NONE, which obj_reloc_ex tolerates. */
+      sym_to_obj[i] = OBJ_SYM_NONE;
+      i += 1u + naux;
+      continue;
+    } else if (sec_num >= 1 && (u32)sec_num <= nsections) {
+      /* Defined in a section; section numbers are 1-based. */
+      target_sec = secs[sec_num - 1].obj_sec;
+      sym_value = value;
+      switch (sclass) {
+        case IMAGE_SYM_CLASS_EXTERNAL:
+          bind = SB_GLOBAL;
+          break;
+        case IMAGE_SYM_CLASS_WEAK_EXTERNAL:
+          bind = SB_WEAK;
+          break;
+        case IMAGE_SYM_CLASS_STATIC:
+        case IMAGE_SYM_CLASS_LABEL:
+        default:
+          bind = SB_LOCAL;
+          break;
+      }
+
+      /* Detect SECTION symbols: STATIC, Value==0, name matches the
+       * section's own name, and the symbol has at least one aux
+       * record (the section-definition aux). Mark as SK_SECTION so
+       * emit_coff regenerates the synthetic entry. */
+      int is_section_sym = 0;
+      if (sclass == IMAGE_SYM_CLASS_STATIC && value == 0 && naux >= 1) {
+        const CSecRec* cs = &secs[sec_num - 1];
+        u32 raw_nlen = 0;
+        while (raw_nlen < 8 && cs->raw_name[raw_nlen] != '\0') ++raw_nlen;
+        if (raw_nlen == nlen && memcmp(cs->raw_name, nm, nlen) == 0) {
+          is_section_sym = 1;
+        } else if (cs->raw_name[0] == '/') {
+          /* Long-named section: compare the resolved name. */
+          const char* rn;
+          u32 rnlen;
+          resolve_section_name(cs->raw_name, strtab, strtab_size, &rn, &rnlen);
+          if (rnlen == nlen && memcmp(rn, nm, nlen) == 0) is_section_sym = 1;
+        }
+      }
+
+      if (is_section_sym) {
+        kind = SK_SECTION;
+        sec_sym_primary[sec_num] = i + 1u;
+      } else if (sclass == IMAGE_SYM_CLASS_SECTION) {
+        kind = SK_SECTION;
+      } else if (sclass == IMAGE_SYM_CLASS_LABEL) {
+        kind = SK_NOTYPE;
+      } else if ((type >> 8) == IMAGE_SYM_DTYPE_FUNCTION) {
+        /* NOTE(review): Microsoft tools mark functions with Type ==
+         * 0x20, i.e. the derived type sits in bits 4..5 rather than in
+         * the high byte. Confirm that this `>> 8` test and the value of
+         * IMAGE_SYM_DTYPE_FUNCTION in coff.h match what emit_coff and
+         * third-party objects actually write. */
+        kind = SK_FUNC;
+      } else if (type == IMAGE_SYM_TYPE_NULL) {
+        kind = (bind == SB_LOCAL) ? SK_NOTYPE : SK_OBJ;
+      } else {
+        kind = SK_OBJ;
+      }
+    } else {
+      compiler_panic(c, no_loc(),
+                     "read_coff: symbol section number %d out of range",
+                     (int)sec_num);
+    }
+
+    /* WEAK_EXTERNAL primary: aux record carries TagIndex + Characteristics.
+     * cfree's model has SB_WEAK; the fallback symbol is link-time
+     * resolution by name and we drop the explicit index. */
+    if (sclass == IMAGE_SYM_CLASS_WEAK_EXTERNAL) bind = SB_WEAK;
+
+    Sym sn =
+        nlen ? pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}) : 0;
+    ObjSymId id = obj_symbol_ex(ob, sn, bind, vis, kind, target_sec, sym_value,
+                                sym_size, cmnalign);
+    obj_sym_mark_referenced(ob, id);
+    sym_to_obj[i] = id;
+    i += 1u + naux;
+  }
+
+  /* ---- Step 4: stitch COMDAT groups from section-definition aux ----
+   * Each COMDAT section has a STATIC primary symbol (the section
+   * symbol) followed by one section-definition aux record. Selection
+   * != 0 marks the section as a COMDAT member; the signature symbol
+   * is the section symbol itself (Number field's selection variant
+   * controls dedup policy at link time). */
+  for (u32 s = 1; s <= nsections; ++s) {
+    u32 prim_plus1 = sec_sym_primary[s];
+    if (!prim_plus1) continue;
+    u32 prim = prim_plus1 - 1u;
+    const CSecRec* cs = &secs[s - 1];
+    if (!(cs->characteristics & IMAGE_SCN_LNK_COMDAT)) continue;
+    const u8* p = sym_base + (u64)prim * COFF_SYMBOL_SIZE;
+    u8 naux = p[17];
+    if (!naux) continue;
+    const u8* aux = p + COFF_SYMBOL_SIZE;
+    /* Aux layout: Length(4), NumberOfRelocations(2), NumberOfLinenumbers(2),
+     * CheckSum(4), Number(2), Selection(1), Unused(3). */
+    u16 assoc_number = coff_rd_u16(aux + 12);
+    u8 selection = aux[14];
+    if (selection == 0) continue;
+
+    ObjSymId sig = sym_to_obj[prim];
+    const ObjSym* sigsym = obj_symbol_get(ob, sig);
+    Sym gname = sigsym ? sigsym->name : 0;
+    ObjGroupId gid = obj_group(ob, gname, sig, (u32)selection);
+    obj_group_add_section(ob, gid, cs->obj_sec);
+    obj_section_set_group(ob, cs->obj_sec, gid);
+
+    /* ASSOCIATIVE: the COMDAT member is associated with another
+     * section's group. Add this section to that group's list too so
+     * dead-strip keeps them paired. */
+    if (selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE && assoc_number >= 1 &&
+        (u32)assoc_number <= nsections) {
+      u32 other_prim_plus1 = sec_sym_primary[assoc_number];
+      if (other_prim_plus1) {
+        u32 other_prim = other_prim_plus1 - 1u;
+        const u8* op = sym_base + (u64)other_prim * COFF_SYMBOL_SIZE;
+        if (op[17]) {
+          const u8* oaux = op + COFF_SYMBOL_SIZE;
+          u8 osel = oaux[14];
+          if (osel != 0) {
+            ObjSymId osig = sym_to_obj[other_prim];
+            const ObjSym* osigsym = obj_symbol_get(ob, osig);
+            Sym ogname = osigsym ? osigsym->name : 0;
+            ObjGroupId ogid = obj_group(ob, ogname, osig, (u32)osel);
+            obj_group_add_section(ob, ogid, cs->obj_sec);
+          }
+        }
+      }
+    }
+  }
+
+  /* ---- Step 5: per-section relocations ----
+   * NOTE(review): the 16-bit NumberOfRelocations is taken at face
+   * value; the IMAGE_SCN_LNK_NRELOC_OVFL extended-count convention is
+   * not consulted anywhere in this function. */
+  for (u32 i = 0; i < nsections; ++i) {
+    const CSecRec* s = &secs[i];
+    if (!s->number_of_relocations) continue;
+    u64 reloc_end = (u64)s->pointer_to_relocations +
+                    (u64)s->number_of_relocations * (u64)COFF_RELOC_SIZE;
+    if (reloc_end > (u64)len)
+      compiler_panic(c, no_loc(),
+                     "read_coff: relocation table for section %u out of range",
+                     i);
+    const u8* rbase = data + s->pointer_to_relocations;
+    for (u32 j = 0; j < s->number_of_relocations; ++j) {
+      /* Record layout: VirtualAddress(4), SymbolTableIndex(4), Type(2). */
+      const u8* rp = rbase + (u64)j * COFF_RELOC_SIZE;
+      u32 r_va = coff_rd_u32(rp + 0);
+      u32 r_sym = coff_rd_u32(rp + 4);
+      u16 r_type = coff_rd_u16(rp + 8);
+
+      u32 kind = reloc_from(r_type);
+      if (kind == (u32)-1)
+        compiler_panic(c, no_loc(),
+                       "read_coff: unsupported reloc type %u for machine %#x",
+                       (u32)r_type, (u32)machine);
+
+      /* An out-of-range symbol index leaves the target as OBJ_SYM_NONE. */
+      ObjSymId target = OBJ_SYM_NONE;
+      if (r_sym < nsymbols) target = sym_to_obj[r_sym];
+
+      /* ARM64 PAGEOFFSET_12L is one wire code for LDST{8,16,32,64,128}.
+       * The per-arch translator returns R_AARCH64_LDST64_ABS_LO12_NC by
+       * default; recover the actual access width from the patched LDR/
+       * STR instruction's size field at bits [31:30] (and a SIMD/FP
+       * extension via bit 26 + opc[23]) so the linker applies the right
+       * scale. Mismatch panics at apply-time with "misaligned
+       * address" otherwise — see link_reloc.c.
+       *
+       * The last condition re-checks that the section's raw bytes lie
+       * inside the input: Step 2 only verifies that for sections it
+       * copies, so a section flagged as uninitialized data could
+       * otherwise send this peek past the end of `data`. */
+      if ((machine == IMAGE_FILE_MACHINE_ARM64 ||
+           machine == IMAGE_FILE_MACHINE_ARM64EC) &&
+          r_type == IMAGE_REL_ARM64_PAGEOFFSET_12L && s->size_of_raw_data &&
+          (u64)r_va + 4u <= (u64)s->size_of_raw_data &&
+          (u64)s->pointer_to_raw_data + (u64)s->size_of_raw_data <= (u64)len) {
+        const u8* ibytes = data + s->pointer_to_raw_data + r_va;
+        u32 instr = (u32)ibytes[0] | ((u32)ibytes[1] << 8) |
+                    ((u32)ibytes[2] << 16) | ((u32)ibytes[3] << 24);
+        u32 sz = (instr >> 30) & 0x3u;
+        int is_simd = (instr >> 26) & 0x1u;
+        if (is_simd && ((instr >> 23) & 0x1u)) {
+          kind = R_AARCH64_LDST128_ABS_LO12_NC;
+        } else {
+          switch (sz) {
+            case 0:
+              kind = R_AARCH64_LDST8_ABS_LO12_NC;
+              break;
+            case 1:
+              kind = R_AARCH64_LDST16_ABS_LO12_NC;
+              break;
+            case 2:
+              kind = R_AARCH64_LDST32_ABS_LO12_NC;
+              break;
+            default:
+              kind = R_AARCH64_LDST64_ABS_LO12_NC;
+              break;
+          }
+        }
+      }
+
+      /* AMD64 REL32 encodings are relative to a PC after the relocated
+       * field, while cfree's R_PC32-style apply formula subtracts the
+       * relocation field address P. Plain REL32 is relative to P+4;
+       * REL32_N is relative to P+4+N. Record that convention as an
+       * implicit negative addend so link_reloc_apply can stay format
+       * neutral. */
+      i64 addend = 0;
+      int has_explicit = 0;
+      if (machine == IMAGE_FILE_MACHINE_AMD64) {
+        switch (r_type) {
+          case IMAGE_REL_AMD64_REL32:
+            addend = -4;
+            has_explicit = 1;
+            break;
+          case IMAGE_REL_AMD64_REL32_1:
+            addend = -1;
+            has_explicit = 1;
+            break;
+          case IMAGE_REL_AMD64_REL32_2:
+            addend = -2;
+            has_explicit = 1;
+            break;
+          case IMAGE_REL_AMD64_REL32_3:
+            addend = -3;
+            has_explicit = 1;
+            break;
+          case IMAGE_REL_AMD64_REL32_4:
+            addend = -4;
+            has_explicit = 1;
+            break;
+          case IMAGE_REL_AMD64_REL32_5:
+            addend = -5;
+            has_explicit = 1;
+            break;
+          default:
+            break;
+        }
+      }
+
+      obj_reloc_ex(ob, s->obj_sec, r_va, (RelocKind)kind, target, addend,
+                   has_explicit, 0);
+    }
+  }
+
+  /* ---- Step 6: finalize and return ---- */
+  obj_finalize(ob);
+  return ob;
+}
diff --git a/src/obj/coff/read_dso.c b/src/obj/coff/read_dso.c
@@ -0,0 +1,237 @@
+/* PE32+ DLL reader. Peer of read_elf_dso / read_macho_dso: walks the
+ * IMAGE_DIRECTORY_ENTRY_EXPORT data directory of a Windows .dll and
+ * produces an ObjBuilder of defined OBJ_SEC_NONE symbols — one per
+ * name in the Export Name Table. The DLL's own Name string (the
+ * analogue of DT_SONAME / LC_ID_DYLIB) is returned via *soname_out.
+ *
+ * The produced ObjBuilder carries no sections, relocations, or groups
+ * — DSO inputs contribute no bytes to the link. The consumer's
+ * resolve_undefs pass sees the exports as defined globals and marks
+ * matching consumer-side undefs as `imported`; the import-table
+ * emitter (Phase 3 / 4.4) later groups them by providing DLL.
+ *
+ * Scope: PE32+ only (IMAGE_NT_OPTIONAL_HDR64_MAGIC), AMD64 or ARM64,
+ * with IMAGE_FILE_DLL set. Ordinal-only exports (entries present in
+ * the EAT but absent from the ENT) are not synthesized in v1 — almost
+ * all real-world imports are by name. Forwarder entries (EAT RVA
+ * falls within the export directory's own range) are still emitted as
+ * symbols so the linker can satisfy imports against them; the OS
+ * loader follows the forwarder chain at runtime. This contract is
+ * pinned by test/coff/pe-dso-forwarder.c. */
+
+#include <string.h>
+
+#include "core/arena.h"
+#include "core/heap.h"
+#include "core/pool.h"
+#include "core/slice.h"
+#include "obj/coff/coff.h"
+
+/* Returns an all-zero SrcLoc. Every compiler_panic call in this reader
+ * passes it as the location argument. */
+static SrcLoc no_loc(void) {
+  return (SrcLoc){0, 0, 0};
+}
+
+/* ---- RVA -> file offset ----
+ * Walks the section table once per call. A section covers
+ * [VirtualAddress, VirtualAddress + span), where span is VirtualSize,
+ * or SizeOfRawData when VirtualSize is 0. Returns 1 and fills *off_out
+ * on success; returns 0 if no section covers the RVA, if the RVA lies
+ * past the covering section's on-disk data, or if the resulting file
+ * offset is not below `len`. */
+static int rva_to_offset(const u8* shdrs, u16 nsec, u32 rva, size_t len,
+                         u64* off_out) {
+  for (u16 i = 0; i < nsec; ++i) {
+    const u8* sh = shdrs + (u64)i * COFF_SECTION_HEADER_SIZE;
+    u32 vsize = coff_rd_u32(sh + 8);
+    u32 vaddr = coff_rd_u32(sh + 12);
+    u32 raw_size = coff_rd_u32(sh + 16);
+    u32 raw_ptr = coff_rd_u32(sh + 20);
+    /* Some linkers leave VirtualSize == 0 in objects; use raw_size as
+     * a fallback so we still resolve RVAs in well-formed images. */
+    u32 span = vsize ? vsize : raw_size;
+
+    if (rva < vaddr) continue;
+    /* Compare the distance into the section against span instead of
+     * forming vaddr + span: that 32-bit sum can wrap for a section
+     * near the top of the address space and wrongly reject an RVA
+     * the section covers. */
+    u32 delta = rva - vaddr;
+    if (delta >= span) continue;
+
+    if (delta >= raw_size) return 0; /* RVA past on-disk data */
+    u64 off = (u64)raw_ptr + (u64)delta;
+    if (off >= len) return 0;
+    *off_out = off;
+    return 1;
+  }
+  return 0;
+}
+
+/* Read a NUL-terminated string that starts at file offset `off`.
+ * Stores a pointer to it in *out and returns its length, excluding the
+ * NUL. If `off` is not below `len`, or no NUL occurs before the end of
+ * the file, stores "" in *out and returns 0. */
+static u32 read_cstr(const u8* data, size_t len, u64 off, const char** out) {
+  *out = "";
+  if (off >= len) return 0;
+
+  const char* start = (const char*)(data + off);
+  const char* nul = memchr(start, '\0', (size_t)((u64)len - off));
+  if (!nul) return 0; /* unterminated */
+
+  *out = start;
+  return (u32)(nul - start);
+}
+
+ObjBuilder* read_coff_dso(Compiler* c, const char* name, const u8* data,
+ size_t len, Sym* soname_out) {
+ (void)name; /* unused by this reader */
+ if (soname_out) *soname_out = 0; /* stays 0 unless a DLL name is found */
+
+ /* ---- DOS header + PE signature ---- */
+ if (len < COFF_DOS_HEADER_SIZE)
+ compiler_panic(c, no_loc(), "read_coff_dso: input shorter than DOS header");
+ u16 e_magic = coff_rd_u16(data + 0);
+ if (e_magic != IMAGE_DOS_SIGNATURE)
+ compiler_panic(c, no_loc(), "read_coff_dso: bad DOS magic 0x%x", e_magic);
+ u32 e_lfanew = coff_rd_u32(data + 60); /* file offset of the PE signature */
+
+ u64 nt_end = (u64)e_lfanew + 4u + COFF_FILE_HEADER_SIZE + COFF_OPT_HDR64_SIZE;
+ if (nt_end > len)
+ compiler_panic(c, no_loc(),
+ "read_coff_dso: PE headers extend past end of file");
+
+ u32 pe_sig = coff_rd_u32(data + e_lfanew);
+ if (pe_sig != IMAGE_NT_SIGNATURE)
+ compiler_panic(c, no_loc(), "read_coff_dso: bad PE signature 0x%x", pe_sig);
+
+ /* ---- IMAGE_FILE_HEADER ---- */
+ const u8* fh = data + e_lfanew + 4u;
+ u16 machine = coff_rd_u16(fh + 0);
+ u16 nsec = coff_rd_u16(fh + 2); /* section header count */
+ u16 size_of_opt = coff_rd_u16(fh + 16); /* SizeOfOptionalHeader */
+ u16 chars = coff_rd_u16(fh + 18); /* Characteristics */
+
+ if (machine != IMAGE_FILE_MACHINE_AMD64 &&
+ machine != IMAGE_FILE_MACHINE_ARM64)
+ compiler_panic(c, no_loc(), "read_coff_dso: unsupported machine 0x%x",
+ machine);
+ if (!(chars & IMAGE_FILE_DLL))
+ compiler_panic(c, no_loc(),
+ "read_coff_dso: not a DLL (Characteristics=0x%x)", chars);
+ if (size_of_opt < COFF_OPT_HDR64_SIZE)
+ compiler_panic(c, no_loc(),
+ "read_coff_dso: SizeOfOptionalHeader %u too small for PE32+",
+ size_of_opt);
+
+ /* ---- IMAGE_OPTIONAL_HEADER64 ---- */
+ const u8* oh = fh + COFF_FILE_HEADER_SIZE;
+ u16 opt_magic = coff_rd_u16(oh + 0);
+ if (opt_magic != IMAGE_NT_OPTIONAL_HDR64_MAGIC)
+ compiler_panic(c, no_loc(),
+ "read_coff_dso: not PE32+ (optional header Magic=0x%x)",
+ opt_magic);
+
+ /* DataDirectory starts at offset 112 of the PE32+ optional header (24
+ * standard + 88 windows-specific bytes). NumberOfRvaAndSizes is unread. */
+ const u8* data_dir = oh + COFF_OPT_HDR64_SIZE -
+ COFF_NUM_DATA_DIRECTORIES * COFF_DATA_DIRECTORY_SIZE;
+ u32 export_rva = coff_rd_u32(data_dir + IMAGE_DIRECTORY_ENTRY_EXPORT *
+ COFF_DATA_DIRECTORY_SIZE);
+ u32 export_size = coff_rd_u32(
+ data_dir + IMAGE_DIRECTORY_ENTRY_EXPORT * COFF_DATA_DIRECTORY_SIZE + 4u);
+
+ /* ---- section table ---- */
+ u64 shdrs_off = (u64)e_lfanew + 4u + COFF_FILE_HEADER_SIZE + size_of_opt;
+ u64 shdrs_end = shdrs_off + (u64)nsec * COFF_SECTION_HEADER_SIZE;
+ if (shdrs_end > len)
+ compiler_panic(c, no_loc(),
+ "read_coff_dso: section table extends past end of file");
+ const u8* shdrs = data + shdrs_off;
+
+ ObjBuilder* ob = obj_new(c);
+ if (!ob) compiler_panic(c, no_loc(), "read_coff_dso: obj_new failed");
+
+ /* No export directory => empty DSO (legal for stub DLLs). */
+ if (export_size == 0 || export_rva == 0) {
+ obj_finalize(ob);
+ return ob;
+ }
+
+ u64 exp_off; /* file offset of the export directory */
+ if (!rva_to_offset(shdrs, nsec, export_rva, len, &exp_off))
+ compiler_panic(c, no_loc(),
+ "read_coff_dso: export directory RVA 0x%x out of range",
+ export_rva);
+ if (exp_off + COFF_EXPORT_DIR_SIZE > len)
+ compiler_panic(c, no_loc(), "read_coff_dso: export directory truncated");
+
+ const u8* ed = data + exp_off;
+ u32 name_rva = coff_rd_u32(ed + 12);
+ u32 num_funcs = coff_rd_u32(ed + 20); /* EAT entry count */
+ u32 num_names = coff_rd_u32(ed + 24); /* ENT / ordinal-table entry count */
+ u32 eat_rva = coff_rd_u32(ed + 28);
+ u32 ent_rva = coff_rd_u32(ed + 32);
+ u32 ord_rva = coff_rd_u32(ed + 36);
+ /* Base (ed + 16) is the user-visible ordinal offset; the cfree linker
+ * matches imports by name, so we don't propagate it. */
+
+ /* ---- DLL name (soname) ---- */
+ if (name_rva) {
+ u64 name_off;
+ if (!rva_to_offset(shdrs, nsec, name_rva, len, &name_off))
+ compiler_panic(c, no_loc(),
+ "read_coff_dso: DLL name RVA 0x%x out of range", name_rva);
+ const char* dll_name;
+ u32 nlen = read_cstr(data, len, name_off, &dll_name);
+ if (nlen && soname_out)
+ *soname_out =
+ pool_intern_slice(c->global, (Slice){.s = dll_name, .len = nlen});
+ }
+
+ /* ---- resolve EAT / ENT / ordinal table once ---- */
+ u64 eat_off = 0, ent_off = 0, ord_off = 0;
+ if (num_names) { /* the tables are only read by the name walk below */
+ if (!rva_to_offset(shdrs, nsec, eat_rva, len, &eat_off))
+ compiler_panic(c, no_loc(), "read_coff_dso: EAT RVA 0x%x out of range",
+ eat_rva);
+ if (!rva_to_offset(shdrs, nsec, ent_rva, len, &ent_off))
+ compiler_panic(c, no_loc(), "read_coff_dso: ENT RVA 0x%x out of range",
+ ent_rva);
+ if (!rva_to_offset(shdrs, nsec, ord_rva, len, &ord_off))
+ compiler_panic(c, no_loc(),
+ "read_coff_dso: ordinal table RVA 0x%x out of range",
+ ord_rva);
+ if (ent_off + (u64)num_names * 4u > len ||
+ ord_off + (u64)num_names * 2u > len)
+ compiler_panic(c, no_loc(),
+ "read_coff_dso: ENT/ordinal table extends past file");
+ if (eat_off + (u64)num_funcs * 4u > len)
+ compiler_panic(c, no_loc(), "read_coff_dso: EAT extends past file");
+ }
+
+ /* ---- walk the ENT ----
+ * Forwarders (EAT RVA inside [export_rva, export_rva + export_size))
+ * still produce a symbol: cfree's linker doesn't follow the chain,
+ * but the import needs to be satisfiable so the OS loader can. */
+ for (u32 i = 0; i < num_names; ++i) {
+ u32 nrva = coff_rd_u32(data + ent_off + (u64)i * 4u);
+ u16 ord = coff_rd_u16(data + ord_off + (u64)i * 2u); /* index into EAT */
+ if (ord >= num_funcs) continue; /* malformed; skip rather than panic */
+ /* func_rva is fetched for forwarder classification only; cfree does
+ * not consume the address itself (DSO symbols are OBJ_SEC_NONE). */
+ u32 func_rva = coff_rd_u32(data + eat_off + (u64)ord * 4u);
+ (void)func_rva; /* see comment above re: forwarders */
+
+ u64 name_off;
+ if (!rva_to_offset(shdrs, nsec, nrva, len, &name_off)) continue;
+ const char* nm;
+ u32 nlen = read_cstr(data, len, name_off, &nm);
+ if (!nlen) continue; /* no usable name at that offset */
+
+ Sym sn = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen});
+ ObjSymId id = obj_symbol(ob, sn, SB_GLOBAL, SK_FUNC, OBJ_SEC_NONE, 0, 0);
+ obj_sym_mark_referenced(ob, id);
+ }
+
+ obj_finalize(ob);
+ return ob;
+}
diff --git a/src/obj/coff/reloc_aarch64.c b/src/obj/coff/reloc_aarch64.c
@@ -0,0 +1,96 @@
+/* RelocKind <-> AArch64 PE/COFF reloc-type mapping. Mirror of
+ * elf_reloc_aarch64.c for PE/COFF.
+ *
+ * The ARM64 PE/COFF reloc set covers the common AArch64 patch sites:
+ * ADRP page-base / page-offset pairs, BRANCH26/19/14, ADDR32/64, plus
+ * the section-relative SECREL family which cfree does not model in v1.
+ * PAGEOFFSET_12L collapses all LDST*_ABS_LO12_NC widths into one wire
+ * code; the width is recoverable from the patched LDR/STR instruction
+ * encoding, so the reader picks the LDST64 form and the consumer can
+ * disambiguate later if it cares. ADDR32NB is image-relative; v1
+ * collapses it to R_ABS32 and lets layout subtract the image base. */
+
+#include "obj/coff/coff.h"
+
+u32 coff_aarch64_reloc_to(u32 kind /* RelocKind */) {
+ switch (kind) {
+ case R_NONE:
+ return IMAGE_REL_ARM64_ABSOLUTE;
+ case R_ABS64:
+ return IMAGE_REL_ARM64_ADDR64;
+ case R_ABS32:
+ return IMAGE_REL_ARM64_ADDR32;
+ case R_AARCH64_CALL26:
+ case R_AARCH64_JUMP26: /* CALL26 and JUMP26 share one wire code */
+ return IMAGE_REL_ARM64_BRANCH26;
+ case R_AARCH64_CONDBR19:
+ return IMAGE_REL_ARM64_BRANCH19;
+ case R_AARCH64_TSTBR14:
+ return IMAGE_REL_ARM64_BRANCH14;
+ case R_AARCH64_ADR_PREL_PG_HI21:
+ case R_AARCH64_ADR_PREL_PG_HI21_NC: /* _NC variant shares the code */
+ return IMAGE_REL_ARM64_PAGEBASE_REL21;
+ case R_AARCH64_ADR_PREL_LO21:
+ return IMAGE_REL_ARM64_REL21;
+ case R_AARCH64_ADD_ABS_LO12_NC:
+ return IMAGE_REL_ARM64_PAGEOFFSET_12A;
+ case R_AARCH64_LDST8_ABS_LO12_NC:
+ case R_AARCH64_LDST16_ABS_LO12_NC:
+ case R_AARCH64_LDST32_ABS_LO12_NC:
+ case R_AARCH64_LDST64_ABS_LO12_NC:
+ case R_AARCH64_LDST128_ABS_LO12_NC: /* every LDST width -> 12L */
+ return IMAGE_REL_ARM64_PAGEOFFSET_12L;
+ case R_PC32:
+ case R_REL32: /* same wire code as R_PC32 */
+ return IMAGE_REL_ARM64_REL32;
+ case R_COFF_SECREL:
+ return IMAGE_REL_ARM64_SECREL;
+ case R_COFF_SECTION:
+ return IMAGE_REL_ARM64_SECTION;
+ case R_COFF_AARCH64_SECREL_LOW12A:
+ return IMAGE_REL_ARM64_SECREL_LOW12A;
+ case R_COFF_AARCH64_SECREL_HIGH12A:
+ return IMAGE_REL_ARM64_SECREL_HIGH12A;
+ default: /* unmapped kind: same value as R_NONE */
+ return IMAGE_REL_ARM64_ABSOLUTE;
+ }
+}
+
+u32 coff_aarch64_reloc_from(u32 wire_type) {
+ switch (wire_type) {
+ case IMAGE_REL_ARM64_ABSOLUTE:
+ return R_NONE;
+ case IMAGE_REL_ARM64_ADDR64:
+ return R_ABS64;
+ case IMAGE_REL_ARM64_ADDR32:
+ return R_ABS32;
+ case IMAGE_REL_ARM64_ADDR32NB:
+ return R_ABS32; /* image-relative form collapsed to R_ABS32 (v1) */
+ case IMAGE_REL_ARM64_BRANCH26:
+ return R_AARCH64_CALL26; /* JUMP26 emits the same wire code */
+ case IMAGE_REL_ARM64_BRANCH19:
+ return R_AARCH64_CONDBR19;
+ case IMAGE_REL_ARM64_BRANCH14:
+ return R_AARCH64_TSTBR14;
+ case IMAGE_REL_ARM64_PAGEBASE_REL21:
+ return R_AARCH64_ADR_PREL_PG_HI21; /* the non-_NC kind is returned */
+ case IMAGE_REL_ARM64_REL21:
+ return R_AARCH64_ADR_PREL_LO21;
+ case IMAGE_REL_ARM64_PAGEOFFSET_12A:
+ return R_AARCH64_ADD_ABS_LO12_NC;
+ case IMAGE_REL_ARM64_PAGEOFFSET_12L:
+ return R_AARCH64_LDST64_ABS_LO12_NC; /* width is not in the wire type */
+ case IMAGE_REL_ARM64_REL32:
+ return R_PC32; /* R_REL32 emits the same wire code */
+ case IMAGE_REL_ARM64_SECREL:
+ return R_COFF_SECREL;
+ case IMAGE_REL_ARM64_SECTION:
+ return R_COFF_SECTION;
+ case IMAGE_REL_ARM64_SECREL_LOW12A:
+ return R_COFF_AARCH64_SECREL_LOW12A;
+ case IMAGE_REL_ARM64_SECREL_HIGH12A:
+ return R_COFF_AARCH64_SECREL_HIGH12A;
+ default:
+ return (u32)-1; /* sentinel: wire type has no RelocKind mapping */
+ }
+}
diff --git a/src/obj/coff/reloc_x86_64.c b/src/obj/coff/reloc_x86_64.c
@@ -0,0 +1,76 @@
+/* RelocKind <-> x86_64 PE/COFF reloc-type mapping. Mirror of
+ * elf_reloc_x86_64.c for PE/COFF.
+ *
+ * PE/COFF's AMD64 reloc set is much narrower than ELF's: only ABSOLUTE,
+ * ADDR64, ADDR32, ADDR32NB, REL32 (with REL32_1..5 implicit-addend
+ * variants), plus a few section-relative forms cfree does not model in
+ * v1. We emit plain REL32 (4) for every PC-relative kind and let the
+ * explicit Reloc.addend ride in the patched bytes; on the read side
+ * REL32_1..5 collapse to R_PC32 (the reader applies the implicit
+ * addend itself). IMAGE_REL_AMD64_ABSOLUTE (== 0) doubles as the
+ * "unsupported" sentinel on the _to side, matching the ELF contract. */
+
+#include "obj/coff/coff.h"
+
+u32 coff_x86_64_reloc_to(u32 kind /* RelocKind */) {
+ switch (kind) {
+ case R_NONE:
+ return IMAGE_REL_AMD64_ABSOLUTE;
+ case R_ABS64:
+ return IMAGE_REL_AMD64_ADDR64;
+ case R_ABS32:
+ return IMAGE_REL_AMD64_ADDR32;
+ case R_X64_32S:
+ return IMAGE_REL_AMD64_ADDR32NB; /* NOTE(review): NB looks image-relative; confirm */
+ case R_PC32:
+ case R_REL32:
+ case R_PLT32:
+ case R_X64_PLT32:
+ case R_X64_GOTPCREL:
+ case R_X64_GOTPCRELX:
+ case R_X64_REX_GOTPCRELX: /* every PC-relative kind -> plain REL32 */
+ return IMAGE_REL_AMD64_REL32;
+ case R_COFF_SECREL:
+ return IMAGE_REL_AMD64_SECREL;
+ case R_COFF_SECTION:
+ return IMAGE_REL_AMD64_SECTION;
+ default: /* unmapped kind: same value as R_NONE (the "unsupported" sentinel) */
+ return IMAGE_REL_AMD64_ABSOLUTE;
+ }
+}
+
+u32 coff_x86_64_reloc_from(u32 wire_type) {
+ switch (wire_type) {
+ case IMAGE_REL_AMD64_ABSOLUTE:
+ return R_NONE;
+ case IMAGE_REL_AMD64_ADDR64:
+ return R_ABS64;
+ case IMAGE_REL_AMD64_ADDR32:
+ return R_ABS32;
+ case IMAGE_REL_AMD64_ADDR32NB:
+ return R_X64_32S; /* inverse of the emit-side mapping */
+ case IMAGE_REL_AMD64_REL32:
+ case IMAGE_REL_AMD64_REL32_1:
+ case IMAGE_REL_AMD64_REL32_2:
+ case IMAGE_REL_AMD64_REL32_3:
+ case IMAGE_REL_AMD64_REL32_4:
+ case IMAGE_REL_AMD64_REL32_5: /* implicit addend is left to the reader */
+ return R_PC32;
+ case IMAGE_REL_AMD64_SECREL:
+ return R_COFF_SECREL;
+ case IMAGE_REL_AMD64_SECTION:
+ return R_COFF_SECTION;
+ /* SECREL7 (7-bit section-relative) appears in mingw-emitted archive
+ * members (intrinsic helpers, exception tables, DWARF). cfree
+ * doesn't currently apply or emit these, but panicking at read
+ * time would block ingesting any mingw archive whose non-import
+ * members carry .debug_info / .pdata. Map to R_NONE so the
+ * relocation slot is preserved structurally but treated as a
+ * no-op by the relocator; the member can still be dead-stripped
+ * when nothing references it. */
+ case IMAGE_REL_AMD64_SECREL7:
+ return R_NONE;
+ default:
+ return (u32)-1; /* sentinel: wire type has no RelocKind mapping */
+ }
+}
diff --git a/src/obj/coff_emit.c b/src/obj/coff_emit.c
@@ -1,732 +0,0 @@
-/* PE/COFF relocatable .obj writer. Walks a finalized ObjBuilder and
- * emits a 64-bit little-endian relocatable object via the supplied
- * Writer. Counterpart to emit_elf / emit_macho.
- *
- * Layout strategy:
- * 1. plan COFF sections (one per kept obj section), assigning
- * Characteristics, alignment, raw size, and per-section reloc
- * counts;
- * 2. build the symbol table (synthesized per-section static symbols
- * with section-definition aux records, plus file symbols and
- * every ObjSym kept after sweep);
- * 3. build per-section relocation records via the per-arch
- * translator (arch_for_compiler(c)->coff->reloc_to);
- * 4. assign file offsets:
- * file header | section headers | (bytes + relocs)* | symtab | strtab
- * 5. write the file in that order.
- *
- * 64-bit little-endian only — IMAGE_FILE_MACHINE_AMD64 (x86_64) and
- * IMAGE_FILE_MACHINE_ARM64 (aarch64). Big-endian / ptr_size != 8 panic
- * at entry.
- *
- * Section name mapping policy: we pass the cfree Section.name through
- * verbatim to the COFF Name field. Callers / readers are expected to
- * have stored COFF-shaped names (".text", ".rdata", ".tls$", etc.) at
- * the obj layer; emit_coff does not rewrite ELF-style spellings like
- * ".rodata" -> ".rdata". Names longer than 8 bytes spill into the
- * string table with the "/<decimal-offset>" encoding.
- *
- * Addend handling: COFF stores the addend inline in the patched bytes
- * (there is no addend field in IMAGE_RELOCATION). The ObjBuilder
- * caller is responsible for having written the addend into the section
- * bytes already — matching how MSVC / mingw emit. A nonzero
- * Reloc::addend with has_explicit_addend set is rejected here as a
- * known v1 limitation. */
-
-#include <string.h>
-
-#include "arch/arch.h"
-#include "core/arena.h"
-#include "core/buf.h"
-#include "core/heap.h"
-#include "core/pool.h"
-#include "core/slice.h"
-#include "core/util.h"
-#include "obj/coff.h"
-
-static SrcLoc no_loc(void) {
- SrcLoc l = {0, 0, 0};
- return l;
-}
-
-static int coff_rel32_absorbs_minus4(CfreeArchKind arch, RelocKind kind,
- i64 addend) {
- if (arch != CFREE_ARCH_X86_64 || addend != -4) return 0;
- switch (kind) {
- case R_PC32:
- case R_REL32:
- case R_PLT32:
- case R_X64_PLT32:
- case R_X64_GOTPCREL:
- case R_X64_GOTPCRELX:
- case R_X64_REX_GOTPCRELX:
- return 1;
- default:
- return 0;
- }
-}
-
-/* ---- per-COFF-section plan record ---- */
-
-typedef struct CSec {
- /* IMAGE_SECTION_HEADER fields (little-endian-encoded at write time). */
- char name8[8]; /* Name field bytes; "/N" form if long name */
- u32 virtual_size; /* nonzero for NOBITS (bss size) */
- u32 size_of_raw_data; /* zero for NOBITS */
- u32 pointer_to_raw_data;
- u32 pointer_to_relocations;
- u16 number_of_relocations;
- u32 characteristics; /* IMAGE_SCN_* | ALIGN nibble */
-
- /* Planning state. */
- u32 align; /* in bytes, power of two */
- u32 obj_sec; /* originating ObjSecId */
- int is_nobits;
- const Buf* obj_bytes; /* NULL when nobits */
- u8* reloc_bytes; /* arena-allocated, nreloc * 10 bytes */
- ObjGroupId group_id; /* OBJ_GROUP_NONE if not in a group */
-} CSec;
-
-/* ---- emit ---- */
-
-static u32 log2_align(u32 a) {
- u32 r = 0;
- while ((1u << r) < a) ++r;
- return r;
-}
-
-/* Map cfree section flags/sem to IMAGE_SCN_* Characteristics, leaving
- * the alignment nibble for the caller to OR in. */
-static u32 sec_characteristics(const Section* s, int in_group) {
- u32 r = 0;
- int is_bss = (s->kind == SEC_BSS) || (s->sem == SSEM_NOBITS);
- if (s->flags & SF_EXEC) {
- r |= IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_EXECUTE;
- } else if (is_bss) {
- r |= IMAGE_SCN_CNT_UNINITIALIZED_DATA;
- } else if (s->flags & SF_WRITE) {
- r |= IMAGE_SCN_CNT_INITIALIZED_DATA;
- } else if (s->flags & SF_ALLOC) {
- /* Read-only allocated data (.rdata). */
- r |= IMAGE_SCN_CNT_INITIALIZED_DATA;
- }
- if (s->flags & SF_ALLOC) r |= IMAGE_SCN_MEM_READ;
- if (s->flags & SF_WRITE) r |= IMAGE_SCN_MEM_WRITE;
- if (in_group) r |= IMAGE_SCN_LNK_COMDAT;
- /* When a reader stashed format-specific flag bits on a COFF-origin
- * section, OR them back in here. ext_type carries the raw
- * Characteristics value (or zero if no override); ext_flags is a
- * sibling bag for any bits the canonical mapping above would lose. */
- if (s->ext_kind == OBJ_EXT_COFF) {
- if (s->ext_type) {
- /* Preserve the raw characteristics verbatim — overrides the
- * canonical mapping. Keeps round-trip byte-stable for sections
- * carrying CNT_INFO / LNK_REMOVE / MEM_DISCARDABLE / etc. */
- r = s->ext_type & ~IMAGE_SCN_ALIGN_MASK;
- }
- r |= s->ext_flags;
- }
- return r;
-}
-
-/* Append `len` bytes of `s` followed by a single NUL to `b`, returning
- * the offset at which `s` was placed. Dedupe linearly — strtabs are
- * small enough that this is fine without a hash table, and the
- * dedupe matches what binutils / llvm-objcopy emit. Mirror of the
- * helper in elf_emit. */
-static u32 strtab_add(Buf* b, const char* s, u32 len) {
- if (len == 0) return 0;
- u32 total = buf_pos(b);
- if (total > len) {
- u8 stack[256];
- u8* tmp =
- total <= sizeof stack ? stack : (u8*)b->heap->alloc(b->heap, total, 1);
- if (tmp) {
- buf_flatten(b, tmp);
- /* Skip the first 4 bytes (the size-prefix placeholder) when
- * searching for matches. */
- u32 start = COFF_STRTAB_SIZE_FIELD_BYTES;
- if (total > start + len) {
- for (u32 i = start; i + len < total; ++i) {
- if (tmp[i + len] == 0 && memcmp(tmp + i, s, len) == 0) {
- if (tmp != stack) b->heap->free(b->heap, tmp, total);
- return i;
- }
- }
- }
- if (tmp != stack) b->heap->free(b->heap, tmp, total);
- }
- }
- u32 off = total;
- buf_write(b, s, len);
- {
- u8 z = 0;
- buf_write(b, &z, 1);
- }
- return off;
-}
-
-/* Encode an 8-byte Name field. If the name fits in 8 bytes, copy
- * verbatim and zero-pad. Otherwise allocate the name in `strtab` and
- * write "/<decimal-offset>" (NUL-padded to 8 bytes). */
-static void encode_name8(char out[8], const char* name, u32 nlen, Buf* strtab) {
- memset(out, 0, 8);
- if (nlen <= 8) {
- if (nlen) memcpy(out, name, nlen);
- return;
- }
- u32 off = strtab_add(strtab, name, nlen);
- /* "/<decimal-offset>" — up to 7 decimal digits leaves room for the
- * leading slash within 8 bytes. COFF .obj strtabs are < 1 MiB in
- * practice, so 7 digits is plenty. */
- char tmp[16];
- int n = 0;
- tmp[n++] = '/';
- /* Decimal-format off into tmp+1. */
- char dig[12];
- int d = 0;
- u32 v = off;
- if (v == 0) {
- dig[d++] = '0';
- } else {
- while (v) {
- dig[d++] = (char)('0' + (v % 10u));
- v /= 10u;
- }
- }
- while (d > 0 && n < (int)sizeof tmp) tmp[n++] = dig[--d];
- if (n > 8) n = 8;
- memcpy(out, tmp, (size_t)n);
-}
-
-/* Write one 18-byte IMAGE_SYMBOL record into `dst`. */
-static void wr_sym(u8* dst, const char ShortName[8], u32 Zeroes, u32 Offset,
- u32 Value, i16 SectionNumber, u16 Type, u8 StorageClass,
- u8 NumberOfAuxSymbols) {
- if (Zeroes == 0 && Offset != 0) {
- /* LongName form: 4 zero bytes then 4-byte LE strtab offset. */
- memset(dst, 0, 4);
- wr_u32_le(dst + 4, Offset);
- } else {
- memcpy(dst, ShortName, 8);
- }
- wr_u32_le(dst + 8, Value);
- wr_u16_le(dst + 12, (u16)SectionNumber);
- wr_u16_le(dst + 14, Type);
- dst[16] = StorageClass;
- dst[17] = NumberOfAuxSymbols;
-}
-
-/* Write a section-definition aux record (18 bytes). */
-static void wr_aux_secdef(u8* dst, u32 Length, u16 NumberOfRelocations,
- u16 NumberOfLinenumbers, u32 CheckSum, u16 Number,
- u8 Selection) {
- wr_u32_le(dst + 0, Length);
- wr_u16_le(dst + 4, NumberOfRelocations);
- wr_u16_le(dst + 6, NumberOfLinenumbers);
- wr_u32_le(dst + 8, CheckSum);
- wr_u16_le(dst + 12, Number);
- dst[14] = Selection;
- dst[15] = 0;
- dst[16] = 0;
- dst[17] = 0;
-}
-
-/* Write a weak-externals aux record (18 bytes). */
-static void wr_aux_weak(u8* dst, u32 TagIndex, u32 Characteristics) {
- wr_u32_le(dst + 0, TagIndex);
- wr_u32_le(dst + 4, Characteristics);
- memset(dst + 8, 0, 10);
-}
-
-/* Look up the pool-interned string for a Sym. */
-static const char* sym_to_str(Compiler* c, Sym n, u32* len_out) {
- Slice sl = pool_slice(c->global, n);
- const char* s = sl.s;
- if (!s) {
- *len_out = 0;
- return "";
- }
- *len_out = (u32)sl.len;
- return s;
-}
-
-void emit_coff(Compiler* c, ObjBuilder* ob, Writer* w) {
- Heap* h = (Heap*)c->ctx->heap;
-
- /* Tombstone sweep — see obj_sweep_dead. */
- obj_sweep_dead(ob);
-
- /* ---- target validation ----------------------------------------- */
- const ArchImpl* arch = arch_for_compiler(c);
- const ArchCoffOps* coff = arch ? arch->coff : NULL;
- if (!coff || !coff->reloc_to) {
- compiler_panic(c, no_loc(), "emit_coff: unsupported target arch %u",
- (u32)c->target.arch);
- }
- u16 machine = coff->machine;
- u32 (*reloc_to)(u32) = coff->reloc_to;
- if (c->target.big_endian) {
- compiler_panic(c, no_loc(), "emit_coff: big-endian COFF not supported");
- }
- if (c->target.ptr_size != 8) {
- compiler_panic(c, no_loc(), "emit_coff: ptr_size %u (expected 8)",
- (u32)c->target.ptr_size);
- }
-
- /* ---- pass 1: plan sections ------------------------------------- */
- u32 nobjsec = obj_section_count(ob);
- CSec* secs = arena_zarray(c->scratch, CSec, nobjsec ? nobjsec : 1);
- u32* obj_to_coff = arena_zarray(c->scratch, u32, nobjsec ? nobjsec : 1);
- u32 nsecs = 0;
-
- /* String table — leading 4-byte size placeholder. Real strings start
- * at offset 4. */
- Buf strtab;
- buf_init(&strtab, h);
- {
- u8 zero4[COFF_STRTAB_SIZE_FIELD_BYTES] = {0, 0, 0, 0};
- buf_write(&strtab, zero4, COFF_STRTAB_SIZE_FIELD_BYTES);
- }
-
- for (u32 i = 1; i < nobjsec; ++i) {
- const Section* s = obj_section_get(ob, i);
- if (s->removed) continue;
- /* Skip ELF-style synthetic sections (a reader from another format
- * may have surfaced them) — COFF stores symtab/strtab/relocs
- * out-of-band, not as named sections. */
- if (s->sem == SSEM_SYMTAB || s->sem == SSEM_STRTAB ||
- s->sem == SSEM_RELA || s->sem == SSEM_REL || s->sem == SSEM_GROUP) {
- continue;
- }
-
- CSec* cs = &secs[nsecs];
- u32 nlen;
- const char* nm = sym_to_str(c, s->name, &nlen);
- encode_name8(cs->name8, nm, nlen, &strtab);
-
- cs->obj_sec = i;
- cs->group_id = s->group_id;
- cs->align = s->align ? s->align : 1;
-
- int in_group = (s->group_id != OBJ_GROUP_NONE);
- u32 ch = sec_characteristics(s, in_group);
- /* Alignment lives in bits 20..23. Cap at log2(8192)=13 -> nibble
- * value 14 (IMAGE_SCN_ALIGN_8192BYTES). */
- u32 lg = log2_align(cs->align);
- if (lg > 13) lg = 13;
- ch &= ~IMAGE_SCN_ALIGN_MASK;
- ch |= IMAGE_SCN_ALIGN_FROM_LOG2(lg);
- cs->characteristics = ch;
-
- if (s->sem == SSEM_NOBITS || s->kind == SEC_BSS) {
- cs->is_nobits = 1;
- cs->virtual_size = s->bss_size;
- cs->size_of_raw_data = 0;
- cs->obj_bytes = NULL;
- } else {
- cs->is_nobits = 0;
- cs->virtual_size = 0;
- cs->size_of_raw_data = s->bytes.total;
- cs->obj_bytes = &s->bytes;
- }
-
- obj_to_coff[i] = nsecs + 1; /* 1-based; matches SectionNumber. */
- nsecs++;
- }
-
- /* ---- pass 2: count and assign per-section reloc counts --------- */
- /* COFF stores NumberOfRelocations as u16; sections with > 65535
- * relocs use the IMAGE_SCN_LNK_NRELOC_OVFL extension which we don't
- * implement in v1. Panic if any single section exceeds the limit. */
- u32 total_relocs = obj_reloc_total(ob);
- for (u32 ci = 0; ci < nsecs; ++ci) {
- CSec* cs = &secs[ci];
- u32 nr = obj_reloc_count(ob, cs->obj_sec);
- if (nr > 0xFFFFu) {
- compiler_panic(c, no_loc(),
- "emit_coff: section %u has %u relocs (max 65535)",
- (u32)cs->obj_sec, nr);
- }
- cs->number_of_relocations = (u16)nr;
- }
-
- /* ---- pass 3: build the symbol table ---------------------------- */
- /* Count ObjSyms (incl. tombstoned — we'll skip those when emitting). */
- u32 nobjsym = 0;
- {
- ObjSymIter* it = obj_symiter_new(ob);
- ObjSymEntry e;
- while (obj_symiter_next(it, &e)) ++nobjsym;
- obj_symiter_free(it);
- }
-
- /* Upper bound on symbol-table records (including aux slots):
- * - 2 records per section symbol (primary + 1 aux secdef)
- * - 2 records per ObjSym (primary + up to 1 weak aux)
- * - +2 spare for safety
- * Worst case is generous; we trim by tracking nrecords as we emit. */
- u32 max_records = 2u * nsecs + 2u * nobjsym + 4u;
- u8* symtab = (u8*)arena_zarray(c->scratch, u8,
- (size_t)COFF_SYMBOL_SIZE * max_records);
- u32 nrecords = 0;
-
- /* obj_id -> COFF symbol index (including aux slots). Index 0 is
- * reserved as "none" in our internal map (a real COFF symbol may
- * legitimately live at index 0, but no ObjSym ever maps there since
- * we never put OBJ_SYM_NONE through). */
- u32* sym_to_coff = arena_zarray(c->scratch, u32, nobjsym + 2);
-
- /* Section symbols first — one STATIC per kept obj section, each
- * followed by a SECTION DEFINITION aux. Reloc-against-section in
- * other tools' output uses these; emitting them unconditionally
- * matches what clang / mingw emit and gives readers a stable target. */
- u32* secsym_index = arena_zarray(c->scratch, u32, nsecs + 1);
- for (u32 ci = 0; ci < nsecs; ++ci) {
- CSec* cs = &secs[ci];
- char short_name[8];
- /* The section symbol's name is the section's own name (truncated
- * to 8 bytes — section symbols never use the strtab spill form in
- * MSVC/clang output). */
- memcpy(short_name, cs->name8, 8);
-
- u8* slot = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE;
- wr_sym(slot, short_name, /*Zeroes*/ 1, /*Offset*/ 0,
- /*Value*/ 0,
- /*SectionNumber*/ (i16)(ci + 1),
- /*Type*/ IMAGE_SYM_TYPE_NULL,
- /*StorageClass*/ IMAGE_SYM_CLASS_STATIC,
- /*NumberOfAuxSymbols*/ 1);
- secsym_index[ci] = nrecords;
- nrecords++;
-
- /* Section-definition aux. For COMDAT members we encode the
- * Selection from the group; default to SELECT_ANY which is what
- * gcc/clang emit unless the user requests a specific selection
- * mode. The associated-section Number is left at 0 (cfree does
- * not produce associative-COMDAT chains today). */
- u8 selection = 0;
- if (cs->group_id != OBJ_GROUP_NONE) {
- const ObjGroup* g = obj_group_get(ob, cs->group_id);
- if (g && !g->removed) {
- selection = g->flags ? (u8)IMAGE_COMDAT_SELECT_ANY
- : (u8)IMAGE_COMDAT_SELECT_ANY;
- }
- }
- u8* aux = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE;
- wr_aux_secdef(aux, /*Length*/ cs->size_of_raw_data,
- /*NumberOfRelocations*/ cs->number_of_relocations,
- /*NumberOfLinenumbers*/ 0,
- /*CheckSum*/ 0,
- /*Number*/ 0,
- /*Selection*/ selection);
- nrecords++;
- }
-
- /* File / regular symbols. */
- {
- ObjSymIter* it = obj_symiter_new(ob);
- ObjSymEntry e;
- while (obj_symiter_next(it, &e)) {
- const ObjSym* s = e.sym;
- if (s->removed) continue;
- if (s->kind == SK_IFUNC) {
- compiler_panic(c, no_loc(),
- "emit_coff: SK_IFUNC has no PE/COFF representation");
- }
- /* Don't re-emit SK_SECTION symbols — section symbols are
- * synthesized above. Map any input-side SK_SECTION onto the
- * already-emitted one. */
- if (s->kind == SK_SECTION) {
- if (s->section_id && s->section_id < nobjsec) {
- u32 ci = obj_to_coff[s->section_id];
- if (ci) sym_to_coff[e.id] = secsym_index[ci - 1];
- }
- continue;
- }
-
- u32 nlen;
- const char* nm = sym_to_str(c, s->name, &nlen);
-
- if (s->kind == SK_FILE) {
- /* File symbol: name ".file" (short), section IMAGE_SYM_DEBUG,
- * storage class FILE, followed by aux records carrying the
- * NUL-padded file path (18 bytes per aux). */
- u32 file_len = nlen;
- u32 naux = file_len ? (file_len + COFF_AUX_FILE_SIZE - 1u) /
- COFF_AUX_FILE_SIZE
- : 1u;
- char short_name[8] = {'.', 'f', 'i', 'l', 'e', 0, 0, 0};
- u8* slot = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE;
- wr_sym(slot, short_name, 1, 0, /*Value*/ 0,
- /*SectionNumber*/ (i16)IMAGE_SYM_DEBUG,
- /*Type*/ IMAGE_SYM_TYPE_NULL,
- /*StorageClass*/ IMAGE_SYM_CLASS_FILE,
- /*NumberOfAuxSymbols*/ (u8)naux);
- sym_to_coff[e.id] = nrecords;
- nrecords++;
- for (u32 a = 0; a < naux; ++a) {
- u8* aux = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE;
- memset(aux, 0, COFF_AUX_FILE_SIZE);
- u32 off = a * COFF_AUX_FILE_SIZE;
- u32 copy = file_len > off ? file_len - off : 0;
- if (copy > COFF_AUX_FILE_SIZE) copy = COFF_AUX_FILE_SIZE;
- if (copy) memcpy(aux, nm + off, copy);
- nrecords++;
- }
- continue;
- }
-
- /* Regular symbol. */
- char short_name[8];
- u32 zeroes = 1, offset = 0;
- memset(short_name, 0, 8);
- if (nlen <= 8) {
- if (nlen) memcpy(short_name, nm, nlen);
- } else {
- zeroes = 0;
- offset = strtab_add(&strtab, nm, nlen);
- }
-
- i16 section_number = 0;
- u32 value = 0;
- u8 storage_class = IMAGE_SYM_CLASS_NULL;
- u16 type = IMAGE_SYM_TYPE_NULL;
- u8 naux = 0;
- int emit_weak_aux = 0;
-
- switch (s->kind) {
- case SK_ABS:
- section_number = (i16)IMAGE_SYM_ABSOLUTE;
- value = (u32)s->value;
- break;
- case SK_COMMON:
- /* COFF lacks a per-common alignment field; encode size in
- * Value with SectionNumber=UNDEFINED and rely on the linker
- * to pick a default alignment. (cfree's frontend uses
- * COMMON only via __attribute__((common)) which is rare on
- * PE/COFF targets.) */
- section_number = (i16)IMAGE_SYM_UNDEFINED;
- value = (u32)s->size;
- break;
- default:
- if (s->section_id == OBJ_SEC_NONE) {
- section_number = (i16)IMAGE_SYM_UNDEFINED;
- value = 0;
- } else if (s->section_id < nobjsec && obj_to_coff[s->section_id]) {
- section_number = (i16)obj_to_coff[s->section_id];
- value = (u32)s->value;
- } else {
- section_number = (i16)IMAGE_SYM_UNDEFINED;
- value = 0;
- }
- break;
- }
-
- if (s->kind == SK_FUNC) type = (u16)COFF_SYM_TYPE_FUNCTION;
-
- switch (s->bind) {
- case SB_LOCAL:
- storage_class = IMAGE_SYM_CLASS_STATIC;
- break;
- case SB_GLOBAL:
- storage_class = IMAGE_SYM_CLASS_EXTERNAL;
- break;
- case SB_WEAK:
- /* mingw / clang spell weak as EXTERNAL with a WeakExternal
- * aux that points at the fallback symbol. cfree's obj layer
- * doesn't carry a separate fallback symbol today, so we emit
- * a self-referential weak aux (TagIndex=0) which the linker
- * treats as "weak, no fallback" — equivalent to ELF STB_WEAK. */
- storage_class = IMAGE_SYM_CLASS_WEAK_EXTERNAL;
- emit_weak_aux = 1;
- naux = 1;
- break;
- default:
- storage_class = IMAGE_SYM_CLASS_STATIC;
- break;
- }
-
- u8* slot = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE;
- wr_sym(slot, short_name, zeroes, offset, value, section_number, type,
- storage_class, naux);
- sym_to_coff[e.id] = nrecords;
- nrecords++;
- if (emit_weak_aux) {
- u8* aux = symtab + (size_t)nrecords * COFF_SYMBOL_SIZE;
- wr_aux_weak(aux, /*TagIndex*/ 0,
- /*Characteristics*/ IMAGE_WEAK_EXTERN_SEARCH_LIBRARY);
- nrecords++;
- }
- }
- obj_symiter_free(it);
- }
-
- /* ---- pass 4: build per-section relocation tables --------------- */
- for (u32 ci = 0; ci < nsecs; ++ci) {
- CSec* cs = &secs[ci];
- u32 nr = cs->number_of_relocations;
- if (!nr) continue;
- u8* buf =
- (u8*)arena_alloc(c->scratch, (size_t)COFF_RELOC_SIZE * nr, _Alignof(u32));
- u32 j = 0;
- for (u32 ri = 0; ri < total_relocs; ++ri) {
- const Reloc* r = obj_reloc_at(ob, ri);
- if (r->removed) continue;
- if (r->section_id != cs->obj_sec) continue;
- if (r->sym == OBJ_SYM_NONE) {
- compiler_panic(c, no_loc(),
- "emit_coff: reloc without symbol not supported "
- "(sec=%u offset=%u kind=%u)",
- (u32)r->section_id, (u32)r->offset, (u32)r->kind);
- }
- if (r->has_explicit_addend && r->addend != 0 &&
- !coff_rel32_absorbs_minus4(c->target.arch, (RelocKind)r->kind,
- r->addend)) {
- /* v1 limitation: COFF carries the addend in the patched bytes,
- * and we don't currently mutate the obj's section bytes to
- * encode a separate explicit addend. cfree's MCEmitter writes
- * the addend inline for COFF targets, so this branch only
- * fires for inputs synthesized by external tools. */
- compiler_panic(c, no_loc(),
- "emit_coff: explicit nonzero addend not supported "
- "(sec=%u offset=%u kind=%u addend=%lld)",
- (u32)r->section_id, (u32)r->offset, (u32)r->kind,
- (long long)r->addend);
- }
- u32 wire = reloc_to(r->kind);
- /* Both arch translators use 0 (IMAGE_REL_*_ABSOLUTE) as the
- * unsupported-input sentinel; treat that as a panic unless the
- * input really is R_NONE. */
- if (wire == 0 && r->kind != R_NONE) {
- compiler_panic(
- c, no_loc(),
- "emit_coff: unsupported relocation kind %u for arch %u",
- (u32)r->kind, (u32)c->target.arch);
- }
- u32 sym_idx = sym_to_coff[r->sym];
- u8* slot = buf + (size_t)j * COFF_RELOC_SIZE;
- wr_u32_le(slot + 0, r->offset);
- wr_u32_le(slot + 4, sym_idx);
- wr_u16_le(slot + 8, (u16)wire);
- ++j;
- }
- cs->reloc_bytes = buf;
- /* If a tombstoned reloc was skipped between count and emit, j may
- * be less than nr; trust the latter count for the wire field. */
- if (j != nr) cs->number_of_relocations = (u16)j;
- }
-
- /* ---- pass 5: assign file offsets ------------------------------- */
- /* Layout:
- * [file header] [section headers] [per-section: bytes, relocs]*
- * [symbol table] [string table] */
- u64 cur = (u64)COFF_FILE_HEADER_SIZE +
- (u64)COFF_SECTION_HEADER_SIZE * (u64)nsecs;
-
- for (u32 ci = 0; ci < nsecs; ++ci) {
- CSec* cs = &secs[ci];
- /* Raw data offset. NOBITS contributes nothing on disk. */
- if (cs->is_nobits || cs->size_of_raw_data == 0) {
- cs->pointer_to_raw_data = 0;
- } else {
- cur = ALIGN_UP(cur, (u64)cs->align);
- cs->pointer_to_raw_data = (u32)cur;
- cur += cs->size_of_raw_data;
- }
- /* Reloc table. COFF doesn't mandate alignment for the reloc array,
- * but llvm and binutils emit them naturally byte-packed; we 4-align
- * for tidiness. */
- if (cs->number_of_relocations) {
- cur = ALIGN_UP(cur, (u64)4);
- cs->pointer_to_relocations = (u32)cur;
- cur += (u64)cs->number_of_relocations * COFF_RELOC_SIZE;
- } else {
- cs->pointer_to_relocations = 0;
- }
- }
-
- cur = ALIGN_UP(cur, (u64)4);
- u64 symtab_off = cur;
- cur += (u64)nrecords * COFF_SYMBOL_SIZE;
-
- /* String table starts immediately after the symtab. Patch the 4-byte
- * size prefix (inclusive). */
- u32 strtab_size = buf_pos(&strtab);
- /* The size field is part of the on-disk strtab and is the total
- * inclusive byte count. Patch it now. */
- {
- u8 sz_le[4];
- wr_u32_le(sz_le, strtab_size);
- /* Buf doesn't expose in-place patch; flatten, patch, re-emit when
- * we write. Just remember the value. */
- (void)sz_le;
- }
- u64 strtab_off = cur;
- cur += strtab_size;
-
- /* ---- pass 6: write the file ------------------------------------ */
- cfree_writer_seek(w, 0);
-
- /* IMAGE_FILE_HEADER */
- coff_wr_u16(w, machine);
- coff_wr_u16(w, (u16)nsecs);
- coff_wr_u32(w, 0); /* TimeDateStamp: reproducible */
- coff_wr_u32(w, (u32)symtab_off);
- coff_wr_u32(w, nrecords);
- coff_wr_u16(w, 0); /* SizeOfOptionalHeader: 0 for .obj */
- coff_wr_u16(w, IMAGE_FILE_LARGE_ADDRESS_AWARE);
-
- /* Section headers — one 40-byte block immediately after the file
- * header. */
- for (u32 ci = 0; ci < nsecs; ++ci) {
- const CSec* cs = &secs[ci];
- cfree_writer_write(w, cs->name8, 8);
- coff_wr_u32(w, cs->virtual_size);
- coff_wr_u32(w, 0); /* VirtualAddress: 0 for .obj */
- coff_wr_u32(w, cs->size_of_raw_data);
- coff_wr_u32(w, cs->pointer_to_raw_data);
- coff_wr_u32(w, cs->pointer_to_relocations);
- coff_wr_u32(w, 0); /* PointerToLinenumbers: 0 */
- coff_wr_u16(w, cs->number_of_relocations);
- coff_wr_u16(w, 0); /* NumberOfLinenumbers: 0 */
- coff_wr_u32(w, cs->characteristics);
- }
-
- /* Section bytes + relocs (interleaved). */
- for (u32 ci = 0; ci < nsecs; ++ci) {
- const CSec* cs = &secs[ci];
- if (!cs->is_nobits && cs->size_of_raw_data && cs->obj_bytes) {
- cfree_writer_seek(w, cs->pointer_to_raw_data);
- u32 sz = cs->obj_bytes->total;
- u8* tmp = (u8*)h->alloc(h, sz ? sz : 1, 1);
- if (sz) buf_flatten(cs->obj_bytes, tmp);
- cfree_writer_write(w, tmp, sz);
- h->free(h, tmp, sz ? sz : 1);
- }
- if (cs->number_of_relocations && cs->reloc_bytes) {
- cfree_writer_seek(w, cs->pointer_to_relocations);
- cfree_writer_write(w, cs->reloc_bytes,
- (size_t)cs->number_of_relocations * COFF_RELOC_SIZE);
- }
- }
-
- /* Symbol table. */
- cfree_writer_seek(w, symtab_off);
- cfree_writer_write(w, symtab, (size_t)nrecords * COFF_SYMBOL_SIZE);
-
- /* String table: 4-byte total size (inclusive) followed by the body.
- * `strtab` was initialized with 4 placeholder zero bytes; rewrite
- * them with the real size before flushing. */
- {
- u8* flat = (u8*)arena_alloc(c->scratch, strtab_size ? strtab_size : 1, 1);
- if (strtab_size) buf_flatten(&strtab, flat);
- /* Patch the 4-byte size prefix in place. */
- if (strtab_size >= COFF_STRTAB_SIZE_FIELD_BYTES) {
- wr_u32_le(flat, strtab_size);
- }
- cfree_writer_seek(w, strtab_off);
- cfree_writer_write(w, flat, strtab_size);
- }
- buf_fini(&strtab);
-}
diff --git a/src/obj/coff_read.c b/src/obj/coff_read.c
@@ -1,722 +0,0 @@
-/* PE/COFF .obj (IMAGE_FILE_HEADER + sections) reader. Parses a 64-bit
- * little-endian relocatable object back into a fresh ObjBuilder. Peer
- * of read_elf / read_macho; the post-finalize ObjBuilder shape is the
- * canonical superset doc/DESIGN.md §5.5 promises: read_coff of an
- * emit_coff output produces an ObjBuilder shape-equivalent to the
- * writer's input, modulo synthesized SECTION symbols and the COMDAT
- * section-definition aux records.
- *
- * Scope: IMAGE_FILE_MACHINE_AMD64 and IMAGE_FILE_MACHINE_ARM64. PE
- * executables (with a non-zero SizeOfOptionalHeader) are rejected — a
- * future read_coff_pe would handle those. Microsoft "short import"
- * records (Sig1=0, Sig2=0xFFFF) found inside .lib archive members are
- * detected at entry and dispatched to read_coff_short_import, which
- * synthesizes a DSO-shaped ObjBuilder annotated with the providing
- * DLL name via obj_set_coff_import_dll. */
-
-#include <string.h>
-
-#include "arch/arch.h"
-#include "core/arena.h"
-#include "core/heap.h"
-#include "core/pool.h"
-#include "core/slice.h"
-#include "obj/coff.h"
-
-static SrcLoc no_loc(void) {
- SrcLoc l = {0, 0, 0};
- return l;
-}
-
-/* ---- section-header scratch ---- */
-
-typedef struct CSecRec {
- char raw_name[8];
- u32 virtual_size;
- u32 size_of_raw_data;
- u32 pointer_to_raw_data;
- u32 pointer_to_relocations;
- u16 number_of_relocations;
- u32 characteristics;
- ObjSecId obj_sec; /* OBJ_SEC_NONE if skipped */
-} CSecRec;
-
-static void parse_shdr(const u8* p, CSecRec* out) {
- memcpy(out->raw_name, p, 8);
- out->virtual_size = coff_rd_u32(p + 8);
- out->size_of_raw_data = coff_rd_u32(p + 16);
- out->pointer_to_raw_data = coff_rd_u32(p + 20);
- out->pointer_to_relocations = coff_rd_u32(p + 24);
- out->number_of_relocations = coff_rd_u16(p + 32);
- out->characteristics = coff_rd_u32(p + 36);
- out->obj_sec = OBJ_SEC_NONE;
-}
-
-/* ---- string-table lookup (4-byte size prefix, NUL-terminated entries) ---- */
-
-static const char* strtab_lookup(const u8* tab, u32 tab_size, u32 off,
- u32* len_out) {
- if (off >= tab_size) {
- *len_out = 0;
- return "";
- }
- const char* s = (const char*)(tab + off);
- u32 max = tab_size - off;
- u32 n = 0;
- while (n < max && s[n] != '\0') ++n;
- *len_out = n;
- return s;
-}
-
-/* Resolve a section/symbol short-or-long name into (ptr, len). COFF
- * section names use the "/<decimal>" convention for >8-byte names; COFF
- * symbol names use the (Zeroes==0, Offset) form instead. This helper
- * handles the section form (8 raw bytes; leading '/' triggers strtab
- * lookup). */
-static void resolve_section_name(const char raw[8], const u8* strtab,
- u32 strtab_size, const char** name_out,
- u32* len_out) {
- if (raw[0] == '/') {
- /* Parse decimal offset. Up to 7 ASCII digits. */
- u32 off = 0;
- for (u32 i = 1; i < 8 && raw[i] >= '0' && raw[i] <= '9'; ++i) {
- off = off * 10u + (u32)(raw[i] - '0');
- }
- *name_out = strtab_lookup(strtab, strtab_size, off, len_out);
- return;
- }
- /* Inline: up to 8 bytes, NUL-padded (not necessarily NUL-terminated). */
- u32 n = 0;
- while (n < 8 && raw[n] != '\0') ++n;
- *name_out = raw;
- *len_out = n;
-}
-
-/* ---- characteristics -> SecKind / SecFlag / SecSem ---- */
-
-static u16 coff_sec_kind(const char* name, u32 nlen, u32 ch) {
- if (ch & IMAGE_SCN_CNT_UNINITIALIZED_DATA) return SEC_BSS;
- if (ch & IMAGE_SCN_CNT_CODE) return SEC_TEXT;
- if (ch & IMAGE_SCN_MEM_EXECUTE) return SEC_TEXT;
- if (nlen >= 7 && memcmp(name, ".debug_", 7) == 0) return SEC_DEBUG;
- /* The MS toolchain spells DWARF section names with a leading ".debug$"
- * (CodeView) — keep ELF-style ".debug_" detection but also treat the
- * MS form as debug. */
- if (nlen >= 7 && memcmp(name, ".debug$", 7) == 0) return SEC_DEBUG;
- if (ch & IMAGE_SCN_CNT_INITIALIZED_DATA) {
- if (ch & IMAGE_SCN_MEM_WRITE) return SEC_DATA;
- return SEC_RODATA;
- }
- return SEC_OTHER;
-}
-
-static u16 coff_sec_flags(const char* name, u32 nlen, u32 ch) {
- u16 f = 0;
- if (ch & IMAGE_SCN_MEM_READ) f |= SF_ALLOC;
- if (ch & IMAGE_SCN_MEM_EXECUTE) f |= SF_EXEC;
- if (ch & IMAGE_SCN_MEM_WRITE) f |= SF_WRITE;
- if (ch & IMAGE_SCN_LNK_COMDAT) f |= SF_GROUP;
- /* TLS sections in PE are spelled ".tls$<suffix>" (e.g. ".tls$", ".tls$ZZZ").
- * There is no characteristics bit for TLS — detection is name-based. */
- if (nlen >= 5 && memcmp(name, ".tls$", 5) == 0) f |= SF_TLS;
- if (nlen == 4 && memcmp(name, ".tls", 4) == 0) f |= SF_TLS;
- return f;
-}
-
-/* Bits 20..23 of Characteristics encode alignment as (log2(align)+1).
- * 0 means "default"; we collapse to align=1 for round-trip purposes. */
-static u32 coff_sec_align(u32 ch) {
- u32 n = (ch & IMAGE_SCN_ALIGN_MASK) >> 20;
- if (n == 0) return 1;
- return 1u << (n - 1u);
-}
-
-/* ---- symbol-name resolution ---- */
-
-static void resolve_sym_name(const u8* rec, const u8* strtab, u32 strtab_size,
- const char** name_out, u32* len_out) {
- /* ShortName: 8 bytes. If first 4 bytes are zero, second 4 bytes is
- * the strtab offset (LongName form). */
- u32 z = coff_rd_u32(rec + 0);
- if (z == 0) {
- u32 off = coff_rd_u32(rec + 4);
- *name_out = strtab_lookup(strtab, strtab_size, off, len_out);
- return;
- }
- u32 n = 0;
- while (n < 8 && rec[n] != '\0') ++n;
- *name_out = (const char*)rec;
- *len_out = n;
-}
-
-/* ---- short-import record handler ----
- * Microsoft "short import" format: a 20-byte ImportObjectHeader
- * followed by SizeOfData bytes containing two NUL-terminated strings —
- * the imported symbol name then the DLL name. These live as members
- * of .lib archives (mingw's libkernel32.dll.a etc.) and stand in for
- * a full long-form COFF import object.
- *
- * cfree-side model: synthesize a DSO-shaped ObjBuilder with the
- * imported symbol defined at section_id = OBJ_SEC_NONE (the same
- * shape read_coff_dso / read_elf_dso produce for an exported name),
- * and stash the providing DLL name on the builder via
- * obj_set_coff_import_dll so the archive-ingestion layer can route
- * the resulting LinkInput as a DSO with this name as the soname.
- *
- * We also synthesize the `__imp_<name>` alias mingw codegen uses to
- * spell explicit IAT-slot access; both names ultimately resolve to
- * the same DLL export at link time. */
-static ObjBuilder* read_coff_short_import(Compiler* c, const char* name,
- const u8* data, size_t len) {
- if (len < COFF_IMPORT_OBJECT_HEADER_SIZE)
- compiler_panic(c, no_loc(),
- "read_coff: short-import record shorter than header");
-
- /* Sig1 / Sig2 already checked by the caller. */
- /* data + 4: Version (2 bytes, ignored). */
- u16 machine = coff_rd_u16(data + 6);
- /* data + 8: TimeDateStamp (4 bytes, ignored). */
- u32 size_of_data = coff_rd_u32(data + 12);
- u16 ordinal_or_hint = coff_rd_u16(data + 16);
- u16 type_flags = coff_rd_u16(data + 18);
-
- if ((u64)COFF_IMPORT_OBJECT_HEADER_SIZE + (u64)size_of_data > (u64)len)
- compiler_panic(c, no_loc(),
- "read_coff: short-import SizeOfData=%u extends past input "
- "(len=%zu)",
- size_of_data, len);
-
- if (machine != IMAGE_FILE_MACHINE_AMD64 &&
- machine != IMAGE_FILE_MACHINE_ARM64)
- compiler_panic(c, no_loc(),
- "read_coff: short-import unsupported machine %#x",
- (u32)machine);
-
- /* Decode TypeFlags bitfield (Type:2, NameType:3, Reserved:11). */
- u32 import_type = (u32)(type_flags & 0x3u);
- u32 name_type = (u32)((type_flags >> 2) & 0x7u);
-
- /* Ordinal-only imports (NameType=IMPORT_OBJECT_ORDINAL) are not yet
- * implemented in cfree. None of the mingw / llvm-mingw system import
- * archives use this shape — every libfoo.a member in the supported
- * sysroots imports by name — so refusing here is a clean diagnostic,
- * not an internal panic. When a real consumer surfaces, the work is
- * to thread the ordinal through link_resolve and into the PE import
- * directory hint/name tables. */
- if (name_type == IMPORT_OBJECT_ORDINAL)
- compiler_panic(c, no_loc(),
- "read_coff: short-import by ordinal not implemented "
- "(archive member \"%.*s\", ordinal %u). cfree links "
- "imports by name only; rebuild the consumer to import "
- "by name, or omit this archive from the link.",
- SLICE_ARG(name ? slice_from_cstr(name)
- : SLICE_LIT("<unnamed>")),
- (unsigned)ordinal_or_hint);
-
- /* Symbol name: NUL-terminated starting at data + 20. */
- const u8* body = data + COFF_IMPORT_OBJECT_HEADER_SIZE;
- u32 sym_name_max = size_of_data;
- u32 sym_name_len = 0;
- while (sym_name_len < sym_name_max && body[sym_name_len] != '\0')
- ++sym_name_len;
- if (sym_name_len == sym_name_max)
- compiler_panic(c, no_loc(),
- "read_coff: short-import symbol name not NUL-terminated");
-
- /* DLL name: NUL-terminated starting after the symbol name's NUL. */
- u32 dll_name_off = sym_name_len + 1u;
- if (dll_name_off >= size_of_data)
- compiler_panic(c, no_loc(),
- "read_coff: short-import missing DLL name");
- const u8* dll_p = body + dll_name_off;
- u32 dll_name_max = size_of_data - dll_name_off;
- u32 dll_name_len = 0;
- while (dll_name_len < dll_name_max && dll_p[dll_name_len] != '\0')
- ++dll_name_len;
- if (dll_name_len == dll_name_max)
- compiler_panic(c, no_loc(),
- "read_coff: short-import DLL name not NUL-terminated");
-
- ObjBuilder* ob = obj_new(c);
- if (!ob) compiler_panic(c, no_loc(), "read_coff: obj_new failed");
-
- /* Pick SymKind by import type: CODE -> function, DATA/CONST -> object.
- * Both are defined at section_id=OBJ_SEC_NONE, value=0, size=0 — the
- * shape read_coff_dso would produce for a DLL export. */
- SymKind k = (import_type == IMPORT_OBJECT_CODE) ? SK_FUNC : SK_OBJ;
-
- Sym sn = pool_intern_slice(
- c->global, (Slice){ .s = (const char*)body, .len = sym_name_len });
- ObjSymId id = obj_symbol_ex(ob, sn, SB_GLOBAL, SV_DEFAULT, k, OBJ_SEC_NONE,
- 0, 0, 0);
- obj_sym_mark_referenced(ob, id);
-
- /* `__imp_<name>` alias for codegen that refers to the IAT slot
- * directly (mingw convention). Even code imports use an object-like
- * `__imp_` symbol because references to it want the IAT data slot, not
- * the callable import stub. */
- static const char kImpPrefix[] = "__imp_";
- u32 imp_len = (u32)(sizeof kImpPrefix - 1u) + sym_name_len;
- char* imp_buf = arena_array(c->scratch, char, imp_len);
- memcpy(imp_buf, kImpPrefix, sizeof kImpPrefix - 1u);
- memcpy(imp_buf + (sizeof kImpPrefix - 1u), body, sym_name_len);
- Sym imp_sn =
- pool_intern_slice(c->global, (Slice){ .s = imp_buf, .len = imp_len });
- ObjSymId imp_id = obj_symbol_ex(ob, imp_sn, SB_GLOBAL, SV_DEFAULT, SK_OBJ,
- OBJ_SEC_NONE, 0, 0, 0);
- obj_sym_mark_referenced(ob, imp_id);
-
- /* Stash the DLL name so the archive-ingestion layer (Phase 4.3) can
- * route this builder as a DSO with the DLL as soname. */
- Sym dll_sn = pool_intern_slice(
- c->global, (Slice){ .s = (const char*)dll_p, .len = dll_name_len });
- obj_set_coff_import_dll(ob, dll_sn);
-
- (void)name_type;
- obj_finalize(ob);
- return ob;
-}
-
-ObjBuilder* read_coff(Compiler* c, const char* name, const u8* data,
- size_t len) {
- (void)name;
-
- /* ---- Step 0: header validation ---- */
- if (len < COFF_FILE_HEADER_SIZE)
- compiler_panic(c, no_loc(), "read_coff: input shorter than COFF header");
-
- /* Microsoft short-import record? (Sig1=0, Sig2=0xFFFF.) These live
- * as members of .lib archives and stand in for a long-form import
- * object. Detect at entry; the rest of read_coff assumes the
- * input is a real IMAGE_FILE_HEADER. */
- if (len >= 4 &&
- coff_rd_u16(data + 0) == IMPORT_OBJECT_HDR_SIG1 &&
- coff_rd_u16(data + 2) == IMPORT_OBJECT_HDR_SIG2) {
- return read_coff_short_import(c, name, data, len);
- }
-
- u16 machine = coff_rd_u16(data + 0);
- u16 nsections = coff_rd_u16(data + 2);
- /* data + 4: TimeDateStamp (4 bytes, ignored). */
- u32 ptr_to_symtab = coff_rd_u32(data + 8);
- u32 nsymbols = coff_rd_u32(data + 12);
- u16 size_opt_hdr = coff_rd_u16(data + 16);
- /* data + 18: Characteristics (2 bytes, currently ignored). */
-
- if (size_opt_hdr != 0)
- compiler_panic(c, no_loc(),
- "read_coff: input has optional header (size=%u); "
- "use read_coff_pe for executables",
- (u32)size_opt_hdr);
-
- if (machine != IMAGE_FILE_MACHINE_AMD64 &&
- machine != IMAGE_FILE_MACHINE_ARM64 &&
- machine != IMAGE_FILE_MACHINE_ARM64EC)
- compiler_panic(c, no_loc(), "read_coff: unsupported machine %#x",
- (u32)machine);
-
- const ArchImpl* arch = arch_lookup_coff_machine(machine);
- if (!arch || !arch->coff || !arch->coff->reloc_from)
- compiler_panic(c, no_loc(), "read_coff: no arch impl for machine %#x",
- (u32)machine);
- u32 (*reloc_from)(u32) = arch->coff->reloc_from;
-
- if ((u64)COFF_FILE_HEADER_SIZE +
- (u64)nsections * (u64)COFF_SECTION_HEADER_SIZE >
- (u64)len)
- compiler_panic(c, no_loc(),
- "read_coff: section header table out of range");
-
- /* ---- Step 1: bootstrap, locate strtab ---- */
- /* Strtab is at PointerToSymbolTable + NumberOfSymbols * 18. When the
- * file has no symbol table (ptr=0, n=0) we treat strtab as empty. */
- const u8* strtab = NULL;
- u32 strtab_size = 0;
- if (ptr_to_symtab && nsymbols) {
- u64 symtab_end =
- (u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYMBOL_SIZE;
- if (symtab_end + COFF_STRTAB_SIZE_FIELD_BYTES > (u64)len)
- compiler_panic(c, no_loc(),
- "read_coff: symbol table / strtab header out of range");
- u32 declared = coff_rd_u32(data + symtab_end);
- /* The size field is inclusive of the 4-byte prefix; treat <4 as
- * "empty" (some tools write 0). */
- if (declared < COFF_STRTAB_SIZE_FIELD_BYTES) declared = 0;
- if (declared) {
- if (symtab_end + (u64)declared > (u64)len)
- compiler_panic(c, no_loc(), "read_coff: strtab body out of range");
- strtab = data + symtab_end;
- strtab_size = declared;
- } else {
- strtab = data + symtab_end;
- strtab_size = COFF_STRTAB_SIZE_FIELD_BYTES;
- }
- }
-
- ObjBuilder* ob = obj_new(c);
- if (!ob) compiler_panic(c, no_loc(), "read_coff: obj_new failed");
-
- /* ---- Step 2: ingest sections ---- */
- CSecRec* secs = arena_array(c->scratch, CSecRec, nsections ? nsections : 1);
- const u8* shdr_base = data + COFF_FILE_HEADER_SIZE;
- for (u32 i = 0; i < nsections; ++i) {
- CSecRec* s = &secs[i];
- parse_shdr(shdr_base + (u64)i * COFF_SECTION_HEADER_SIZE, s);
-
- const char* nm;
- u32 nlen;
- resolve_section_name(s->raw_name, strtab, strtab_size, &nm, &nlen);
- Sym sn = pool_intern_slice(c->global, (Slice){ .s = nm, .len = nlen });
-
- u16 kind = coff_sec_kind(nm, nlen, s->characteristics);
- u16 flags = coff_sec_flags(nm, nlen, s->characteristics);
- u32 align = coff_sec_align(s->characteristics);
-
- int is_bss = (s->characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA) != 0;
- u16 sem = is_bss ? SSEM_NOBITS : SSEM_PROGBITS;
-
- ObjSecId id = obj_section_ex(ob, sn, (SecKind)kind, (SecSem)sem, flags,
- align, 0u, 0u, 0u);
- if (id == OBJ_SEC_NONE)
- compiler_panic(c, no_loc(),
- "read_coff: obj_section_ex failed for section %u", i);
- s->obj_sec = id;
-
- /* Preserve raw Characteristics so emit_coff can write back any bits
- * the canonical SecFlag/SecSem mapping doesn't model (LNK_INFO,
- * LNK_REMOVE, MEM_DISCARDABLE, MEM_SHARED, GPREL, alignment nibble). */
- obj_section_set_ext(ob, id, OBJ_EXT_COFF, s->characteristics, 0);
-
- if (is_bss) {
- u32 bss_size = s->virtual_size ? s->virtual_size : s->size_of_raw_data;
- obj_reserve_bss(ob, id, bss_size, align);
- } else if (s->size_of_raw_data) {
- u64 end = (u64)s->pointer_to_raw_data + (u64)s->size_of_raw_data;
- if (end > (u64)len)
- compiler_panic(c, no_loc(),
- "read_coff: section %u bytes out of range", i);
- u8* dst = obj_reserve(ob, id, s->size_of_raw_data);
- memcpy(dst, data + s->pointer_to_raw_data, s->size_of_raw_data);
- }
- }
-
- /* ---- Step 3: ingest symbols (with aux-record awareness) ----
- * sym_to_obj is indexed by RAW symbol-table index (including aux
- * slots), so reloc.SymbolTableIndex resolves directly without
- * adjusting for skipped aux records. Aux slots map to OBJ_SYM_NONE. */
- ObjSymId* sym_to_obj =
- arena_zarray(c->scratch, ObjSymId, nsymbols ? nsymbols : 1);
-
- /* Track section-symbol primary symtab index per section, stored as
- * (raw_index + 1) so 0 can mean "not seen yet" without colliding
- * with the (legitimate) first symbol-table slot — emit_coff always
- * lays the first section's section-symbol at index 0. */
- u32* sec_sym_primary = arena_zarray(c->scratch, u32, nsections + 1u);
-
- const u8* sym_base = data + ptr_to_symtab;
- if (nsymbols) {
- if ((u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYMBOL_SIZE > (u64)len)
- compiler_panic(c, no_loc(),
- "read_coff: symbol table body out of range");
- }
-
- for (u32 i = 0; i < nsymbols; ) {
- const u8* p = sym_base + (u64)i * COFF_SYMBOL_SIZE;
- const char* nm;
- u32 nlen;
- resolve_sym_name(p, strtab, strtab_size, &nm, &nlen);
-
- u32 value = coff_rd_u32(p + 8);
- i16 sec_num = (i16)coff_rd_u16(p + 12);
- u16 type = coff_rd_u16(p + 14);
- u8 sclass = p[16];
- u8 naux = p[17];
-
- /* FILE storage class: concatenate aux records' raw bytes (each
- * 18 bytes, NUL-padded) for the source-file name. */
- if (sclass == IMAGE_SYM_CLASS_FILE) {
- /* Build name from aux records (up to naux*18 bytes); fall back
- * to the primary record's name if naux==0. */
- const char* fnm = nm;
- u32 fnlen = nlen;
- if (naux) {
- /* Each aux record's 18 bytes are interpreted as raw file-name
- * bytes; concatenate then trim trailing NULs. */
- u32 total = (u32)naux * COFF_SYMBOL_SIZE;
- if ((u64)i + 1u + (u64)naux > (u64)nsymbols)
- compiler_panic(c, no_loc(),
- "read_coff: FILE aux records extend past symbol "
- "table");
- const u8* aux = p + COFF_SYMBOL_SIZE;
- u32 n = 0;
- while (n < total && aux[n] != '\0') ++n;
- fnm = (const char*)aux;
- fnlen = n;
- }
- Sym fsn = fnlen ? pool_intern_slice(
- c->global, (Slice){ .s = fnm, .len = fnlen })
- : 0;
- ObjSymId id = obj_symbol_ex(ob, fsn, SB_LOCAL, SV_DEFAULT, SK_FILE,
- OBJ_SEC_NONE, 0, 0, 0);
- obj_sym_mark_referenced(ob, id);
- sym_to_obj[i] = id;
- i += 1u + naux;
- continue;
- }
-
- /* Skip .bf/.ef debug pair primaries (FUNCTION storage class) and
- * the END_OF_FUNCTION marker: they carry no symbol cfree models. */
- if (sclass == IMAGE_SYM_CLASS_FUNCTION ||
- sclass == IMAGE_SYM_CLASS_END_OF_FUNCTION) {
- sym_to_obj[i] = OBJ_SYM_NONE;
- i += 1u + naux;
- continue;
- }
-
- /* Resolve (bind, vis, kind, section_id, value, size, cmnalign). */
- SymBind bind = SB_LOCAL;
- SymVis vis = SV_DEFAULT;
- SymKind kind = SK_NOTYPE;
- ObjSecId target_sec = OBJ_SEC_NONE;
- u64 sym_value = 0;
- u64 sym_size = 0;
- u64 cmnalign = 0;
-
- if (sec_num == IMAGE_SYM_UNDEFINED) {
- /* Undef or common. EXTERNAL with Value > 0 is a common. */
- if (sclass == IMAGE_SYM_CLASS_EXTERNAL && value > 0) {
- bind = SB_GLOBAL;
- kind = SK_COMMON;
- sym_size = value;
- cmnalign = 1; /* COFF doesn't carry per-common alignment */
- } else {
- bind = (sclass == IMAGE_SYM_CLASS_WEAK_EXTERNAL) ? SB_WEAK
- : (sclass == IMAGE_SYM_CLASS_EXTERNAL) ? SB_GLOBAL
- : SB_LOCAL;
- kind = SK_UNDEF;
- }
- } else if (sec_num == IMAGE_SYM_ABSOLUTE) {
- kind = SK_ABS;
- sym_value = value;
- bind = (sclass == IMAGE_SYM_CLASS_EXTERNAL) ? SB_GLOBAL : SB_LOCAL;
- } else if (sec_num == IMAGE_SYM_DEBUG) {
- /* Defined-in-debug — cfree has no model for it. Skip with an
- * OBJ_SYM_NONE entry; relocations against this slot will resolve
- * to OBJ_SYM_NONE, which obj_reloc_ex tolerates. */
- sym_to_obj[i] = OBJ_SYM_NONE;
- i += 1u + naux;
- continue;
- } else if (sec_num >= 1 && (u32)sec_num <= nsections) {
- target_sec = secs[sec_num - 1].obj_sec;
- sym_value = value;
- switch (sclass) {
- case IMAGE_SYM_CLASS_EXTERNAL:
- bind = SB_GLOBAL;
- break;
- case IMAGE_SYM_CLASS_WEAK_EXTERNAL:
- bind = SB_WEAK;
- break;
- case IMAGE_SYM_CLASS_STATIC:
- case IMAGE_SYM_CLASS_LABEL:
- default:
- bind = SB_LOCAL;
- break;
- }
-
- /* Detect SECTION symbols: STATIC, Value==0, name matches the
- * section's own name, and the section has at least one aux
- * record (the section-definition aux). Mark as SK_SECTION so
- * emit_coff regenerates the synthetic entry. */
- int is_section_sym = 0;
- if (sclass == IMAGE_SYM_CLASS_STATIC && value == 0 && naux >= 1) {
- const CSecRec* cs = &secs[sec_num - 1];
- u32 raw_nlen = 0;
- while (raw_nlen < 8 && cs->raw_name[raw_nlen] != '\0') ++raw_nlen;
- if (raw_nlen == nlen && memcmp(cs->raw_name, nm, nlen) == 0) {
- is_section_sym = 1;
- } else if (cs->raw_name[0] == '/') {
- /* Long-named section: compare the resolved name. */
- const char* rn;
- u32 rnlen;
- resolve_section_name(cs->raw_name, strtab, strtab_size, &rn, &rnlen);
- if (rnlen == nlen && memcmp(rn, nm, nlen) == 0) is_section_sym = 1;
- }
- }
-
- if (is_section_sym) {
- kind = SK_SECTION;
- sec_sym_primary[sec_num] = i + 1u;
- } else if (sclass == IMAGE_SYM_CLASS_SECTION) {
- kind = SK_SECTION;
- } else if (sclass == IMAGE_SYM_CLASS_LABEL) {
- kind = SK_NOTYPE;
- } else if ((type >> 8) == IMAGE_SYM_DTYPE_FUNCTION) {
- kind = SK_FUNC;
- } else if (type == IMAGE_SYM_TYPE_NULL) {
- kind = (bind == SB_LOCAL) ? SK_NOTYPE : SK_OBJ;
- } else {
- kind = SK_OBJ;
- }
- } else {
- compiler_panic(c, no_loc(),
- "read_coff: symbol section number %d out of range",
- (int)sec_num);
- }
-
- /* WEAK_EXTERNAL primary: aux record carries TagIndex + Characteristics.
- * cfree's model has SB_WEAK; the fallback symbol is link-time
- * resolution by name and we drop the explicit index. */
- if (sclass == IMAGE_SYM_CLASS_WEAK_EXTERNAL) bind = SB_WEAK;
-
- Sym sn = nlen ? pool_intern_slice(c->global, (Slice){ .s = nm, .len = nlen })
- : 0;
- ObjSymId id = obj_symbol_ex(ob, sn, bind, vis, kind, target_sec, sym_value,
- sym_size, cmnalign);
- obj_sym_mark_referenced(ob, id);
- sym_to_obj[i] = id;
- i += 1u + naux;
- }
-
- /* ---- Step 4: stitch COMDAT groups from section-definition aux ----
- * Each COMDAT section has a STATIC primary symbol (the section
- * symbol) followed by one section-definition aux record. Selection
- * != 0 marks the section as a COMDAT member; the signature symbol
- * is the section symbol itself (Number field's selection variant
- * controls dedup policy at link time). */
- for (u32 s = 1; s <= nsections; ++s) {
- u32 prim_plus1 = sec_sym_primary[s];
- if (!prim_plus1) continue;
- u32 prim = prim_plus1 - 1u;
- const CSecRec* cs = &secs[s - 1];
- if (!(cs->characteristics & IMAGE_SCN_LNK_COMDAT)) continue;
- const u8* p = sym_base + (u64)prim * COFF_SYMBOL_SIZE;
- u8 naux = p[17];
- if (!naux) continue;
- const u8* aux = p + COFF_SYMBOL_SIZE;
- /* Aux layout: Length(4), NumberOfRelocations(2), NumberOfLinenumbers(2),
- * CheckSum(4), Number(2), Selection(1), Unused(3). */
- u16 assoc_number = coff_rd_u16(aux + 12);
- u8 selection = aux[14];
- if (selection == 0) continue;
-
- ObjSymId sig = sym_to_obj[prim];
- const ObjSym* sigsym = obj_symbol_get(ob, sig);
- Sym gname = sigsym ? sigsym->name : 0;
- ObjGroupId gid = obj_group(ob, gname, sig, (u32)selection);
- obj_group_add_section(ob, gid, cs->obj_sec);
- obj_section_set_group(ob, cs->obj_sec, gid);
-
- /* ASSOCIATIVE: the COMDAT member is associated with another
- * section's group. Add this section to that group's list too so
- * dead-strip keeps them paired. */
- if (selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE && assoc_number >= 1 &&
- (u32)assoc_number <= nsections) {
- u32 other_prim_plus1 = sec_sym_primary[assoc_number];
- if (other_prim_plus1) {
- u32 other_prim = other_prim_plus1 - 1u;
- const u8* op = sym_base + (u64)other_prim * COFF_SYMBOL_SIZE;
- if (op[17]) {
- const u8* oaux = op + COFF_SYMBOL_SIZE;
- u8 osel = oaux[14];
- if (osel != 0) {
- ObjSymId osig = sym_to_obj[other_prim];
- const ObjSym* osigsym = obj_symbol_get(ob, osig);
- Sym ogname = osigsym ? osigsym->name : 0;
- ObjGroupId ogid = obj_group(ob, ogname, osig, (u32)osel);
- obj_group_add_section(ob, ogid, cs->obj_sec);
- }
- }
- }
- }
- }
-
- /* ---- Step 5: per-section relocations ---- */
- for (u32 i = 0; i < nsections; ++i) {
- const CSecRec* s = &secs[i];
- if (!s->number_of_relocations) continue;
- u64 reloc_end = (u64)s->pointer_to_relocations +
- (u64)s->number_of_relocations * (u64)COFF_RELOC_SIZE;
- if (reloc_end > (u64)len)
- compiler_panic(c, no_loc(),
- "read_coff: relocation table for section %u out of range",
- i);
- const u8* rbase = data + s->pointer_to_relocations;
- for (u32 j = 0; j < s->number_of_relocations; ++j) {
- const u8* rp = rbase + (u64)j * COFF_RELOC_SIZE;
- u32 r_va = coff_rd_u32(rp + 0);
- u32 r_sym = coff_rd_u32(rp + 4);
- u16 r_type = coff_rd_u16(rp + 8);
-
- u32 kind = reloc_from(r_type);
- if (kind == (u32)-1)
- compiler_panic(c, no_loc(),
- "read_coff: unsupported reloc type %u for machine %#x",
- (u32)r_type, (u32)machine);
-
- ObjSymId target = OBJ_SYM_NONE;
- if (r_sym < nsymbols) target = sym_to_obj[r_sym];
-
- /* AMD64 REL32 encodings are relative to a PC after the relocated
- * field, while cfree's R_PC32-style apply formula subtracts the
- * relocation field address P. Plain REL32 is relative to P+4;
- * REL32_N is relative to P+N. Record that convention as an
- * implicit negative addend so link_reloc_apply can stay format
- * neutral. */
- /* ARM64 PAGEOFFSET_12L is one wire code for LDST{8,16,32,64,128}.
- * The per-arch translator returns R_AARCH64_LDST64_ABS_LO12_NC by
- * default; recover the actual access width from the patched LDR/
- * STR instruction's size field at bits [31:30] (and a SIMD/FP
- * extension via bit 26 + opc[23]) so the linker applies the right
- * scale. Mismatch panics at apply-time with "misaligned
- * address" otherwise — see link_reloc.c. */
- if ((machine == IMAGE_FILE_MACHINE_ARM64 ||
- machine == IMAGE_FILE_MACHINE_ARM64EC) &&
- r_type == IMAGE_REL_ARM64_PAGEOFFSET_12L &&
- s->size_of_raw_data && (u64)r_va + 4u <= (u64)s->size_of_raw_data) {
- const u8* ibytes = data + s->pointer_to_raw_data + r_va;
- u32 instr = (u32)ibytes[0] | ((u32)ibytes[1] << 8) |
- ((u32)ibytes[2] << 16) | ((u32)ibytes[3] << 24);
- u32 sz = (instr >> 30) & 0x3u;
- int is_simd = (instr >> 26) & 0x1u;
- if (is_simd && ((instr >> 23) & 0x1u)) {
- kind = R_AARCH64_LDST128_ABS_LO12_NC;
- } else {
- switch (sz) {
- case 0: kind = R_AARCH64_LDST8_ABS_LO12_NC; break;
- case 1: kind = R_AARCH64_LDST16_ABS_LO12_NC; break;
- case 2: kind = R_AARCH64_LDST32_ABS_LO12_NC; break;
- default: kind = R_AARCH64_LDST64_ABS_LO12_NC; break;
- }
- }
- }
-
- i64 addend = 0;
- int has_explicit = 0;
- if (machine == IMAGE_FILE_MACHINE_AMD64) {
- switch (r_type) {
- case IMAGE_REL_AMD64_REL32:
- addend = -4; has_explicit = 1; break;
- case IMAGE_REL_AMD64_REL32_1:
- addend = -1; has_explicit = 1; break;
- case IMAGE_REL_AMD64_REL32_2:
- addend = -2; has_explicit = 1; break;
- case IMAGE_REL_AMD64_REL32_3:
- addend = -3; has_explicit = 1; break;
- case IMAGE_REL_AMD64_REL32_4:
- addend = -4; has_explicit = 1; break;
- case IMAGE_REL_AMD64_REL32_5:
- addend = -5; has_explicit = 1; break;
- default:
- break;
- }
- }
-
- obj_reloc_ex(ob, s->obj_sec, r_va, (RelocKind)kind, target, addend,
- has_explicit, 0);
- }
- }
-
- /* ---- Step 6: finalize and return ---- */
- obj_finalize(ob);
- return ob;
-}
diff --git a/src/obj/coff_read_dso.c b/src/obj/coff_read_dso.c
@@ -1,236 +0,0 @@
-/* PE32+ DLL reader. Peer of read_elf_dso / read_macho_dso: walks the
- * IMAGE_DIRECTORY_ENTRY_EXPORT data directory of a Windows .dll and
- * produces an ObjBuilder of defined OBJ_SEC_NONE symbols — one per
- * name in the Export Name Table. The DLL's own Name string (the
- * analogue of DT_SONAME / LC_ID_DYLIB) is returned via *soname_out.
- *
- * The produced ObjBuilder carries no sections, relocations, or groups
- * — DSO inputs contribute no bytes to the link. The consumer's
- * resolve_undefs pass sees the exports as defined globals and marks
- * matching consumer-side undefs as `imported`; the import-table
- * emitter (Phase 3 / 4.4) later groups them by providing DLL.
- *
- * Scope: PE32+ only (IMAGE_NT_OPTIONAL_HDR64_MAGIC), AMD64 or ARM64,
- * with IMAGE_FILE_DLL set. Ordinal-only exports (entries present in
- * the EAT but absent from the ENT) are not synthesized in v1 — almost
- * all real-world imports are by name. Forwarder entries (EAT RVA
- * falls within the export directory's own range) are still emitted as
- * symbols so the linker can satisfy imports against them; the OS
- * loader follows the forwarder chain at runtime. This contract is
- * pinned by test/coff/pe-dso-forwarder.c. */
-
-#include <string.h>
-
-#include "core/arena.h"
-#include "core/heap.h"
-#include "core/pool.h"
-#include "core/slice.h"
-#include "obj/coff.h"
-
-static SrcLoc no_loc(void) {
- SrcLoc l = {0, 0, 0};
- return l;
-}
-
-/* ---- RVA -> file offset ----
- * Walks the section table once per call. Returns 1 on success and
- * fills *off_out; returns 0 if the RVA falls outside every section's
- * [VirtualAddress, VirtualAddress + max(VirtualSize, SizeOfRawData))
- * range or the resulting file offset would exceed `len`. */
-static int rva_to_offset(const u8* shdrs, u16 nsec, u32 rva, size_t len,
- u64* off_out) {
- for (u16 i = 0; i < nsec; ++i) {
- const u8* sh = shdrs + (u64)i * COFF_SECTION_HEADER_SIZE;
- u32 vsize = coff_rd_u32(sh + 8);
- u32 vaddr = coff_rd_u32(sh + 12);
- u32 raw_size = coff_rd_u32(sh + 16);
- u32 raw_ptr = coff_rd_u32(sh + 20);
- /* Some linkers leave VirtualSize == 0 in objects; use raw_size as
- * a fallback so we still resolve RVAs in well-formed images. */
- u32 span = vsize ? vsize : raw_size;
- if (rva >= vaddr && rva < vaddr + span) {
- u64 delta = (u64)(rva - vaddr);
- if (delta >= raw_size) return 0; /* RVA past on-disk data */
- u64 off = (u64)raw_ptr + delta;
- if (off >= len) return 0;
- *off_out = off;
- return 1;
- }
- }
- return 0;
-}
-
-/* Read a NUL-terminated string starting at `off`, bounded by `len`.
- * Returns the string length (excluding NUL); writes the pointer to
- * *out. Returns 0 if off is out of range or the string is not
- * terminated within the file. */
-static u32 read_cstr(const u8* data, size_t len, u64 off, const char** out) {
- if (off >= len) { *out = ""; return 0; }
- const char* s = (const char*)(data + off);
- u64 max = (u64)len - off;
- u64 n = 0;
- while (n < max && s[n] != '\0') ++n;
- if (n == max) { *out = ""; return 0; } /* unterminated */
- *out = s;
- return (u32)n;
-}
-
-ObjBuilder* read_coff_dso(Compiler* c, const char* name, const u8* data,
- size_t len, Sym* soname_out) {
- (void)name;
- if (soname_out) *soname_out = 0;
-
- /* ---- DOS header + PE signature ---- */
- if (len < COFF_DOS_HEADER_SIZE)
- compiler_panic(c, no_loc(), "read_coff_dso: input shorter than DOS header");
- u16 e_magic = coff_rd_u16(data + 0);
- if (e_magic != IMAGE_DOS_SIGNATURE)
- compiler_panic(c, no_loc(), "read_coff_dso: bad DOS magic 0x%x", e_magic);
- u32 e_lfanew = coff_rd_u32(data + 60);
-
- u64 nt_end = (u64)e_lfanew + 4u + COFF_FILE_HEADER_SIZE + COFF_OPT_HDR64_SIZE;
- if (nt_end > len)
- compiler_panic(c, no_loc(),
- "read_coff_dso: PE headers extend past end of file");
-
- u32 pe_sig = coff_rd_u32(data + e_lfanew);
- if (pe_sig != IMAGE_NT_SIGNATURE)
- compiler_panic(c, no_loc(), "read_coff_dso: bad PE signature 0x%x", pe_sig);
-
- /* ---- IMAGE_FILE_HEADER ---- */
- const u8* fh = data + e_lfanew + 4u;
- u16 machine = coff_rd_u16(fh + 0);
- u16 nsec = coff_rd_u16(fh + 2);
- u16 size_of_opt = coff_rd_u16(fh + 16);
- u16 chars = coff_rd_u16(fh + 18);
-
- if (machine != IMAGE_FILE_MACHINE_AMD64 && machine != IMAGE_FILE_MACHINE_ARM64)
- compiler_panic(c, no_loc(),
- "read_coff_dso: unsupported machine 0x%x", machine);
- if (!(chars & IMAGE_FILE_DLL))
- compiler_panic(c, no_loc(),
- "read_coff_dso: not a DLL (Characteristics=0x%x)", chars);
- if (size_of_opt < COFF_OPT_HDR64_SIZE)
- compiler_panic(c, no_loc(),
- "read_coff_dso: SizeOfOptionalHeader %u too small for PE32+",
- size_of_opt);
-
- /* ---- IMAGE_OPTIONAL_HEADER64 ---- */
- const u8* oh = fh + COFF_FILE_HEADER_SIZE;
- u16 opt_magic = coff_rd_u16(oh + 0);
- if (opt_magic != IMAGE_NT_OPTIONAL_HDR64_MAGIC)
- compiler_panic(c, no_loc(),
- "read_coff_dso: not PE32+ (optional header Magic=0x%x)",
- opt_magic);
-
- /* DataDirectory begins at offset 112 inside the PE32+ optional header
- * (28 standard + 84 windows-specific + NumberOfRvaAndSizes = 112). */
- const u8* data_dir = oh + COFF_OPT_HDR64_SIZE
- - COFF_NUM_DATA_DIRECTORIES * COFF_DATA_DIRECTORY_SIZE;
- u32 export_rva = coff_rd_u32(data_dir
- + IMAGE_DIRECTORY_ENTRY_EXPORT
- * COFF_DATA_DIRECTORY_SIZE);
- u32 export_size = coff_rd_u32(data_dir
- + IMAGE_DIRECTORY_ENTRY_EXPORT
- * COFF_DATA_DIRECTORY_SIZE
- + 4u);
-
- /* ---- section table ---- */
- u64 shdrs_off = (u64)e_lfanew + 4u + COFF_FILE_HEADER_SIZE + size_of_opt;
- u64 shdrs_end = shdrs_off + (u64)nsec * COFF_SECTION_HEADER_SIZE;
- if (shdrs_end > len)
- compiler_panic(c, no_loc(),
- "read_coff_dso: section table extends past end of file");
- const u8* shdrs = data + shdrs_off;
-
- ObjBuilder* ob = obj_new(c);
- if (!ob) compiler_panic(c, no_loc(), "read_coff_dso: obj_new failed");
-
- /* No export directory => empty DSO (legal for stub DLLs). */
- if (export_size == 0 || export_rva == 0) {
- obj_finalize(ob);
- return ob;
- }
-
- u64 exp_off;
- if (!rva_to_offset(shdrs, nsec, export_rva, len, &exp_off))
- compiler_panic(c, no_loc(),
- "read_coff_dso: export directory RVA 0x%x out of range",
- export_rva);
- if (exp_off + COFF_EXPORT_DIR_SIZE > len)
- compiler_panic(c, no_loc(),
- "read_coff_dso: export directory truncated");
-
- const u8* ed = data + exp_off;
- u32 name_rva = coff_rd_u32(ed + 12);
- u32 num_funcs = coff_rd_u32(ed + 20);
- u32 num_names = coff_rd_u32(ed + 24);
- u32 eat_rva = coff_rd_u32(ed + 28);
- u32 ent_rva = coff_rd_u32(ed + 32);
- u32 ord_rva = coff_rd_u32(ed + 36);
- /* Base (ed + 16) is the user-visible ordinal offset; the cfree linker
- * matches imports by name, so we don't propagate it. */
-
- /* ---- DLL name (soname) ---- */
- if (name_rva) {
- u64 name_off;
- if (!rva_to_offset(shdrs, nsec, name_rva, len, &name_off))
- compiler_panic(c, no_loc(),
- "read_coff_dso: DLL name RVA 0x%x out of range",
- name_rva);
- const char* dll_name;
- u32 nlen = read_cstr(data, len, name_off, &dll_name);
- if (nlen && soname_out)
- *soname_out = pool_intern_slice(c->global, (Slice){ .s = dll_name, .len = nlen });
- }
-
- /* ---- resolve EAT / ENT / ordinal table once ---- */
- u64 eat_off = 0, ent_off = 0, ord_off = 0;
- if (num_names) {
- if (!rva_to_offset(shdrs, nsec, eat_rva, len, &eat_off))
- compiler_panic(c, no_loc(),
- "read_coff_dso: EAT RVA 0x%x out of range", eat_rva);
- if (!rva_to_offset(shdrs, nsec, ent_rva, len, &ent_off))
- compiler_panic(c, no_loc(),
- "read_coff_dso: ENT RVA 0x%x out of range", ent_rva);
- if (!rva_to_offset(shdrs, nsec, ord_rva, len, &ord_off))
- compiler_panic(c, no_loc(),
- "read_coff_dso: ordinal table RVA 0x%x out of range",
- ord_rva);
- if (ent_off + (u64)num_names * 4u > len
- || ord_off + (u64)num_names * 2u > len)
- compiler_panic(c, no_loc(),
- "read_coff_dso: ENT/ordinal table extends past file");
- if (eat_off + (u64)num_funcs * 4u > len)
- compiler_panic(c, no_loc(),
- "read_coff_dso: EAT extends past file");
- }
-
- /* ---- walk the ENT ----
- * Forwarders (EAT RVA inside [export_rva, export_rva + export_size))
- * still produce a symbol: cfree's linker doesn't follow the chain,
- * but the import needs to be satisfiable so the OS loader can. */
- for (u32 i = 0; i < num_names; ++i) {
- u32 nrva = coff_rd_u32(data + ent_off + (u64)i * 4u);
- u16 ord = coff_rd_u16(data + ord_off + (u64)i * 2u);
- if (ord >= num_funcs) continue; /* malformed; skip rather than panic */
- /* func_rva is fetched for forwarder classification only; cfree does
- * not consume the address itself (DSO symbols are OBJ_SEC_NONE). */
- u32 func_rva = coff_rd_u32(data + eat_off + (u64)ord * 4u);
- (void)func_rva; /* see comment above re: forwarders */
-
- u64 name_off;
- if (!rva_to_offset(shdrs, nsec, nrva, len, &name_off)) continue;
- const char* nm;
- u32 nlen = read_cstr(data, len, name_off, &nm);
- if (!nlen) continue;
-
- Sym sn = pool_intern_slice(c->global, (Slice){ .s = nm, .len = nlen });
- ObjSymId id = obj_symbol(ob, sn, SB_GLOBAL, SK_FUNC,
- OBJ_SEC_NONE, 0, 0);
- obj_sym_mark_referenced(ob, id);
- }
-
- obj_finalize(ob);
- return ob;
-}
diff --git a/src/obj/coff_reloc_aarch64.c b/src/obj/coff_reloc_aarch64.c
@@ -1,96 +0,0 @@
-/* RelocKind <-> AArch64 PE/COFF reloc-type mapping. Mirror of
- * elf_reloc_aarch64.c for PE/COFF.
- *
- * The ARM64 PE/COFF reloc set covers the common AArch64 patch sites:
- * ADRP page-base / page-offset pairs, BRANCH26/19/14, ADDR32/64, plus
- * the section-relative SECREL family which cfree does not model in v1.
- * PAGEOFFSET_12L collapses all LDST*_ABS_LO12_NC widths into one wire
- * code; the width is recoverable from the patched LDR/STR instruction
- * encoding, so the reader picks the LDST64 form and the consumer can
- * disambiguate later if it cares. ADDR32NB is image-relative; v1
- * collapses it to R_ABS32 and lets layout subtract the image base. */
-
-#include "obj/coff.h"
-
-u32 coff_aarch64_reloc_to(u32 kind /* RelocKind */) {
- switch (kind) {
- case R_NONE:
- return IMAGE_REL_ARM64_ABSOLUTE;
- case R_ABS64:
- return IMAGE_REL_ARM64_ADDR64;
- case R_ABS32:
- return IMAGE_REL_ARM64_ADDR32;
- case R_AARCH64_CALL26:
- case R_AARCH64_JUMP26:
- return IMAGE_REL_ARM64_BRANCH26;
- case R_AARCH64_CONDBR19:
- return IMAGE_REL_ARM64_BRANCH19;
- case R_AARCH64_TSTBR14:
- return IMAGE_REL_ARM64_BRANCH14;
- case R_AARCH64_ADR_PREL_PG_HI21:
- case R_AARCH64_ADR_PREL_PG_HI21_NC:
- return IMAGE_REL_ARM64_PAGEBASE_REL21;
- case R_AARCH64_ADR_PREL_LO21:
- return IMAGE_REL_ARM64_REL21;
- case R_AARCH64_ADD_ABS_LO12_NC:
- return IMAGE_REL_ARM64_PAGEOFFSET_12A;
- case R_AARCH64_LDST8_ABS_LO12_NC:
- case R_AARCH64_LDST16_ABS_LO12_NC:
- case R_AARCH64_LDST32_ABS_LO12_NC:
- case R_AARCH64_LDST64_ABS_LO12_NC:
- case R_AARCH64_LDST128_ABS_LO12_NC:
- return IMAGE_REL_ARM64_PAGEOFFSET_12L;
- case R_PC32:
- case R_REL32:
- return IMAGE_REL_ARM64_REL32;
- case R_COFF_SECREL:
- return IMAGE_REL_ARM64_SECREL;
- case R_COFF_SECTION:
- return IMAGE_REL_ARM64_SECTION;
- case R_COFF_AARCH64_SECREL_LOW12A:
- return IMAGE_REL_ARM64_SECREL_LOW12A;
- case R_COFF_AARCH64_SECREL_HIGH12A:
- return IMAGE_REL_ARM64_SECREL_HIGH12A;
- default:
- return IMAGE_REL_ARM64_ABSOLUTE;
- }
-}
-
-u32 coff_aarch64_reloc_from(u32 wire_type) {
- switch (wire_type) {
- case IMAGE_REL_ARM64_ABSOLUTE:
- return R_NONE;
- case IMAGE_REL_ARM64_ADDR64:
- return R_ABS64;
- case IMAGE_REL_ARM64_ADDR32:
- return R_ABS32;
- case IMAGE_REL_ARM64_ADDR32NB:
- return R_ABS32;
- case IMAGE_REL_ARM64_BRANCH26:
- return R_AARCH64_CALL26;
- case IMAGE_REL_ARM64_BRANCH19:
- return R_AARCH64_CONDBR19;
- case IMAGE_REL_ARM64_BRANCH14:
- return R_AARCH64_TSTBR14;
- case IMAGE_REL_ARM64_PAGEBASE_REL21:
- return R_AARCH64_ADR_PREL_PG_HI21;
- case IMAGE_REL_ARM64_REL21:
- return R_AARCH64_ADR_PREL_LO21;
- case IMAGE_REL_ARM64_PAGEOFFSET_12A:
- return R_AARCH64_ADD_ABS_LO12_NC;
- case IMAGE_REL_ARM64_PAGEOFFSET_12L:
- return R_AARCH64_LDST64_ABS_LO12_NC;
- case IMAGE_REL_ARM64_REL32:
- return R_PC32;
- case IMAGE_REL_ARM64_SECREL:
- return R_COFF_SECREL;
- case IMAGE_REL_ARM64_SECTION:
- return R_COFF_SECTION;
- case IMAGE_REL_ARM64_SECREL_LOW12A:
- return R_COFF_AARCH64_SECREL_LOW12A;
- case IMAGE_REL_ARM64_SECREL_HIGH12A:
- return R_COFF_AARCH64_SECREL_HIGH12A;
- default:
- return (u32)-1; /* sentinel */
- }
-}
diff --git a/src/obj/coff_reloc_x86_64.c b/src/obj/coff_reloc_x86_64.c
@@ -1,76 +0,0 @@
-/* RelocKind <-> x86_64 PE/COFF reloc-type mapping. Mirror of
- * elf_reloc_x86_64.c for PE/COFF.
- *
- * PE/COFF's AMD64 reloc set is much narrower than ELF's: only ABSOLUTE,
- * ADDR64, ADDR32, ADDR32NB, REL32 (with REL32_1..5 implicit-addend
- * variants), plus a few section-relative forms cfree does not model in
- * v1. We emit plain REL32 (4) for every PC-relative kind and let the
- * explicit Reloc.addend ride in the patched bytes; on the read side
- * REL32_1..5 collapse to R_PC32 (the reader applies the implicit
- * addend itself). IMAGE_REL_AMD64_ABSOLUTE (== 0) doubles as the
- * "unsupported" sentinel on the _to side, matching the ELF contract. */
-
-#include "obj/coff.h"
-
-u32 coff_x86_64_reloc_to(u32 kind /* RelocKind */) {
- switch (kind) {
- case R_NONE:
- return IMAGE_REL_AMD64_ABSOLUTE;
- case R_ABS64:
- return IMAGE_REL_AMD64_ADDR64;
- case R_ABS32:
- return IMAGE_REL_AMD64_ADDR32;
- case R_X64_32S:
- return IMAGE_REL_AMD64_ADDR32NB;
- case R_PC32:
- case R_REL32:
- case R_PLT32:
- case R_X64_PLT32:
- case R_X64_GOTPCREL:
- case R_X64_GOTPCRELX:
- case R_X64_REX_GOTPCRELX:
- return IMAGE_REL_AMD64_REL32;
- case R_COFF_SECREL:
- return IMAGE_REL_AMD64_SECREL;
- case R_COFF_SECTION:
- return IMAGE_REL_AMD64_SECTION;
- default:
- return IMAGE_REL_AMD64_ABSOLUTE;
- }
-}
-
-u32 coff_x86_64_reloc_from(u32 wire_type) {
- switch (wire_type) {
- case IMAGE_REL_AMD64_ABSOLUTE:
- return R_NONE;
- case IMAGE_REL_AMD64_ADDR64:
- return R_ABS64;
- case IMAGE_REL_AMD64_ADDR32:
- return R_ABS32;
- case IMAGE_REL_AMD64_ADDR32NB:
- return R_X64_32S;
- case IMAGE_REL_AMD64_REL32:
- case IMAGE_REL_AMD64_REL32_1:
- case IMAGE_REL_AMD64_REL32_2:
- case IMAGE_REL_AMD64_REL32_3:
- case IMAGE_REL_AMD64_REL32_4:
- case IMAGE_REL_AMD64_REL32_5:
- return R_PC32;
- case IMAGE_REL_AMD64_SECREL:
- return R_COFF_SECREL;
- case IMAGE_REL_AMD64_SECTION:
- return R_COFF_SECTION;
- /* SECREL7 (7-bit section-relative) appears in mingw-emitted archive
- * members (intrinsic helpers, exception tables, DWARF). cfree
- * doesn't currently apply or emit these, but panicking at read
- * time would block ingesting any mingw archive whose non-import
- * members carry .debug_info / .pdata. Map to R_NONE so the
- * relocation slot is preserved structurally but treated as a
- * no-op by the relocator; the member can still be dead-stripped
- * when nothing references it. */
- case IMAGE_REL_AMD64_SECREL7:
- return R_NONE;
- default:
- return (u32)-1; /* sentinel */
- }
-}
diff --git a/src/obj/elf.h b/src/obj/elf/elf.h
diff --git a/src/obj/elf/emit.c b/src/obj/elf/emit.c
@@ -0,0 +1,752 @@
+/* ELF ET_REL writer. Walks a finalized ObjBuilder and emits a 64-bit
+ * little-endian relocatable object via the supplied Writer.
+ *
+ * Layout strategy:
+ *   1. plan ELF section headers (one per live obj section, plus one
+ *      synthesized SHT_GROUP per ObjGroup, one .rela.<name> per obj section
+ *      that carries relocations, .symtab, and a single .strtab);
+ *   2. build .symtab + .strtab content (locals first, then globals/weaks;
+ *      STT_SECTION symbols are emitted only when the input ObjBuilder
+ *      already carries them, they are never synthesized here);
+ *   3. build .rela.* content using the per-arch reloc map (selected
+ *      by Compiler.target.arch);
+ *   4. append the section names to that same .strtab, which doubles as the
+ *      section-name string table (e_shstrndx points at it);
+ *   5. assign file offsets sequentially, respecting per-section
+ *      addralign;
+ *   6. write Ehdr, then each section's bytes (seeking to its sh_offset),
+ *      then the section header table.
+ *
+ * 64-bit little-endian only. Per-arch reloc tables (elf_reloc_<arch>.c)
+ * supply the RelocKind -> ELF type mapping; e_machine is selected from
+ * Compiler.target.arch. Big-endian / 32-bit ELF panic at entry.
+ *
+ * See doc/DESIGN.md §5.5 for the round-trip invariant: read_elf of this
+ * output must produce an ObjBuilder shape-equivalent to the input,
+ * modulo (a) section ordering and (b) the synthesized STT_SECTION
+ * symbols (which are visible to read_elf but were not in the input). */
+
+#include <string.h>
+
+#include "core/arena.h"
+#include "core/buf.h"
+#include "core/heap.h"
+#include "core/pool.h"
+#include "core/slice.h"
+#include "core/util.h"
+#include "obj/elf/elf.h"
+#include "obj/format.h"
+
+/* Zero-initialized source location, used for diagnostics raised by the
+ * object writer, which has no source position to report. */
+static SrcLoc no_loc(void) {
+  return (SrcLoc){0, 0, 0};
+}
+
+/* ---- per-ELF-section plan record ---- */
+
+/* Internal section descriptor used during planning. Mirrors Elf64_Shdr
+ * but with an explicit pointer to the source bytes (either an obj
+ * Section's chunked Buf or a synthesized linear buffer). NOBITS sections
+ * have no source bytes and consume no file space.
+ *
+ * emit_elf fills one record per output section, assigns sh_offset in its
+ * offset pass, and serializes the sh_* fields verbatim into the section
+ * header table. */
+typedef struct ElfSec {
+  /* Final shdr fields (little-endian-encoded at write time). */
+  u32 sh_name; /* byte offset of the name in the emitted string table */
+  u32 sh_type;
+  u64 sh_flags;
+  u64 sh_addr; /* always 0 for ET_REL */
+  u64 sh_offset; /* file offset; assigned after all sections are planned */
+  u64 sh_size;
+  u32 sh_link;
+  u32 sh_info;
+  u64 sh_addralign;
+  u64 sh_entsize;
+
+  /* Section name, kept until the string table is built and sh_name can be
+   * resolved. The string is not owned by this record: it points into the
+   * global pool (obj-section and group names), into scratch (synthesized
+   * ".rela<name>" strings), or at a string literal (.symtab / .strtab /
+   * ".group"). name_len excludes the terminating NUL. */
+  const char* name;
+  u32 name_len;
+
+  const Buf* obj_bytes; /* payload source: chunked bytes of an obj section, */
+  const u8* raw_bytes; /* or a synthesized linear buffer of sh_size bytes, */
+  int is_nobits; /* or nothing at all (SHT_NOBITS: no file bytes written) */
+} ElfSec;
+
+/* ---- emit ---- */
+
+/* Translate the canonical SF_* section-flag bits into their ELF SHF_*
+ * counterparts. Bits without a row in the table are dropped. */
+static u32 sec_flags_to_elf(u16 flags) {
+  static const struct {
+    u32 sf;  /* canonical flag bit */
+    u64 shf; /* ELF flag bit it maps to */
+  } flag_map[] = {
+      {SF_ALLOC, SHF_ALLOC},     {SF_EXEC, SHF_EXECINSTR},
+      {SF_WRITE, SHF_WRITE},     {SF_TLS, SHF_TLS},
+      {SF_MERGE, SHF_MERGE},     {SF_STRINGS, SHF_STRINGS},
+      {SF_GROUP, SHF_GROUP},     {SF_LINK_ORDER, SHF_LINK_ORDER},
+      {SF_RETAIN, SHF_GNU_RETAIN},
+  };
+  u64 elf_bits = 0;
+  for (u32 k = 0; k < sizeof flag_map / sizeof flag_map[0]; ++k) {
+    if (flags & flag_map[k].sf) elf_bits |= flag_map[k].shf;
+  }
+  return (u32)elf_bits;
+}
+
+/* Map a canonical section semantic (SSEM_*) to the ELF sh_type. Anything
+ * not listed, like SSEM_PROGBITS itself, is emitted as SHT_PROGBITS. */
+static u32 sec_sem_to_elf(u16 sem) {
+  switch (sem) {
+    case SSEM_NOBITS:        return SHT_NOBITS;
+    case SSEM_SYMTAB:        return SHT_SYMTAB;
+    case SSEM_STRTAB:        return SHT_STRTAB;
+    case SSEM_RELA:          return SHT_RELA;
+    case SSEM_REL:           return SHT_REL;
+    case SSEM_NOTE:          return SHT_NOTE;
+    case SSEM_INIT_ARRAY:    return SHT_INIT_ARRAY;
+    case SSEM_FINI_ARRAY:    return SHT_FINI_ARRAY;
+    case SSEM_PREINIT_ARRAY: return SHT_PREINIT_ARRAY;
+    case SSEM_GROUP:         return SHT_GROUP;
+    case SSEM_PROGBITS:
+    default:                 return SHT_PROGBITS;
+  }
+}
+
+/* Map a canonical symbol binding (SB_*) to the ELF STB_* value. Local is
+ * the fallback for SB_LOCAL and for any binding not recognized here. */
+static u8 sym_bind_to_elf(u16 bind) {
+  if (bind == SB_GLOBAL) return STB_GLOBAL;
+  if (bind == SB_WEAK) return STB_WEAK;
+  return STB_LOCAL;
+}
+
+/* Map a canonical symbol kind (SK_*) to the ELF STT_* symbol type. Kinds
+ * with no dedicated ELF type fall back to STT_NOTYPE. */
+static u8 sym_kind_to_elf(u16 kind) {
+  switch (kind) {
+    case SK_FUNC:    return STT_FUNC;
+    case SK_SECTION: return STT_SECTION;
+    case SK_FILE:    return STT_FILE;
+    case SK_TLS:     return STT_TLS;
+    case SK_IFUNC:   return STT_GNU_IFUNC;
+
+    /* Tentative definitions share STT_OBJECT with ordinary data: real ELF
+     * emitters (clang, gcc, GNU as) write them as STT_OBJECT with
+     * shndx=SHN_COMMON. STT_COMMON is a near-extinct convention that
+     * llvm-readelf renders as the literal type name "COMMON"; emitting it
+     * breaks roundtrip against any toolchain-produced .o. */
+    case SK_OBJ:
+    case SK_COMMON:
+      return STT_OBJECT;
+
+    /* SK_ABS is distinguished by its SHN_ABS section index, not its type. */
+    case SK_UNDEF:
+    case SK_ABS:
+    case SK_NOTYPE:
+    default:
+      return STT_NOTYPE;
+  }
+}
+
+/* Map a canonical symbol visibility (SV_*) to the ELF STV_* value stored in
+ * st_other. Default visibility is the fallback for unrecognized input. */
+static u8 sym_vis_to_elf(u8 vis) {
+  if (vis == SV_HIDDEN) return STV_HIDDEN;
+  if (vis == SV_PROTECTED) return STV_PROTECTED;
+  if (vis == SV_INTERNAL) return STV_INTERNAL;
+  return STV_DEFAULT;
+}
+
+/* Compute st_shndx for `s`.
+ *
+ * obj_to_elf maps an obj section id to its ELF section index and has `nsec`
+ * entries. Symbols without a section, or with an id outside that table, are
+ * emitted as SHN_UNDEF. */
+static u16 sym_shndx(const ObjSym* s, const u32* obj_to_elf, u32 nsec) {
+  switch (s->kind) {
+    case SK_COMMON:
+      return (u16)SHN_COMMON;
+    /* STT_FILE conventionally carries SHN_ABS as its shndx, since its value
+     * field is not an address. Match clang/binutils. */
+    case SK_ABS:
+    case SK_FILE:
+      return (u16)SHN_ABS;
+    default:
+      break;
+  }
+  if (s->section_id == OBJ_SEC_NONE || s->section_id >= nsec) {
+    return (u16)SHN_UNDEF;
+  }
+  return (u16)obj_to_elf[s->section_id];
+}
+
+/* Resolve interned name `n` through the compiler's global pool.
+ *
+ * Returns the pooled characters and stores their length in *len_out. When
+ * the pool yields no string, returns "" and stores 0 instead. */
+static const char* sym_to_str(Compiler* c, Sym n, u32* len_out) {
+  Slice text = pool_slice(c->global, n);
+  int present = text.s != 0;
+  *len_out = present ? (u32)text.len : 0;
+  return present ? text.s : "";
+}
+
+/* Intern the `len`-byte string `s` in string table `b` and return its offset.
+ *
+ * The empty string always maps to offset 0 (the table's leading NUL).
+ * Otherwise the table is searched for an existing run of bytes equal to `s`
+ * that is immediately followed by a NUL; the run may start at any offset, so
+ * a name can share the tail of a longer one. clang/binutils both dedupe
+ * identical strings, and matching that keeps our strtab comparable in size.
+ * Only when no such run exists are `s` and a terminating NUL appended.
+ *
+ * The search flattens the segmented Buf into a temporary (stack for tables
+ * of up to 256 bytes, heap beyond that) and scans it linearly; symbol and
+ * section counts are small enough that no hash is needed. If the heap
+ * temporary cannot be allocated the search is skipped and the string is
+ * simply appended. */
+static u32 strtab_add(Buf* b, const char* s, u32 len) {
+  if (len == 0) return 0;
+
+  u32 end = buf_pos(b);
+
+  /* A match needs `len` bytes plus a NUL, so shorter tables cannot hit. */
+  if (end > len) {
+    u8 inline_buf[256];
+    int on_heap = end > sizeof inline_buf;
+    u8* flat = on_heap ? (u8*)b->heap->alloc(b->heap, end, 1) : inline_buf;
+    if (flat) {
+      u32 hit = end; /* `end` doubles as the "no match" marker */
+      buf_flatten(b, flat);
+      for (u32 at = 0; at + len < end; ++at) {
+        if (flat[at + len] != 0) continue;
+        if (memcmp(flat + at, s, len) != 0) continue;
+        hit = at;
+        break;
+      }
+      if (on_heap) b->heap->free(b->heap, flat, end);
+      if (hit != end) return hit;
+    }
+  }
+
+  u8 nul = 0;
+  buf_write(b, s, len);
+  buf_write(b, &nul, 1);
+  return end;
+}
+
+/* Serialize `ob` as a 64-bit little-endian ELF ET_REL object through `w`.
+ *
+ * c:  compiler context; supplies the target description, the scratch arena
+ *     that backs every planning buffer, and the heap used for temporaries.
+ * ob: object to emit. obj_sweep_dead() is run on it first.
+ * w:  destination; written with explicit seeks (header, then each section
+ *     at its sh_offset, then the section header table).
+ *
+ * Panics via compiler_panic on: an arch without an ELF reloc map, a
+ * big-endian or non-64-bit target, a relocation kind the arch map cannot
+ * express, a section count that does not fit the 16-bit ELF index fields,
+ * or heap exhaustion while staging section bytes. */
+void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) {
+  Heap* h = (Heap*)c->ctx->heap;
+
+  /* Run the tombstone sweep before any iteration: cascades removed
+   * sections into their defining symbols, drops dangling relocs,
+   * compacts groups, and absorbs the historical UNDEF prune. After this
+   * call every direct ID-based access below must skip entries whose
+   * `removed` bit is set. */
+  obj_sweep_dead(ob);
+
+  /* ---- target validation ------------------------------------------ */
+  const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_ELF);
+  const ObjElfArchOps* elf =
+      fmt && fmt->elf_arch ? fmt->elf_arch(c->target.arch) : NULL;
+  u32 e_machine;
+  u32 (*reloc_to)(u32);
+  if (!elf || !elf->reloc_to) {
+    compiler_panic(c, no_loc(), "emit_elf: unsupported target arch %u",
+                   (u32)c->target.arch);
+  }
+  e_machine = elf->e_machine;
+  reloc_to = elf->reloc_to;
+  if (c->target.big_endian) {
+    compiler_panic(c, no_loc(), "emit_elf: big-endian ELF not supported");
+  }
+  if (c->target.ptr_size != 8) {
+    compiler_panic(c, no_loc(), "emit_elf: ptr_size %u (expected 8)",
+                   (u32)c->target.ptr_size);
+  }
+
+  /* ---- pass 1: plan ELF section list ------------------------------ */
+
+  u32 nobjsec = obj_section_count(ob);
+
+  u32 nobjgrp = obj_group_count(ob);
+  /* Upper bound on ELF section count:
+   *   1                 (SHN_UNDEF)
+   *   + nobjsec - 1     (one ELF entry per real obj section)
+   *   + nobjsec - 1     (worst case: a .rela.<name> per obj section)
+   *   + nobjgrp - 1     (one synthesized SHT_GROUP per ObjGroup)
+   *   + 2               (.symtab, .strtab; one spare slot is kept)
+   */
+  u32 max_secs =
+      1 + (nobjsec - 1) + (nobjsec - 1) + (nobjgrp ? nobjgrp - 1 : 0) + 3;
+  if (max_secs < 4) max_secs = 4;
+  ElfSec* secs = arena_array(c->scratch, ElfSec, max_secs);
+  u32 nsecs = 0;
+  memset(&secs[nsecs++], 0, sizeof secs[0]); /* index 0 = SHN_UNDEF */
+
+  /* Map obj section id -> ELF section index. Zero-filled, so removed
+   * sections resolve to index 0 (SHN_UNDEF). */
+  u32* obj_to_elf = arena_zarray(c->scratch, u32, nobjsec);
+
+  for (u32 i = 1; i < nobjsec; ++i) {
+    const Section* s = obj_section_get(ob, i);
+    if (s->removed) continue; /* tombstone — see obj_sweep_dead */
+    ElfSec* es = &secs[nsecs];
+    memset(es, 0, sizeof *es);
+    u32 nlen;
+    es->name = sym_to_str(c, s->name, &nlen);
+    es->name_len = nlen;
+    /* Honor format-specific overrides preserved by the reader for
+     * sh_type/sh_flags bits the canonical SecSem/SecFlag enums
+     * don't model (e.g. SHT_LLVM_ADDRSIG, SHF_EXCLUDE). */
+    es->sh_type = (s->ext_kind == OBJ_EXT_ELF && s->ext_type)
+                      ? s->ext_type
+                      : sec_sem_to_elf(s->sem);
+    es->sh_flags = sec_flags_to_elf(s->flags);
+    if (s->ext_kind == OBJ_EXT_ELF) es->sh_flags |= s->ext_flags;
+    es->sh_addr = 0;
+    es->sh_addralign = s->align ? s->align : 1;
+    es->sh_entsize = s->entsize;
+    es->sh_link = 0;
+    es->sh_info = 0;
+    if (s->sem == SSEM_NOBITS) {
+      es->is_nobits = 1;
+      es->sh_size = s->bss_size;
+    } else {
+      es->obj_bytes = &s->bytes;
+      es->sh_size = s->bytes.total;
+    }
+    obj_to_elf[i] = nsecs++;
+  }
+
+  /* ---- pass 2: build .symtab + .strtab content -------------------- */
+
+  /* .strtab: leading NUL byte. Then a name per emitted symbol. */
+  Buf strtab;
+  buf_init(&strtab, h);
+  {
+    u8 z = 0;
+    buf_write(&strtab, &z, 1);
+  }
+
+  /* The .symtab is built into a contiguous arena buffer of fixed-size
+   * ELF64_SYM_SIZE records. We don't know the count up front; bound by
+   * (nobjsec section symbols) + (obj symbol count). */
+  u32 nobjsym = 0;
+  {
+    ObjSymIter* it = obj_symiter_new(ob);
+    ObjSymEntry e;
+    while (obj_symiter_next(it, &e)) ++nobjsym;
+    obj_symiter_free(it);
+  }
+  u32 max_syms = 1 + (nobjsec - 1) + nobjsym;
+  u8* symtab = (u8*)arena_alloc(c->scratch, (size_t)ELF64_SYM_SIZE * max_syms,
+                                _Alignof(u64));
+  u32 nsyms = 0;
+  memset(&symtab[nsyms * ELF64_SYM_SIZE], 0, ELF64_SYM_SIZE);
+  nsyms = 1; /* index 0: STN_UNDEF */
+
+/* Helper to emit one Elf64_Sym record at index `idx` into symtab. */
+#define WRITE_SYM(idx, st_name, st_info, st_other, st_shndx, st_value, \
+                  st_size)                                             \
+  do {                                                                 \
+    u8* slot = &symtab[(idx) * ELF64_SYM_SIZE];                        \
+    slot[0] = (u8)((st_name));                                         \
+    slot[1] = (u8)((st_name) >> 8);                                    \
+    slot[2] = (u8)((st_name) >> 16);                                   \
+    slot[3] = (u8)((st_name) >> 24);                                   \
+    slot[4] = (u8)((st_info));                                         \
+    slot[5] = (u8)((st_other));                                        \
+    slot[6] = (u8)((st_shndx));                                        \
+    slot[7] = (u8)((st_shndx) >> 8);                                   \
+    for (int _b = 0; _b < 8; ++_b)                                     \
+      slot[8 + _b] = (u8)((u64)(st_value) >> (_b * 8));                \
+    for (int _b = 0; _b < 8; ++_b)                                     \
+      slot[16 + _b] = (u8)((u64)(st_size) >> (_b * 8));                \
+  } while (0)
+
+  /* No automatic STT_SECTION synthesis. Section symbols are emitted
+   * iff they are present in the input ObjBuilder (typically as
+   * SK_SECTION ObjSyms preserved by read_elf, or added explicitly by
+   * a hand-built caller that needs to reference a section by sym).
+   * This matches clang's output: only sections referenced by section
+   * symbols carry one. */
+
+  /* Map obj symbol id -> elf symbol index. */
+  u32* sym_to_elf = arena_zarray(c->scratch, u32, nobjsym + 2);
+
+  /* sh_info on .symtab is the index of the first non-local symbol, i.e.
+   * 1 (STN_UNDEF) + the number of live LOCAL symbols. That is exactly
+   * the value of nsyms once the locals pass below has finished. */
+  u32 nlocals = 1;
+
+  /* Two passes over obj symbols: locals, then globals/weak. */
+  for (int pass = 0; pass < 2; ++pass) {
+    ObjSymIter* it = obj_symiter_new(ob);
+    ObjSymEntry e;
+    while (obj_symiter_next(it, &e)) {
+      const ObjSym* s = e.sym;
+      if (s->removed) continue; /* spurious-UNDEF prune + explicit removal */
+      int is_local = (s->bind == SB_LOCAL);
+      if ((pass == 0) != is_local) continue;
+      u32 nlen;
+      const char* nm = sym_to_str(c, s->name, &nlen);
+      u32 nameoff = nlen ? strtab_add(&strtab, nm, nlen) : 0;
+      u8 info =
+          ELF64_ST_INFO(sym_bind_to_elf(s->bind), sym_kind_to_elf(s->kind));
+      u8 other = sym_vis_to_elf(s->vis);
+      u16 shndx = sym_shndx(s, obj_to_elf, nobjsec);
+      /* For SHN_COMMON symbols st_value carries the alignment. */
+      u64 value = (s->kind == SK_COMMON) ? s->common_align : s->value;
+      WRITE_SYM(nsyms, nameoff, info, other, shndx, value, s->size);
+      sym_to_elf[e.id] = nsyms;
+      nsyms++;
+    }
+    obj_symiter_free(it);
+    if (pass == 0) nlocals = nsyms;
+  }
+#undef WRITE_SYM
+
+  /* ELF indices of the synthesized tables; sh_link/sh_info for .symtab,
+   * SHT_GROUP and .rela.* are filled in once these are known. */
+  u32 idx_symtab = 0, idx_strtab = 0, idx_shstrtab = 0;
+
+  /* ---- pass 2.5: synthesize SHT_GROUP sections from ObjGroups ----
+   * Append one SHT_GROUP section per ObjGroup. The body is a 4-byte LE
+   * flags word followed by the elf section index of each member.
+   * Placed before relas so the file layout has data sections, then
+   * groups, then relas/symtab/strtab — matching clang's ordering and
+   * keeping data-section offsets independent of group presence. */
+  u32* group_elf_idx =
+      nobjgrp > 1 ? arena_array(c->scratch, u32, nobjgrp) : NULL;
+  if (group_elf_idx) memset(group_elf_idx, 0, sizeof(u32) * nobjgrp);
+  for (u32 gi = 1; gi < nobjgrp; ++gi) {
+    const ObjGroup* g = obj_group_get(ob, gi);
+    if (!g || g->removed) continue;
+
+    u32 body_size = 4u + 4u * g->nsections;
+    u8* body = (u8*)arena_alloc(c->scratch, body_size, _Alignof(u32));
+    u32 gflags = g->flags ? g->flags : 1u; /* GRP_COMDAT default */
+    body[0] = (u8)(gflags);
+    body[1] = (u8)(gflags >> 8);
+    body[2] = (u8)(gflags >> 16);
+    body[3] = (u8)(gflags >> 24);
+    for (u32 j = 0; j < g->nsections; ++j) {
+      ObjSecId sid = g->sections[j];
+      u32 eidx = (sid && sid < nobjsec) ? obj_to_elf[sid] : 0;
+      u8* slot = body + 4 + j * 4;
+      slot[0] = (u8)(eidx);
+      slot[1] = (u8)(eidx >> 8);
+      slot[2] = (u8)(eidx >> 16);
+      slot[3] = (u8)(eidx >> 24);
+    }
+
+    u32 nlen;
+    const char* gname = sym_to_str(c, g->name, &nlen);
+    if (nlen == 0) {
+      gname = ".group";
+      nlen = 6;
+    }
+
+    ElfSec* es = &secs[nsecs];
+    memset(es, 0, sizeof *es);
+    es->name = gname;
+    es->name_len = nlen;
+    es->sh_type = SHT_GROUP;
+    es->sh_flags = 0;
+    es->sh_addralign = 4;
+    es->sh_entsize = 4;
+    es->sh_info = (g->signature && g->signature < nobjsym + 2)
+                      ? sym_to_elf[g->signature]
+                      : 0;
+    /* sh_link patched below once idx_symtab is known. */
+    es->raw_bytes = body;
+    es->sh_size = body_size;
+    group_elf_idx[gi] = nsecs;
+    nsecs++;
+  }
+
+  /* ---- pass 3: build .rela.<name> contents ------------------------ */
+
+  /* Allocate one .rela section per obj section that has any relocs. */
+  u32 total_relocs = obj_reloc_total(ob);
+
+  typedef struct RelaPlan {
+    u32 obj_section; /* obj section the rela applies to */
+    u8* bytes;       /* arena-allocated rela bytes */
+    u32 size;        /* byte count = emitted relocs * ELF64_RELA_SIZE */
+  } RelaPlan;
+
+  RelaPlan* rela_plans = arena_zarray(c->scratch, RelaPlan, nobjsec);
+  u32 nrela_plans = 0;
+
+  for (u32 si = 1; si < nobjsec; ++si) {
+    const Section* host = obj_section_get(ob, si);
+    if (!host || host->removed) continue;
+    u32 nr = obj_reloc_count(ob, si);
+    if (!nr) continue;
+    u8* buf = (u8*)arena_alloc(c->scratch, (size_t)ELF64_RELA_SIZE * nr,
+                               _Alignof(u64));
+    u32 j = 0;
+    for (u32 i = 0; i < total_relocs; ++i) {
+      const Reloc* r = obj_reloc_at(ob, i);
+      if (r->removed) continue;
+      if (r->section_id != si) continue;
+      u32 etype = reloc_to(r->kind);
+      if (etype == ELF_R_AARCH64_NONE /* == ELF_R_X86_64_NONE == 0 */ &&
+          r->kind != R_NONE) {
+        compiler_panic(c, no_loc(),
+                       "emit_elf: unsupported relocation kind %u for arch %u",
+                       (u32)r->kind, (u32)c->target.arch);
+      }
+      /* A reloc without a symbol is written against symbol index 0
+       * (STN_UNDEF); no section symbol is synthesized for it. */
+      u32 sym_elf_idx = (r->sym == OBJ_SYM_NONE) ? 0 : sym_to_elf[r->sym];
+      u8* slot = &buf[j * ELF64_RELA_SIZE];
+      for (int b = 0; b < 8; ++b) slot[b] = (u8)((u64)r->offset >> (b * 8));
+      u64 info = ELF64_R_INFO(sym_elf_idx, etype);
+      for (int b = 0; b < 8; ++b) slot[8 + b] = (u8)(info >> (b * 8));
+      for (int b = 0; b < 8; ++b)
+        slot[16 + b] = (u8)((u64)r->addend >> (b * 8));
+      ++j;
+    }
+    /* Size the section from the records actually written (j), not from
+     * nr: the loop above skips tombstoned relocs, and any shortfall would
+     * otherwise be emitted as uninitialized arena bytes. */
+    if (j == 0) continue;
+    rela_plans[nrela_plans].obj_section = si;
+    rela_plans[nrela_plans].bytes = buf;
+    rela_plans[nrela_plans].size = j * ELF64_RELA_SIZE;
+    nrela_plans++;
+  }
+
+  /* Append ElfSec entries for each .rela.<name>. Names are ".rela" +
+   * the obj section name; allocate in scratch. */
+  u32* rela_elf_idx = arena_array(c->scratch, u32, nrela_plans + 1);
+  for (u32 ri = 0; ri < nrela_plans; ++ri) {
+    u32 si = rela_plans[ri].obj_section;
+    const Section* s = obj_section_get(ob, si);
+    u32 base_len;
+    const char* base = sym_to_str(c, s->name, &base_len);
+    u32 nlen = 5 + base_len; /* ".rela" + base */
+    char* nm = (char*)arena_alloc(c->scratch, nlen + 1, 1);
+    memcpy(nm, ".rela", 5);
+    memcpy(nm + 5, base, base_len);
+    nm[nlen] = 0;
+
+    ElfSec* es = &secs[nsecs];
+    memset(es, 0, sizeof *es);
+    es->name = nm;
+    es->name_len = nlen;
+    es->sh_type = SHT_RELA;
+    es->sh_flags = SHF_INFO_LINK;
+    es->sh_addralign = 8;
+    es->sh_entsize = ELF64_RELA_SIZE;
+    es->sh_info = obj_to_elf[si]; /* section the relas apply to */
+    /* sh_link filled below once we know symtab's elf index. */
+    es->raw_bytes = rela_plans[ri].bytes;
+    es->sh_size = rela_plans[ri].size;
+    rela_elf_idx[ri] = nsecs;
+    nsecs++;
+  }
+
+  /* Append .symtab. */
+  {
+    ElfSec* es = &secs[nsecs];
+    memset(es, 0, sizeof *es);
+    es->name = ".symtab";
+    es->name_len = 7;
+    es->sh_type = SHT_SYMTAB;
+    es->sh_flags = 0;
+    es->sh_addralign = 8;
+    es->sh_entsize = ELF64_SYM_SIZE;
+    es->raw_bytes = symtab;
+    es->sh_size = (u64)nsyms * ELF64_SYM_SIZE;
+    es->sh_info = nlocals; /* first non-local symbol */
+    idx_symtab = nsecs;
+    nsecs++;
+  }
+
+  /* Patch sh_link on each .rela section now that we have idx_symtab. */
+  for (u32 ri = 0; ri < nrela_plans; ++ri) {
+    secs[rela_elf_idx[ri]].sh_link = idx_symtab;
+  }
+  /* SHT_GROUP also points its sh_link at .symtab (the symtab the
+   * signature symbol's index in sh_info refers to). */
+  for (u32 gi = 1; gi < nobjgrp; ++gi) {
+    if (group_elf_idx && group_elf_idx[gi]) {
+      secs[group_elf_idx[gi]].sh_link = idx_symtab;
+    }
+  }
+
+  /* ---- pass 4: append section names to the same strtab and emit it.
+   *
+   * clang reuses .strtab for both symbol names and section names —
+   * e_shstrndx and .symtab.sh_link both point at it. Match that
+   * convention: continue appending into `strtab` (which already
+   * contains the symbol names), then emit one STRTAB section. */
+
+  /* secs[0] (SHN_UNDEF) carries name "" → offset 0. */
+  secs[0].sh_name = 0;
+  for (u32 i = 1; i < nsecs; ++i) {
+    secs[i].sh_name = strtab_add(&strtab, secs[i].name, secs[i].name_len);
+  }
+
+  /* Append the .strtab section record itself; its own name lands in
+   * the same buffer (so the strtab is self-describing). */
+  {
+    const char* nm = ".strtab";
+    u32 nlen = 7;
+    u32 nameoff = strtab_add(&strtab, nm, nlen);
+    u32 sz = buf_pos(&strtab);
+    u8* flat = (u8*)arena_alloc(c->scratch, sz, 1);
+    buf_flatten(&strtab, flat);
+    buf_fini(&strtab);
+
+    ElfSec* es = &secs[nsecs];
+    memset(es, 0, sizeof *es);
+    es->name = nm;
+    es->name_len = nlen;
+    es->sh_name = nameoff;
+    es->sh_type = SHT_STRTAB;
+    es->sh_addralign = 1;
+    es->raw_bytes = flat;
+    es->sh_size = sz;
+    idx_strtab = nsecs;
+    idx_shstrtab = nsecs; /* same section serves both roles */
+    nsecs++;
+  }
+  secs[idx_symtab].sh_link = idx_strtab;
+
+  /* e_shnum, e_shstrndx and st_shndx are 16-bit fields, and indices from
+   * SHN_LORESERVE (0xff00) upward are reserved. Larger objects need the
+   * extended-numbering scheme (SHN_XINDEX / SHT_SYMTAB_SHNDX), which this
+   * writer does not implement; fail loudly rather than truncate. */
+  if (nsecs >= 0xff00u) {
+    compiler_panic(c, no_loc(),
+                   "emit_elf: %u sections exceed the ELF 16-bit index range",
+                   nsecs);
+  }
+
+  /* ---- pass 5: assign file offsets -------------------------------- */
+
+  u64 cur = ELF64_EHDR_SIZE;
+  for (u32 i = 1; i < nsecs; ++i) {
+    ElfSec* es = &secs[i];
+    if (es->is_nobits) {
+      /* sh_offset for NOBITS is conventionally where the next
+       * non-NOBITS section begins; we set it to cur without
+       * advancing. */
+      es->sh_offset = cur;
+      continue;
+    }
+    u64 a = es->sh_addralign ? es->sh_addralign : 1;
+    cur = ALIGN_UP(cur, a);
+    es->sh_offset = cur;
+    cur += es->sh_size;
+  }
+  cur = ALIGN_UP(cur, (u64)8);
+  u64 e_shoff = cur;
+
+  /* ---- pass 6: write Ehdr ----------------------------------------- */
+
+  u8 ident[EI_NIDENT] = {0};
+  ident[EI_MAG0] = ELFMAG0;
+  ident[EI_MAG1] = ELFMAG1;
+  ident[EI_MAG2] = ELFMAG2;
+  ident[EI_MAG3] = ELFMAG3;
+  ident[EI_CLASS] = ELFCLASS64;
+  ident[EI_DATA] = ELFDATA2LSB;
+  ident[EI_VERSION] = EV_CURRENT;
+  /* SysV is the canonical OSABI for relocatable AArch64 .o; clang and
+   * GNU ld both emit it for Linux targets. Linking does not key off
+   * EI_OSABI for plain AArch64 ELF — it's e_machine that matters.
+   *
+   * Exception: GNU extensions (STT_GNU_IFUNC, SHF_GNU_RETAIN, ...)
+   * require EI_OSABI=ELFOSABI_GNU. Clang sets it for any TU using a
+   * GNU-flavored marker; we mirror that so roundtrip is byte-stable. */
+  ident[EI_OSABI] = ELFOSABI_NONE;
+  {
+    ObjSymIter* it = obj_symiter_new(ob);
+    ObjSymEntry e;
+    while (obj_symiter_next(it, &e)) {
+      if (e.sym->removed) continue;
+      if (e.sym->kind == SK_IFUNC) {
+        ident[EI_OSABI] = ELFOSABI_GNU;
+        break;
+      }
+    }
+    obj_symiter_free(it);
+    if (ident[EI_OSABI] != ELFOSABI_GNU) {
+      for (u32 si = 1; si < nobjsec; ++si) {
+        const Section* sec = obj_section_get(ob, si);
+        if (sec && !sec->removed && (sec->flags & SF_RETAIN)) {
+          ident[EI_OSABI] = ELFOSABI_GNU;
+          break;
+        }
+      }
+    }
+  }
+  /* e_flags: prefer the value preserved from a prior read (round-trip);
+   * else fall back to the per-arch default from the arch ops. RV64 cfree
+   * targets the Linux psABI's lp64d soft-relax convention (RVC +
+   * double-float ABI). */
+  u32 e_flags;
+  if (!obj_get_elf_e_flags(ob, &e_flags)) e_flags = elf->e_flags;
+
+  cfree_writer_seek(w, 0);
+  cfree_writer_write(w, ident, EI_NIDENT);
+  elf_wr_u16(w, ET_REL);
+  elf_wr_u16(w, (u16)e_machine);
+  elf_wr_u32(w, EV_CURRENT);
+  elf_wr_u64(w, 0);                 /* e_entry */
+  elf_wr_u64(w, 0);                 /* e_phoff */
+  elf_wr_u64(w, e_shoff);           /* e_shoff */
+  elf_wr_u32(w, e_flags);           /* e_flags */
+  elf_wr_u16(w, ELF64_EHDR_SIZE);   /* e_ehsize */
+  elf_wr_u16(w, 0);                 /* e_phentsize */
+  elf_wr_u16(w, 0);                 /* e_phnum */
+  elf_wr_u16(w, ELF64_SHDR_SIZE);   /* e_shentsize */
+  elf_wr_u16(w, (u16)nsecs);        /* e_shnum (range-checked above) */
+  elf_wr_u16(w, (u16)idx_shstrtab); /* e_shstrndx */
+
+  /* ---- pass 7: write each section's bytes ------------------------- */
+
+  for (u32 i = 1; i < nsecs; ++i) {
+    ElfSec* es = &secs[i];
+    if (es->is_nobits || es->sh_size == 0) continue;
+    cfree_writer_seek(w, es->sh_offset);
+    if (es->obj_bytes) {
+      /* The obj section's Buf is segmented; stage it contiguously. */
+      u32 sz = es->obj_bytes->total;
+      u8* tmp = (u8*)h->alloc(h, sz ? sz : 1, 1);
+      if (!tmp) {
+        compiler_panic(c, no_loc(),
+                       "emit_elf: out of memory staging %u section bytes", sz);
+      }
+      if (sz) buf_flatten(es->obj_bytes, tmp);
+      cfree_writer_write(w, tmp, sz);
+      h->free(h, tmp, sz ? sz : 1);
+    } else if (es->raw_bytes) {
+      cfree_writer_write(w, es->raw_bytes, (size_t)es->sh_size);
+    }
+  }
+
+  /* ---- pass 8: write section header table ------------------------- */
+
+  cfree_writer_seek(w, e_shoff);
+  for (u32 i = 0; i < nsecs; ++i) {
+    const ElfSec* es = &secs[i];
+    elf_wr_u32(w, es->sh_name);
+    elf_wr_u32(w, es->sh_type);
+    elf_wr_u64(w, es->sh_flags);
+    elf_wr_u64(w, es->sh_addr);
+    elf_wr_u64(w, es->sh_offset);
+    elf_wr_u64(w, es->sh_size);
+    elf_wr_u32(w, es->sh_link);
+    elf_wr_u32(w, es->sh_info);
+    elf_wr_u64(w, es->sh_addralign);
+    elf_wr_u64(w, es->sh_entsize);
+  }
+}
diff --git a/src/obj/elf/link.c b/src/obj/elf/link.c
@@ -0,0 +1,1421 @@
+/* link_emit_elf: write a static ET_EXEC ELF64 image to the
+ * caller-provided Writer.
+ *
+ * 64-bit little-endian only. The per-arch ELF reloc-type tables in
+ * obj/elf_reloc_<arch>.c handle RelocKind <-> ELF translation; this
+ * file gets e_machine from the link arch descriptor.
+ *
+ * File layout (in write order):
+ *
+ * [headers PT_LOAD, PF_R, mapped at IMAGE_BASE]
+ * Ehdr64
+ * Phdr64[nphdr] -- one per loaded segment + headers + PT_NOTE
+ * .note.gnu.build-id -- 12 + 4 + 16 = 32 bytes
+ * (12-byte note header, "GNU\0" name, deterministic 16-byte id); pad to PAGE
+ *
+ * [PT_LOAD per kept image segment, in img->segments order]
+ * segment bytes (padded to its file_offset)
+ *
+ * [non-allocatable sections, file-only]
+ * .symtab -- ELF64_SYM_SIZE * nsyms
+ * .strtab -- NUL-led blob
+ * .shstrtab -- NUL-led blob
+ *
+ * [section header table at e_shoff]
+ * Shdr64[nshdr]
+ *
+ * Section header schema (for nm / objdump -t / gdb consumption):
+ *
+ * 0 SHN_UNDEF (zero entry)
+ * N one shdr per loaded sub-region: .text/.rodata/.data/.bss as
+ * the corresponding RX/R/RW segments materialize (.bss split
+ * out as the trailing memsz>filesz tail of the RW segment).
+ * 1 .note.gnu.build-id (allocatable, in headers PT_LOAD)
+ * 1 .symtab (sh_link -> .strtab; sh_info = first non-local idx)
+ * 1 .strtab
+ * 1 .shstrtab (Ehdr64.e_shstrndx)
+ *
+ * Build-id is computed deterministically over the post-relocation
+ * segment bytes (FNV-1a 64 over each segment, mixed into a 128-bit
+ * accumulator). The 16-byte digest is written into the note before the
+ * note is emitted to the Writer.
+ *
+ * The image's image-relative addresses on entry are bumped by
+ * align_up(headers_size, PAGE) before relocs are applied, exactly as
+ * before — segment bytes / symbol vaddrs land at their final IMAGE_BASE
+ * absolute addresses by the time relocs run. */
+
+#include "link/link.h"
+
+#include <string.h>
+
+#include "core/heap.h"
+#include "core/pool.h"
+#include "core/slice.h"
+#include "core/util.h"
+#include "core/vec.h"
+#include "link/link_arch.h"
+#include "link/link_internal.h"
+#include "obj/elf/elf.h"
+#include "obj/format.h"
+
+/* All-zero SrcLoc; passed to compiler_panic by the diagnostics in this file. */
+static SrcLoc no_loc(void) {
+  return (SrcLoc){0, 0, 0};
+}
+
+/* ---- ELF64 wire structs (subset) ---- */
+
+#define EI_NIDENT 16
+
+typedef struct __attribute__((packed)) Ehdr64 { /* ELF64 file header, wire layout */
+ u8 e_ident[EI_NIDENT]; /* identification bytes (magic, class, encoding, OS ABI) */
+ u16 e_type; /* object file type (ET_*) */
+ u16 e_machine; /* target architecture (EM_*) */
+ u32 e_version; /* ELF version */
+ u64 e_entry; /* entry point virtual address */
+ u64 e_phoff; /* file offset of the program header table */
+ u64 e_shoff; /* file offset of the section header table */
+ u32 e_flags; /* processor-specific flags */
+ u16 e_ehsize; /* size of this header in bytes */
+ u16 e_phentsize; /* size of one program header entry */
+ u16 e_phnum; /* number of program header entries */
+ u16 e_shentsize; /* size of one section header entry */
+ u16 e_shnum; /* number of section header entries */
+ u16 e_shstrndx; /* section index of .shstrtab */
+} Ehdr64;
+
+typedef struct __attribute__((packed)) Phdr64 { /* ELF64 program header, wire layout */
+ u32 p_type; /* segment type (PT_*) */
+ u32 p_flags; /* permission bits (PF_R / PF_W / PF_X) */
+ u64 p_offset; /* file offset of the segment's first byte */
+ u64 p_vaddr; /* virtual address of the segment */
+ u64 p_paddr; /* physical address, where meaningful */
+ u64 p_filesz; /* bytes of the segment present in the file */
+ u64 p_memsz; /* bytes the segment occupies in memory */
+ u64 p_align; /* segment alignment */
+} Phdr64;
+
+typedef struct __attribute__((packed)) Shdr64 { /* ELF64 section header, wire layout */
+ u32 sh_name; /* offset of the name within the section-name string table */
+ u32 sh_type; /* section type (SHT_*) */
+ u64 sh_flags; /* attribute bits (SHF_*) */
+ u64 sh_addr; /* virtual address when the section is loaded, else 0 */
+ u64 sh_offset; /* file offset of the section contents */
+ u64 sh_size; /* section size in bytes */
+ u32 sh_link; /* type-dependent link (e.g. .symtab -> .strtab) */
+ u32 sh_info; /* type-dependent info (e.g. .symtab: first non-local idx) */
+ u64 sh_addralign; /* required alignment */
+ u64 sh_entsize; /* entry size for table-like sections */
+} Shdr64;
+
+#define PT_NOTE 4
+#define PT_TLS 7
+
+/* Static ET_EXEC base. ET_DYN (PIE) uses 0 — the loader picks the
+ * runtime base. The active value lives in `img_base` below; the macro
+ * stays for the static path's hard-coded vaddrs. */
+#define IMAGE_BASE_STATIC 0x400000ULL
+
+#define BUILD_ID_DESC_LEN 16u
+#define NOTE_NAME_GNU "GNU"
+#define NOTE_NAME_GNU_LEN 4u /* "GNU\0" */
+#define NOTE_BUILD_ID_TYPE 3u
+#define BUILD_ID_NOTE_BYTES (12u + NOTE_NAME_GNU_LEN + BUILD_ID_DESC_LEN)
+
+/* ---- byte writer helpers ---- */
+
+static void write_bytes(Writer* w, const void* data, size_t n) {
+ w->write(w, data, n); /* hand n bytes at data to the Writer's write callback */
+}
+
+static void write_zeroes(Writer* w, size_t n) {
+  static const u8 pad[256] = {0}; /* shared zero source, emitted in chunks */
+  for (size_t left = n; left != 0;) {
+    size_t chunk = left < sizeof(pad) ? left : sizeof(pad);
+    w->write(w, pad, chunk);
+    left -= chunk;
+  }
+}
+
+static u32 perms_to_pflags(u32 secflags) {
+  /* PF_R is always set; PF_X / PF_W follow the SF_EXEC / SF_WRITE bits. */
+  u32 exec_bit = (secflags & SF_EXEC) ? PF_X : 0u;
+  u32 write_bit = (secflags & SF_WRITE) ? PF_W : 0u;
+  return PF_R | exec_bit | write_bit;
+}
+
+/* Scripted layouts arrive with final vaddrs (the script pinned them via
+ * `. = …`), so only the file dimension moves: every segment, section and
+ * reloc write site gets `delta` added to its file offset to make room for
+ * ehdr+phdrs. shift_image_addresses is the variant that also moves vaddrs. */
+static void shift_image_file_offsets(LinkImage* img, u64 delta) {
+  u32 n;
+  for (n = img->nsegments; n; --n) img->segments[n - 1].file_offset += delta;
+  for (n = img->nsections; n; --n) img->sections[n - 1].file_offset += delta;
+  for (n = LinkRelocs_count(&img->relocs); n; --n)
+    LinkRelocs_at(&img->relocs, n - 1)->write_file_offset += delta;
+}
+
+static void shift_image_addresses(LinkImage* img, u64 delta) {
+ u32 i; /* Adds delta to every image-relative file offset and vaddr held by img. */
+ for (i = 0; i < img->nsegments; ++i) {
+ img->segments[i].file_offset += delta;
+ img->segments[i].vaddr += delta;
+ }
+ for (i = 0; i < img->nsections; ++i) {
+ img->sections[i].file_offset += delta;
+ img->sections[i].vaddr += delta;
+ }
+ for (i = 0; i < LinkRelocs_count(&img->relocs); ++i) {
+ LinkRelocs_at(&img->relocs, i)->write_file_offset += delta;
+ LinkRelocs_at(&img->relocs, i)->write_vaddr += delta;
+ }
+ for (i = 0; i < LinkSyms_count(&img->syms); ++i) {
+ LinkSymbol* s = LinkSyms_at(&img->syms, i);
+ if (s->kind == SK_ABS) continue; /* absolute values are not image-relative */
+ if (!s->defined) continue; /* undefined symbols are left untouched */
+ s->vaddr += delta;
+ }
+ /* tls_vaddr lives in the same image-relative coordinate system as
+ * the segments it tracks, so it bumps with them. */
+ if (img->tls_memsz) img->tls_vaddr += delta;
+ /* Dyn-link state mirrors a few segment / section vaddrs and pre-
+ * populated DynRela.r_offset values from layout_dyn. Bump them so
+ * the post-shift .rela.plt / .dynamic emit and apply_all_relocs see
+ * the right addresses (sym_plt_vaddr is read to redirect CALL26
+ * against imports). */
+ if (img->dyn) {
+ LinkDynState* dyn = img->dyn;
+ if (dyn->plt_vaddr) dyn->plt_vaddr += delta; /* a zero vaddr stays zero */
+ if (dyn->got_plt_vaddr) dyn->got_plt_vaddr += delta;
+ if (dyn->dynamic_vaddr) dyn->dynamic_vaddr += delta;
+ if (dyn->sym_plt_vaddr) { /* the table itself may be absent */
+ u32 j;
+ for (j = 0; j < dyn->sym_dynidx_size; ++j)
+ if (dyn->sym_plt_vaddr[j]) dyn->sym_plt_vaddr[j] += delta;
+ }
+ if (dyn->rela_plt) {
+ u32 j;
+ for (j = 0; j < dyn->nrela_plt; ++j) dyn->rela_plt[j].r_offset += delta;
+ }
+ /* rela_dyn is populated by apply_all_relocs (which runs after this
+ * shift), so its records are already in post-shift coordinates. */
+ }
+}
+
+/* AArch64 ELF ABI: the per-thread TLS block starts at TP + 16 bytes
+ * (the TCB sits ahead of the TLS image). RISC-V psABI normally points
+ * tp at the start of the TLS image; the cfree harness's start.c
+ * places a 16-byte TCB ahead of .tdata and biases tp accordingly, so
+ * the TPREL offset for both arches is (target - tls_vaddr) + 16. */
+#define TLS_TCB_SIZE 16ull
+
+static int reloc_is_tlsle(RelocKind k) {
+ return k == R_AARCH64_TLSLE_ADD_TPREL_HI12 || /* AArch64 TLSLE pair ... */
+ k == R_AARCH64_TLSLE_ADD_TPREL_LO12_NC || k == R_RV_TPREL_HI20 ||
+ k == R_RV_TPREL_LO12_I || k == R_RV_TPREL_LO12_S; /* ... and RISC-V TPREL_* */
+}
+
+/* x86_64 SysV ABI: TLS variant II — the per-thread TLS image sits at
+ * *negative* offsets from %fs (which points at the TCB). start.c
+ * lays out [tdata | tbss | TCB] and arch_prctl(ARCH_SET_FS, &TCB), so
+ * a symbol at offset X within the TLS image is at fs-relative offset
+ * (X - tls_memsz). The two ELF reloc kinds R_X86_64_TPOFF32/_TPOFF64
+ * encode that signed offset directly at the reloc site (no TCB bias —
+ * variant II's TCB sits *after* the image, so TPOFF is negative). */
+static int reloc_is_x64_tlsle(RelocKind k) {
+ return k == R_X64_TPOFF32 || k == R_X64_TPOFF64; /* the two x86_64 TPOFF kinds */
+}
+
+static int reloc_is_abs(RelocKind k) { return k == R_ABS32 || k == R_ABS64; } /* 32- or 64-bit absolute kinds */
+
+/* Function-call relocs that may route through the PLT when the target
+ * is imported. aarch64 CALL26/JUMP26, x86_64 PLT32, and risc-v CALL_PLT
+ * (which cfree maps to R_PLT32) all carry the "call this address; if
+ * it's not resolvable here use the PLT trampoline" contract; the apply
+ * pass overwrites S with the PLT entry vaddr in that case. */
+static int reloc_is_branch26(RelocKind k) {
+ return k == R_AARCH64_CALL26 || k == R_AARCH64_JUMP26 || k == R_X64_PLT32 ||
+ k == R_PLT32 || k == R_RV_CALL; /* despite the name, not AArch64-only */
+}
+
+static void emit_dyn_record(LinkImage* img, u64 site_vaddr, u32 reloc_type,
+                            u32 dynidx, i64 addend) {
+  /* No dyn state or no staging array: nothing is recorded. */
+  LinkDynState* st = img->dyn;
+  if (st == NULL || st->rela_dyn == NULL) return;
+  if (st->nrela_dyn >= st->cap_rela_dyn)
+    compiler_panic(img->c, no_loc(),
+                   "link: too many .rela.dyn records (%u >= %u); raise "
+                   "cap_rela_dyn in layout_dyn",
+                   st->nrela_dyn, st->cap_rela_dyn);
+  DynRela* slot = &st->rela_dyn[st->nrela_dyn++]; /* claim the next free record */
+  slot->r_offset = site_vaddr;
+  slot->r_info = ELF64_R_INFO((u64)dynidx, reloc_type);
+  slot->r_addend = addend;
+}
+
+static const ObjElfArchOps* elf_arch_or_panic(Compiler* c, const char* where) {
+  const ObjFormatImpl* elf_fmt = obj_format_lookup(CFREE_OBJ_ELF);
+  const ObjElfArchOps* ops = NULL; /* stays NULL unless the ELF format resolves it */
+  if (elf_fmt && elf_fmt->elf_arch) ops = elf_fmt->elf_arch(c->target.arch);
+  if (!ops)
+    compiler_panic(c, no_loc(), "%.*s: no ELF arch descriptor",
+                   SLICE_ARG(slice_from_cstr(where)));
+  return ops;
+}
+
+static void emit_relative_record(LinkImage* img, u64 site_vaddr, u64 addend) {
+  u32 rel_type = elf_arch_or_panic(img->c, "link")->r_relative;
+  emit_dyn_record(img, site_vaddr, rel_type, 0, (i64)addend); /* symbol index 0 */
+}
+
+static void emit_globdat_record(LinkImage* img, u64 site_vaddr, u32 dynidx,
+                                i64 addend) {
+  u32 glob_type = elf_arch_or_panic(img->c, "link")->r_glob_dat;
+  emit_dyn_record(img, site_vaddr, glob_type, dynidx, addend);
+}
+
+/* RISC-V PCREL_LO12_* references the address of an AUIPC carrying the
+ * paired PCREL_HI20. Given the AUIPC's site vaddr (post-shift), find
+ * its PCREL_HI20 reloc and compute the displacement that AUIPC
+ * encoded — the LO12 then takes the low 12 bits of the same disp.
+ *
+ * Linear scan over img->relocs is fine in practice: kernel images and
+ * cg cases produce at most a few hundred relocs total. */
+static i64 rv_pcrel_lo12_disp(LinkImage* img, u64 auipc_vaddr, u64 img_base) {
+  u32 n = LinkRelocs_count(&img->relocs);
+  for (u32 k = 0; k < n; ++k) {
+    const LinkRelocApply* cand = LinkRelocs_at(&img->relocs, k);
+    int is_hi = cand->kind == R_RV_PCREL_HI20 || cand->kind == R_RV_GOT_HI20;
+    u64 site = cand->write_vaddr + img_base;
+    if (!is_hi || site != auipc_vaddr) continue;
+    /* Paired HI20 reloc found: return its S + A - P displacement. */
+    const LinkSymbol* sym = LinkSyms_at(&img->syms, cand->target - 1);
+    u64 sym_addr = sym->vaddr;
+    if (sym->kind != SK_ABS) sym_addr += img_base;
+    return (i64)sym_addr + cand->addend - (i64)site;
+  }
+  compiler_panic(img->c, no_loc(),
+                 "link: PCREL_LO12 at 0x%llx has no paired PCREL_HI20",
+                 (unsigned long long)auipc_vaddr);
+  return 0;
+}
+
+static void apply_all_relocs(LinkImage* img, u64 img_base) {
+  /* Patch each queued reloc into segment_bytes; queue dyn records as needed. */
+  int pie = img->pie;
+  for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) {
+    LinkRelocApply* r = LinkRelocs_at(&img->relocs, i);
+    const LinkSymbol* tgt = LinkSyms_at(&img->syms, r->target - 1);
+    const LinkSection* sec = &img->sections[r->link_section_id - 1];
+    const LinkSegment* seg = &img->segments[sec->segment_id - 1];
+    u64 S, P;
+    u8* P_bytes;
+    if (reloc_is_tlsle(r->kind)) {
+      /* S is the target's TP-relative offset: distance from the
+       * TLS image start plus the 16-byte TCB. Both vaddrs are
+       * in the same (post-shift, image-relative) coordinate
+       * system, so img_base cancels out. */
+      S = (tgt->vaddr - img->tls_vaddr) + TLS_TCB_SIZE;
+    } else if (reloc_is_x64_tlsle(r->kind)) {
+      /* x86_64 variant II: TP points just past the TLS image, so a
+       * symbol at offset X within the image is at TP-relative offset
+       * (X - tls_memsz). Cast through i64/u64 so the reloc apply
+       * writes the full 32- or 64-bit signed value. */
+      i64 off = (i64)(tgt->vaddr - img->tls_vaddr) - (i64)img->tls_memsz;
+      S = (u64)off;
+    } else if (r->kind == R_RV_PCREL_LO12_I || r->kind == R_RV_PCREL_LO12_S) {
+      /* PCREL_LO12: rewrite S so that link_reloc_apply's existing
+       * LO12_I/LO12_S encoder produces the right low 12 bits of the
+       * paired AUIPC's PC-relative displacement. The reloc's own
+       * addend is unused; signed lo12 = disp & 0xfff. */
+      P = r->write_vaddr + img_base;
+      P_bytes = img->segment_bytes[seg->id - 1] +
+                (size_t)(r->write_file_offset - seg->file_offset);
+      {
+        i64 disp = rv_pcrel_lo12_disp(img, tgt->vaddr + img_base, img_base);
+        RelocKind alias =
+            (r->kind == R_RV_PCREL_LO12_I) ? R_RV_LO12_I : R_RV_LO12_S;
+        link_reloc_apply(img->c, alias, P_bytes, (u64)disp, 0, P);
+      }
+      continue;
+    } else {
+      S = tgt->vaddr + img_base;
+      if (tgt->kind == SK_ABS) S = tgt->vaddr;
+    }
+    P = r->write_vaddr + img_base;
+    P_bytes = img->segment_bytes[seg->id - 1] +
+              (size_t)(r->write_file_offset - seg->file_offset);
+
+    /* Imported target: redirect / rewrite per reloc kind (Phase 5).
+     *
+     *  - CALL26 / JUMP26: target the import's PLT entry. The PLT stub
+     *    reads .got.plt[3+i], which the loader pre-fills via JUMP_SLOT
+     *    (.rela.plt). S becomes the PLT-entry vaddr; the existing
+     *    apply path computes the disp from there.
+     *  - R_ABS{32,64}: leave the patch site at zero and emit a
+     *    GLOB_DAT record so the loader writes the resolved address
+     *    into the site at load time. This covers both
+     *    layout_got-emitted .got slot fills (target = import) and any
+     *    direct absolute reference in user data (e.g. a function
+     *    pointer initializer).
+     *  - GOT-page / LO12-NC against an import: emit_reloc_records has
+     *    already redirected the target from the import to the
+     *    synthetic .got slot symbol, so the apply path here sees the
+     *    slot, not the import — nothing special needed; the slot's
+     *    own R_ABS64 fill against the (vaddr=0) import will trip the
+     *    abs-import branch below and emit GLOB_DAT.
+     *
+     * Anything else against an imported symbol (e.g. PREL19 / ADR
+     * etc.) is rare in real binaries and would need its own
+     * dynamic-reloc kind; panic loudly so a future test that needs
+     * it announces itself. */
+    if (tgt->imported) {
+      /* `tgt` may be a per-input shadow LinkSymbol — resolve_undefs
+       * stamps `imported = 1` on every undef matched by name, but
+       * collect_imports only stashes plt_vaddr / dynidx on the
+       * canonical entry registered in img->globals. Resolve to the
+       * canonical id before indexing the dyn-state arrays. */
+      LinkSymId canon_id = tgt->id;
+      if (tgt->name != 0) {
+        LinkSymId hit = symhash_get(&img->globals, tgt->name);
+        if (hit != LINK_SYM_NONE) canon_id = hit;
+      }
+      u32 dynidx = (img->dyn && canon_id < img->dyn->sym_dynidx_size)
+                       ? img->dyn->sym_dynidx[canon_id]
+                       : 0u;
+      if (reloc_is_branch26(r->kind)) {
+        u64 plt_v = (img->dyn && img->dyn->sym_plt_vaddr &&
+                     canon_id < img->dyn->sym_dynidx_size)
+                        ? img->dyn->sym_plt_vaddr[canon_id] : 0u;
+        if (plt_v == 0)
+          compiler_panic(img->c, no_loc(),
+                         "link: imported sym has no PLT entry (CALL26)");
+        S = plt_v + img_base;
+        link_reloc_apply(img->c, r->kind, P_bytes, S, r->addend, P);
+        continue;
+      }
+      if (reloc_is_abs(r->kind)) {
+        if (dynidx == 0)
+          compiler_panic(img->c, no_loc(),
+                         "link: imported sym has no .dynsym entry");
+        emit_globdat_record(img, r->write_vaddr, dynidx, r->addend);
+        /* Site bytes are irrelevant: the loader's GLOB_DAT writes
+         * (sym_value + r_addend) into r_offset before user code runs,
+         * overwriting whatever's there. Leaving the existing zero
+         * fill saves a write. */
+        continue;
+      }
+      {
+        Slice nm_s =
+            tgt->name ? pool_slice(img->c->global, tgt->name) : SLICE_NULL;
+        const char* nm = nm_s.s ? nm_s.s : "";
+        size_t nl = nm_s.len;
+        compiler_panic(
+            img->c, no_loc(),
+            "link: unhandled reloc kind %u against imported symbol '%.*s'",
+            (unsigned)r->kind, (int)nl, nm);
+      }
+    }
+
+    /* PIE: an absolute reloc against a defined non-imported symbol gets
+     * a synthesized RELATIVE record. RELA-style loaders store
+     * load_base + r_addend over the site, ignoring its contents, so the
+     * record must carry the full image-relative value S + A (img_base is
+     * 0 for PIE); passing tgt->vaddr alone would drop the reloc addend.
+     * The site is still patched with that same image-relative value. */
+    if (pie && reloc_is_abs(r->kind) && tgt->defined && tgt->kind != SK_ABS) {
+      emit_relative_record(img, r->write_vaddr, tgt->vaddr + (u64)r->addend);
+    }
+    link_reloc_apply(img->c, r->kind, P_bytes, S, r->addend, P);
+  }
+}
+
+/* The build-id payload is a format-agnostic image identity hash —
+ * see link_image_id_compute in link_image_id.c. Mach-O wraps the
+ * same bytes in LC_UUID; ELF wraps them in a .note.gnu.build-id. */
+
+/* ---- string-table builder ---- */
+
+typedef struct StrBuilder { /* growable string-table buffer */
+ Heap* heap; /* allocator used for data */
+ u8* data; /* table bytes; NULL after strb_fini or a failed initial alloc */
+ u32 len; /* bytes in use */
+ u32 cap; /* bytes allocated */
+} StrBuilder;
+
+static void strb_init(StrBuilder* s, Heap* h, u32 reserve) {
+  u32 want = reserve > 16u ? reserve : 16u; /* never reserve under 16 bytes */
+  s->heap = h;
+  s->len = 0;
+  s->data = (u8*)h->alloc(h, want, 1);
+  s->cap = s->data ? want : 0; /* failed alloc leaves an empty builder */
+  if (s->cap) {
+    s->data[0] = 0; /* leading NUL: offset 0 is the empty name */
+    s->len = 1;
+  }
+}
+
+static void strb_fini(StrBuilder* s) {
+  if (s->data != NULL) s->heap->free(s->heap, s->data, s->cap);
+  s->data = NULL; /* builder is now empty; a second fini frees nothing */
+  s->len = 0;
+  s->cap = 0;
+}
+
+static void strb_grow(StrBuilder* s, u32 need) {
+ (void)VEC_GROW(s->heap, s->data, s->cap, need); /* NOTE(review): result discarded, yet strb_add writes right after — confirm VEC_GROW cannot fail silently */
+}
+
+static u32 strb_add(StrBuilder* s, const char* str, u32 slen) {
+  u32 at, end;
+  if (slen == 0) return 0; /* empty name: the leading NUL at offset 0 */
+  /* Reuse an existing copy when there is one: any position whose next slen
+   * bytes equal str and are followed by a NUL (a whole entry, or the tail
+   * of a longer one). A table too short to hold a match skips the loop. */
+  for (at = 0; at + slen < s->len; ++at) {
+    if (s->data[at + slen] != 0) continue;
+    if (memcmp(s->data + at, str, slen) == 0) return at;
+  }
+  /* No match: append str and its terminator at the current end. */
+  at = s->len;
+  end = at + slen + 1u;
+  strb_grow(s, end);
+  memcpy(s->data + at, str, slen);
+  s->data[at + slen] = 0;
+  s->len = end;
+  return at;
+}
+
+static u32 strb_add_cstr(StrBuilder* s, const char* str) {
+ return strb_add(s, str, (u32)slice_from_cstr(str).len); /* length taken from slice_from_cstr, narrowed to u32 */
+}
+
+/* ---- symtab builder ---- */
+
+typedef struct SymRec { /* in-memory symbol record; serialized by write_sym_rec */
+ u32 st_name; /* name offset (presumably into .strtab — confirm at the fill site) */
+ u8 st_info; /* ELF binding/type byte */
+ u8 st_other; /* ELF st_other byte */
+ u16 st_shndx; /* section index or a special SHN_* value */
+ u64 st_value; /* symbol value */
+ u64 st_size; /* symbol size */
+} SymRec;
+
+static u8 sym_kind_to_st_type(u8 kind) { /* maps an SK_* symbol kind to an ELF STT_* type */
+ switch (kind) {
+ case SK_FUNC:
+ return STT_FUNC;
+ case SK_OBJ:
+ return STT_OBJECT;
+ case SK_SECTION:
+ return STT_SECTION;
+ case SK_FILE:
+ return STT_FILE;
+ case SK_TLS:
+ return STT_TLS;
+ case SK_IFUNC:
+ return STT_GNU_IFUNC;
+ case SK_NOTYPE:
+ case SK_ABS:
+ case SK_UNDEF:
+ default: /* the three kinds above and any unlisted value share STT_NOTYPE */
+ return STT_NOTYPE;
+ }
+}
+
+static u8 sym_bind_to_st_bind(u8 bind) { /* maps an SB_* binding to an ELF STB_* binding */
+ switch (bind) {
+ case SB_GLOBAL:
+ return STB_GLOBAL;
+ case SB_WEAK:
+ return STB_WEAK;
+ case SB_LOCAL:
+ default: /* any unlisted binding is emitted as local */
+ return STB_LOCAL;
+ }
+}
+
+/* Produces one Elf64_Sym record on the wire from a SymRec. */
+static void write_sym_rec(Writer* w, const SymRec* r) {
+  /* Serialize little-endian in Elf64_Sym field order:
+   *   st_name(4) st_info(1) st_other(1) st_shndx(2) st_value(8) st_size(8)
+   * `at` always points at the next unwritten byte of the record. */
+  u8 rec[ELF64_SYM_SIZE];
+  u8* at = rec;
+  u32 b;
+
+  for (b = 0; b < 4; ++b) *at++ = (u8)(r->st_name >> (b * 8));
+  *at++ = r->st_info;
+  *at++ = r->st_other;
+  for (b = 0; b < 2; ++b) *at++ = (u8)(r->st_shndx >> (b * 8));
+  for (b = 0; b < 8; ++b) *at++ = (u8)(r->st_value >> (b * 8));
+  for (b = 0; b < 8; ++b) *at++ = (u8)(r->st_size >> (b * 8));
+
+  write_bytes(w, rec, sizeof rec);
+}
+
+/* ---- section header layout ---- *
+ *
+ * Per-segment cuts: each kept image segment contributes 1 .text/.rodata
+ * shdr for its file portion, plus a separate .bss shdr for the trailing
+ * NOBITS portion of an RW segment (memsz > filesz). The headers PT_LOAD
+ * contributes a single .note.gnu.build-id shdr. Trailing non-alloc
+ * shdrs: .symtab .strtab .shstrtab (always 3). */
+
+typedef struct OutShdr { /* one planned output section header */
+ u32 shdr_idx; /* 1-based; assigned during planning */
+ LinkSegmentId segment_id; /* owning segment; part of the merge key */
+ Sym name; /* interned section name; part of the merge key */
+ u16 sem; /* SecSem from source LinkSection */
+ u32 flags; /* SF_* from source LinkSection */
+ u32 align; /* largest align among the merged sections */
+ u64 vaddr; /* vaddr of the first merged section */
+ u64 file_offset; /* file offset of the first merged section */
+ u64 size; /* span from vaddr to the furthest merged section end */
+ int is_nobits; /* nonzero when sem == SSEM_NOBITS */
+} OutShdr;
+
+static u16 sym_shndx_for(const LinkSymbol* s, const OutShdr* outshdrs,
+                         u32 noutshdr) {
+  u32 i; /* result: st_shndx for s — a special SHN_* or an outshdrs shdr_idx */
+  if (!s->defined) return SHN_UNDEF;
+  if (s->kind == SK_ABS || s->kind == SK_FILE) return SHN_ABS;
+  if (s->kind == SK_COMMON) return SHN_COMMON;
+  /* Pass 1: half-open [vaddr, vaddr+size), so a section's first symbol is
+   * not claimed by an abutting predecessor (.data ending where .bss starts). */
+  for (i = 0; i < noutshdr; ++i)
+    if (s->vaddr >= outshdrs[i].vaddr &&
+        s->vaddr - outshdrs[i].vaddr < outshdrs[i].size)
+      return (u16)outshdrs[i].shdr_idx;
+  /* Pass 2: end-of-section boundary symbols match at the upper edge. */
+  for (i = 0; i < noutshdr; ++i)
+    if (s->vaddr == outshdrs[i].vaddr + outshdrs[i].size)
+      return (u16)outshdrs[i].shdr_idx;
+  return SHN_ABS; /* covered by no output section */
+}
+
+static u32 sec_sem_to_sht(u16 sem) { /* maps an SSEM_* section semantic to an ELF SHT_* type */
+ switch (sem) {
+ case SSEM_PROGBITS:
+ return SHT_PROGBITS;
+ case SSEM_NOBITS:
+ return SHT_NOBITS;
+ case SSEM_NOTE:
+ return SHT_NOTE;
+ case SSEM_INIT_ARRAY:
+ return SHT_INIT_ARRAY;
+ case SSEM_FINI_ARRAY:
+ return SHT_FINI_ARRAY;
+ case SSEM_PREINIT_ARRAY:
+ return SHT_PREINIT_ARRAY;
+ default: /* any unlisted semantic is emitted as PROGBITS */
+ return SHT_PROGBITS;
+ }
+}
+
+static u64 sec_flags_to_shf(u32 flags) {
+  /* Each listed SF_* bit maps to one SHF_* bit; other bits are dropped. */
+  u64 shf = (flags & SF_ALLOC) ? (u64)SHF_ALLOC : 0;
+  shf |= (flags & SF_EXEC) ? (u64)SHF_EXECINSTR : 0;
+  shf |= (flags & SF_WRITE) ? (u64)SHF_WRITE : 0;
+  shf |= (flags & SF_TLS) ? (u64)SHF_TLS : 0;
+  shf |= (flags & SF_MERGE) ? (u64)SHF_MERGE : 0;
+  shf |= (flags & SF_STRINGS) ? (u64)SHF_STRINGS : 0;
+  shf |= (flags & SF_LINK_ORDER) ? (u64)SHF_LINK_ORDER : 0;
+  shf |= (flags & SF_RETAIN) ? (u64)SHF_GNU_RETAIN : 0;
+  return shf;
+}
+
+void link_emit_elf(LinkImage* img, Writer* w) {
+ Heap* heap = img->heap;
+ Compiler* c = img->c;
+ const ObjElfArchOps* arch = elf_arch_or_panic(c, "link_emit_elf");
+ u32 e_machine = arch->e_machine;
+ if (img->entry_sym == LINK_SYM_NONE)
+ compiler_panic(c, no_loc(), "link_emit_elf: no resolved entry symbol");
+ /* IFUNC trampolines: layout_iplt builds the .iplt stubs + .igot.plt
+ * slots and (when emit_static_exe was set) synthesizes a
+ * .init_array entry that calls __cfree_ifunc_init at startup. The
+ * rt member walks .iplt.pairs and fills each slot before user code
+ * runs. The ELF writer doesn't have to do anything special here. */
+
+ /* PIE / ET_DYN: img_base is 0 (the loader picks the runtime base;
+ * absolute relocs against internal symbols are emitted as
+ * R_AARCH64_RELATIVE in .rela.dyn). Otherwise classic ET_EXEC at
+ * IMAGE_BASE_STATIC.
+ *
+ * Scripted: the linker script pinned absolute vaddrs (e.g.
+ * `. = 0x40080000`); img_base stays 0 and the headers PT_LOAD /
+ * build-id note are dropped — the script's image is consumed by a
+ * raw loader (qemu -kernel, a bootloader) that doesn't need a
+ * self-describing memory image. */
+ int pie = img->pie;
+ int scripted = img->scripted;
+ u64 img_base = (pie || scripted) ? 0ULL : IMAGE_BASE_STATIC;
+
+ /* ---- plan number of program headers ----
+ *
+ * 1 headers PT_LOAD + nsegments PT_LOAD + 1 PT_NOTE (build-id)
+ * + 1 PT_TLS when this image carries any TLS sections.
+ * + 4 dyn phdrs (PT_PHDR / PT_INTERP / PT_DYNAMIC / PT_GNU_STACK) on PIE.
+ *
+ * Scripted images skip the headers PT_LOAD and PT_NOTE: phdrs are
+ * just the per-segment PT_LOADs. */
+ u32 has_tls = img->tls_memsz ? 1u : 0u;
+ u32 nphdr_extra_dyn = pie ? 4u : 0u;
+ u32 nphdr_headers = scripted ? 0u : 1u;
+ u32 nphdr_buildid = scripted ? 0u : 1u;
+ u32 nphdr_total = nphdr_headers + img->nsegments + nphdr_buildid + has_tls +
+ nphdr_extra_dyn;
+ u64 build_id_note_bytes = scripted ? 0ULL : BUILD_ID_NOTE_BYTES;
+ u64 headers_size =
+ sizeof(Ehdr64) + (u64)nphdr_total * sizeof(Phdr64) + build_id_note_bytes;
+ u64 headers_load = ALIGN_UP(headers_size, (u64)PAGE_SIZE);
+
+ /* The build-id note lives inside the headers PT_LOAD at this offset. */
+ u64 build_id_off = sizeof(Ehdr64) + (u64)nphdr_total * sizeof(Phdr64);
+ u64 build_id_addr = img_base + build_id_off;
+
+ /* ---- shift image addresses, apply relocations ----
+ *
+ * Must happen before segshdrs/symtab construction so they observe
+ * post-shift vaddrs (the values that will land in the file). */
+ if (scripted)
+ shift_image_file_offsets(img, headers_load);
+ else
+ shift_image_addresses(img, headers_load);
+ apply_all_relocs(img, img_base);
+
+ /* ---- write .dynamic body + re-serialize .rela.dyn (PIE only) ----
+ *
+ * Both depend on post-shift vaddrs. .dynamic embeds image-relative
+ * pointers to .dynsym/.dynstr/.gnu.hash/.rela.dyn/.rela.plt/.got.plt
+ * (the loader adds load_base at runtime). .rela.dyn picked up
+ * RELATIVE records during apply_all_relocs; rewrite the section
+ * bytes to include them. */
+ if (pie && img->dyn) {
+ LinkDynState* dyn = img->dyn;
+ const LinkSection* sec_dynamic = &img->sections[dyn->sec_dynamic - 1];
+ const LinkSection* sec_dynsym = &img->sections[dyn->sec_dynsym - 1];
+ const LinkSection* sec_dynstr = &img->sections[dyn->sec_dynstr - 1];
+ const LinkSection* sec_gnuhash = &img->sections[dyn->sec_gnu_hash - 1];
+ const LinkSection* sec_reladyn = &img->sections[dyn->sec_rela_dyn - 1];
+ const LinkSection* sec_relaplt = (dyn->sec_rela_plt != LINK_SEC_NONE)
+ ? &img->sections[dyn->sec_rela_plt - 1]
+ : NULL;
+ const LinkSection* sec_gotplt = (dyn->sec_got_plt != LINK_SEC_NONE)
+ ? &img->sections[dyn->sec_got_plt - 1]
+ : NULL;
+ const LinkSegment* dseg = &img->segments[sec_dynamic->segment_id - 1];
+ u8* dyn_bytes_at = img->segment_bytes[dseg->id - 1] +
+ (size_t)(sec_dynamic->file_offset - dseg->file_offset);
+
+ /* Build DT_* entries in order. Layout matches count_dynamic_entries. */
+ u32 written = 0;
+ u8* p = dyn_bytes_at;
+#define DT_PUT(TAG, VAL) \
+ do { \
+ wr_u64_le(p, (u64)(TAG)); \
+ wr_u64_le(p + 8, (u64)(VAL)); \
+ p += 16; \
+ written++; \
+ } while (0)
+
+ /* DT_NEEDED entries — d_un.d_val is the offset of the soname
+ * within .dynstr. The dynstr was built in layout_dyn with
+ * dedup; look each soname up by name to compute its offset. */
+ {
+ u32 ni;
+ for (ni = 0; ni < dyn->nneeded; ++ni) {
+ Sym soname = dyn->needed[ni];
+ Slice nm_s = pool_slice(c->global, soname);
+ const char* nm = nm_s.s;
+ size_t namelen = nm_s.len;
+ /* Linear search dynstr for this name. */
+ u32 off = 0;
+ if (nm && namelen) {
+ u32 si;
+ for (si = 0; si + namelen < dyn->dynstr_len; ++si) {
+ if (dyn->dynstr[si + namelen] == 0 &&
+ memcmp(dyn->dynstr + si, nm, namelen) == 0) {
+ off = si;
+ break;
+ }
+ }
+ /* Should always be present — collect_needed populated dynstr
+ * via build_dynsym? Actually build_dynsym only added import
+ * names. We need to also add NEEDED sonames. */
+ if (off == 0) {
+ /* Fallback: append to dynstr. Phase 4 layout_dyn pre-sized
+ * .dynstr exactly to its current content; appending here
+ * would overflow the section. Instead, panic with a clear
+ * message — the soname was supposed to be added during
+ * layout. */
+ compiler_panic(c, no_loc(),
+ "link_emit_elf: DT_NEEDED soname missing from "
+ ".dynstr");
+ }
+ }
+ DT_PUT(DT_NEEDED, off);
+ }
+ }
+
+ DT_PUT(DT_STRTAB, img_base + sec_dynstr->vaddr);
+ DT_PUT(DT_STRSZ, sec_dynstr->size);
+ DT_PUT(DT_SYMTAB, img_base + sec_dynsym->vaddr);
+ DT_PUT(DT_SYMENT, 24);
+ DT_PUT(DT_GNU_HASH, img_base + sec_gnuhash->vaddr);
+ /* DT_PLT* / DT_JMPREL only make sense when there's a PLT. Emitting
+ * them with size=0 / vaddr=0 (or pointing past the end of any
+ * PT_LOAD) trips llvm-readelf's "address not in any segment" check
+ * and confuses some loaders' DT walk. */
+ if (dyn->nrela_plt) {
+ DT_PUT(DT_PLTGOT, sec_gotplt ? (img_base + sec_gotplt->vaddr) : 0);
+ DT_PUT(DT_PLTRELSZ, sec_relaplt ? sec_relaplt->size : 0);
+ DT_PUT(DT_PLTREL, DT_RELA);
+ DT_PUT(DT_JMPREL, sec_relaplt ? (img_base + sec_relaplt->vaddr) : 0);
+ }
+ if (dyn->cap_rela_dyn) {
+ DT_PUT(DT_RELA, img_base + sec_reladyn->vaddr);
+ DT_PUT(DT_RELASZ, sec_reladyn->size);
+ DT_PUT(DT_RELAENT, 24);
+ }
+ DT_PUT(DT_FLAGS_1, DF_1_NOW);
+ DT_PUT(DT_NULL, 0);
+#undef DT_PUT
+
+ /* Pad any pre-allocated tail with DT_NULL. */
+ while (written < dyn->ndyn_entries) {
+ wr_u64_le(p, 0);
+ wr_u64_le(p + 8, 0);
+ p += 16;
+ written++;
+ }
+
+ /* Re-serialize .rela.dyn body. GLOB_DAT records (imports against
+ * .got slots) and RELATIVE records (PIE internal abs64 fixups)
+ * are both populated during apply_all_relocs; .rela.dyn was empty
+ * coming out of layout_dyn. Trailing capacity stays zero —
+ * readers stop at the first R_AARCH64_NONE record. */
+ {
+ const LinkSegment* rdseg = &img->segments[sec_reladyn->segment_id - 1];
+ u8* rd_bytes = img->segment_bytes[rdseg->id - 1] +
+ (size_t)(sec_reladyn->file_offset - rdseg->file_offset);
+ u32 i;
+ for (i = 0; i < dyn->nrela_dyn; ++i) {
+ const DynRela* rr = &dyn->rela_dyn[i];
+ u8* rp = rd_bytes + (u64)i * ELF64_RELA_SIZE;
+ wr_u64_le(rp + 0, rr->r_offset);
+ wr_u64_le(rp + 8, rr->r_info);
+ wr_u64_le(rp + 16, (u64)rr->r_addend);
+ }
+ }
+
+ /* Re-serialize .rela.plt body. JUMP_SLOT records were written by
+ * layout_dyn at pre-shift vaddrs; shift_image_addresses bumped
+ * dyn->rela_plt[i].r_offset along with the rest, so the post-shift
+ * values match the .got.plt slot vaddrs the loader will patch. */
+ if (sec_relaplt && dyn->nrela_plt) {
+ const LinkSegment* rpseg = &img->segments[sec_relaplt->segment_id - 1];
+ u8* rp_bytes = img->segment_bytes[rpseg->id - 1] +
+ (size_t)(sec_relaplt->file_offset - rpseg->file_offset);
+ u32 i;
+ for (i = 0; i < dyn->nrela_plt; ++i) {
+ const DynRela* rr = &dyn->rela_plt[i];
+ u8* rp = rp_bytes + (u64)i * ELF64_RELA_SIZE;
+ wr_u64_le(rp + 0, rr->r_offset);
+ wr_u64_le(rp + 8, rr->r_info);
+ wr_u64_le(rp + 16, (u64)rr->r_addend);
+ }
+ }
+
+ /* Re-write .got.plt[0] = &.dynamic with the post-shift vaddr.
+ * layout_dyn wrote the pre-shift value into the segment bytes;
+ * shift_image_addresses bumped dyn->dynamic_vaddr so we can refill
+ * the slot here. Slots 1 and 2 (link_map cookie,
+ * _dl_runtime_resolve) are loader-owned for lazy binding; under
+ * DF_1_NOW they're never read so leaving them zero is fine. */
+ if (sec_gotplt && dyn->dynamic_vaddr) {
+ const LinkSegment* gpseg = &img->segments[sec_gotplt->segment_id - 1];
+ u8* gp_bytes = img->segment_bytes[gpseg->id - 1] +
+ (size_t)(sec_gotplt->file_offset - gpseg->file_offset);
+ wr_u64_le(gp_bytes, dyn->dynamic_vaddr);
+ }
+ }
+
+ /* ---- compute build-id (post-reloc, deterministic) ----
+ *
+ * Format-agnostic — Mach-O LC_UUID will hash the same bytes. */
+ u8 build_id[BUILD_ID_DESC_LEN];
+ link_image_id_compute(img, build_id);
+
+ /* ---- plan section headers covering loaded segments ----
+ *
+ * Worst case: 1 file shdr per segment + 1 .bss shdr if RW has a tail.
+ * shdr indices: 0=NULL, 1..nsegshdr=these, then build-id/symtab/...
+ */
+ /* Walk img->sections sorted by (segment_id, vaddr) and merge into
+ * one OutShdr per (segment_id, name) run. layout already places
+ * same-name sections adjacent within a segment, so a stable
+ * by-vaddr sort followed by run-length grouping captures it. */
+ OutShdr* outshdrs;
+ u32 noutshdr = 0;
+ u32 outshdr_cap = img->nsections + 1u;
+ outshdrs = (OutShdr*)heap->alloc(heap, sizeof(*outshdrs) * outshdr_cap,
+ _Alignof(OutShdr));
+ if (!outshdrs) compiler_panic(c, no_loc(), "link_emit_elf: oom on outshdrs");
+ memset(outshdrs, 0, sizeof(*outshdrs) * outshdr_cap);
+ {
+ /* Build a sort index over LinkSection ids by (segment_id, vaddr). */
+ u32* order = (u32*)heap->alloc(heap, sizeof(u32) * (img->nsections + 1u),
+ _Alignof(u32));
+ if (!order && img->nsections)
+ compiler_panic(c, no_loc(), "link_emit_elf: oom on shdr sort");
+ u32 i, j;
+ for (i = 0; i < img->nsections; ++i) order[i] = i;
+ /* Insertion sort — section count is small. */
+ for (i = 1; i < img->nsections; ++i) {
+ u32 cur = order[i];
+ const LinkSection* a = &img->sections[cur];
+ j = i;
+ while (j > 0) {
+ const LinkSection* b = &img->sections[order[j - 1]];
+ if ((b->segment_id < a->segment_id) ||
+ (b->segment_id == a->segment_id && b->vaddr <= a->vaddr))
+ break;
+ order[j] = order[j - 1];
+ --j;
+ }
+ order[j] = cur;
+ }
+ for (i = 0; i < img->nsections; ++i) {
+ const LinkSection* ls = &img->sections[order[i]];
+ OutShdr* tail = noutshdr ? &outshdrs[noutshdr - 1] : NULL;
+ int merge = tail && tail->segment_id == ls->segment_id &&
+ tail->name == ls->name &&
+ tail->is_nobits == (ls->sem == SSEM_NOBITS);
+ if (merge) {
+ u64 end = ls->vaddr + ls->size;
+ u64 prev_end = tail->vaddr + tail->size;
+ if (end > prev_end) tail->size = end - tail->vaddr;
+ if (ls->align > tail->align) tail->align = ls->align;
+ } else {
+ OutShdr* o = &outshdrs[noutshdr];
+ o->shdr_idx = 1u + noutshdr;
+ o->segment_id = ls->segment_id;
+ o->name = ls->name;
+ o->sem = ls->sem;
+ o->flags = ls->flags;
+ o->align = ls->align;
+ o->vaddr = ls->vaddr;
+ o->file_offset = ls->file_offset;
+ o->size = ls->size;
+ o->is_nobits = (ls->sem == SSEM_NOBITS);
+ noutshdr++;
+ }
+ }
+ heap->free(heap, order, sizeof(u32) * (img->nsections + 1u));
+ }
+
+ /* ---- build .shstrtab ---- */
+ StrBuilder shstrtab;
+ strb_init(&shstrtab, heap, 128);
+ u32 sh_name_symtab = strb_add_cstr(&shstrtab, ".symtab");
+ u32 sh_name_strtab = strb_add_cstr(&shstrtab, ".strtab");
+ u32 sh_name_shstrtab = strb_add_cstr(&shstrtab, ".shstrtab");
+ u32 sh_name_buildid = strb_add_cstr(&shstrtab, ".note.gnu.build-id");
+ /* Per-output-shdr names — interned strings from input section names. */
+ u32* outshdr_name_off =
+ (u32*)heap->alloc(heap, sizeof(u32) * (noutshdr + 1u), _Alignof(u32));
+ if (!outshdr_name_off && noutshdr)
+ compiler_panic(c, no_loc(), "link_emit_elf: oom on shdr name table");
+ {
+ u32 i;
+ for (i = 0; i < noutshdr; ++i) {
+ const OutShdr* o = &outshdrs[i];
+ if (o->name) {
+ Slice nm_s = pool_slice(c->global, o->name);
+ const char* nm = nm_s.s;
+ size_t nlen = nm_s.len;
+ outshdr_name_off[i] =
+ nm && nlen ? strb_add(&shstrtab, nm, (u32)nlen) : 0;
+ } else {
+ outshdr_name_off[i] = 0;
+ }
+ }
+ }
+
+ u32 nshdr = 1u + noutshdr + 4u;
+ u32 shndx_buildid = 1u + noutshdr;
+ u32 shndx_symtab = shndx_buildid + 1u;
+ u32 shndx_strtab = shndx_symtab + 1u;
+ u32 shndx_shstrtab = shndx_strtab + 1u;
+
+ /* ---- build .symtab + .strtab ----
+ *
+ * Two passes (locals first, then globals/weaks). Slot 0 is
+ * STN_UNDEF. Globals are deduped via img->globals — only the
+ * canonical entry per name is emitted, since per-input undef
+ * records keep their own LinkSymId after resolve_undefs's
+ * "copy fields from canonical def" step. sh_info = first non-local
+ * idx. */
+ StrBuilder strtab;
+ strb_init(&strtab, heap, 256);
+
+ SymRec* recs = (SymRec*)heap->alloc(
+ heap, sizeof(*recs) * (LinkSyms_count(&img->syms) + 1u),
+ _Alignof(SymRec));
+ if (!recs) compiler_panic(c, no_loc(), "link_emit_elf: oom on symrecs");
+ u32 nsyms_emit = 0;
+ u32 first_global_idx;
+ memset(&recs[nsyms_emit++], 0, sizeof(*recs)); /* slot 0 */
+ first_global_idx = nsyms_emit;
+
+ {
+ u32 pass, i;
+ for (pass = 0; pass < 2; ++pass) {
+ int want_local = (pass == 0);
+ if (!want_local) first_global_idx = nsyms_emit;
+ for (i = 0; i < LinkSyms_count(&img->syms); ++i) {
+ const LinkSymbol* s = LinkSyms_at(&img->syms, i);
+ int is_local = (s->bind == SB_LOCAL);
+ size_t namelen = 0;
+ const char* nm;
+ u8 st_type, st_bind;
+ u16 shndx;
+ u64 st_value;
+ SymRec* r;
+ if (want_local != is_local) continue;
+ if (s->name == 0 && s->kind != SK_FILE) continue;
+ /* Dedupe globals: per-input undef-of-X and the canonical
+ * def-of-X are separate img->syms entries (resolve_undefs
+ * mirrors fields onto the undef). Only the canonical
+ * (first registered) entry is in img->globals. Skip the
+ * shadow copies. */
+ if (!is_local && s->name) {
+ LinkSymId canonical = symhash_get(&img->globals, s->name);
+ if (canonical != LINK_SYM_NONE && canonical != s->id) continue;
+ }
+ {
+ Slice nm_s = s->name ? pool_slice(c->global, s->name) : SLICE_NULL;
+ nm = nm_s.s ? nm_s.s : "";
+ namelen = nm_s.len;
+ }
+ shndx = sym_shndx_for(s, outshdrs, noutshdr);
+ /* st_value: in ET_EXEC, defined non-ABS symbols carry
+ * absolute virtual addresses (IMAGE_BASE + image
+ * vaddr); ABS symbols carry their own value verbatim. */
+ if (s->kind == SK_FILE)
+ st_value = 0;
+ else if (s->kind == SK_ABS)
+ st_value = s->vaddr;
+ else if (s->defined)
+ st_value = img_base + s->vaddr;
+ else
+ st_value = 0;
+ st_type = sym_kind_to_st_type(s->kind);
+ st_bind = sym_bind_to_st_bind(s->bind);
+ r = &recs[nsyms_emit++];
+ memset(r, 0, sizeof(*r));
+ r->st_name = (nm && namelen) ? strb_add(&strtab, nm, (u32)namelen) : 0;
+ r->st_info = ELF64_ST_INFO(st_bind, st_type);
+ r->st_other = STV_DEFAULT;
+ r->st_shndx = shndx;
+ r->st_value = st_value;
+ r->st_size = s->size;
+ }
+ }
+ }
+
+ /* ---- compute file offsets for trailing non-alloc sections ---- */
+ /* End of segment data: the highest (file_offset + file_size) across
+ * loaded segments. */
+ u64 end_of_segs = headers_load;
+ {
+ u32 i;
+ for (i = 0; i < img->nsegments; ++i) {
+ const LinkSegment* seg = &img->segments[i];
+ u64 e = seg->file_offset + seg->file_size;
+ if (e > end_of_segs) end_of_segs = e;
+ }
+ }
+ u64 symtab_off = ALIGN_UP(end_of_segs, (u64)8u);
+ u64 symtab_size = (u64)ELF64_SYM_SIZE * nsyms_emit;
+ u64 strtab_off = symtab_off + symtab_size;
+ u64 strtab_size = strtab.len;
+ u64 shstrtab_off = strtab_off + strtab_size;
+ u64 shstrtab_size = shstrtab.len;
+ u64 shdr_off = ALIGN_UP(shstrtab_off + shstrtab_size, (u64)8u);
+
+ /* ---- build phdrs ---- */
+ Phdr64* phdrs = (Phdr64*)heap->alloc(heap, sizeof(Phdr64) * nphdr_total,
+ _Alignof(Phdr64));
+ if (!phdrs) compiler_panic(c, no_loc(), "link_emit_elf: oom on phdrs");
+ memset(phdrs, 0, sizeof(Phdr64) * nphdr_total);
+ {
+ u32 pi = 0;
+ /* PT_PHDR points at the phdr table itself within the headers
+ * PT_LOAD. Required by the runtime loader for ET_DYN to know
+ * where its own program headers live. Must appear before the
+ * first PT_LOAD on dynamic exes (musl checks). */
+ if (pie) {
+ phdrs[pi].p_type = PT_PHDR;
+ phdrs[pi].p_flags = PF_R;
+ phdrs[pi].p_offset = sizeof(Ehdr64);
+ phdrs[pi].p_vaddr = img_base + sizeof(Ehdr64);
+ phdrs[pi].p_paddr = phdrs[pi].p_vaddr;
+ phdrs[pi].p_filesz = (u64)nphdr_total * sizeof(Phdr64);
+ phdrs[pi].p_memsz = phdrs[pi].p_filesz;
+ phdrs[pi].p_align = 8;
+ pi++;
+ }
+ /* Headers PT_LOAD (covers ehdr + phdrs + build-id note).
+ * Scripted images don't emit one — see plan note above. */
+ if (!scripted) {
+ phdrs[pi].p_type = PT_LOAD;
+ phdrs[pi].p_flags = PF_R;
+ phdrs[pi].p_offset = 0;
+ phdrs[pi].p_vaddr = img_base;
+ phdrs[pi].p_paddr = img_base;
+ phdrs[pi].p_filesz = headers_size;
+ phdrs[pi].p_memsz = headers_size;
+ phdrs[pi].p_align = PAGE_SIZE;
+ pi++;
+ }
+ /* Per-segment PT_LOAD. */
+ u32 i;
+ for (i = 0; i < img->nsegments; ++i) {
+ const LinkSegment* seg = &img->segments[i];
+ Phdr64* p = &phdrs[pi++];
+ p->p_type = PT_LOAD;
+ p->p_flags = perms_to_pflags(seg->flags);
+ p->p_offset = seg->file_offset;
+ p->p_vaddr = img_base + seg->vaddr; /* post-shift */
+ p->p_paddr = p->p_vaddr;
+ p->p_filesz = seg->file_size;
+ /* TLS .tbss is per-thread template space, not a loadable bss
+ * region — PT_TLS already records the full memsz (incl. .tbss)
+ * for the loader's per-thread allocation, so the matching
+ * PT_LOAD must not extend memsz past filesz. qemu-riscv64
+ * rejects PT_LOADs with memsz>filesz on non-writable mappings
+ * ("PT_LOAD with non-writable bss"), and the SEG_TLS perms are
+ * SF_ALLOC|SF_TLS only. */
+ p->p_memsz = (seg->flags & SF_TLS) ? seg->file_size : seg->mem_size;
+ p->p_align = seg->align ? seg->align : PAGE_SIZE;
+ }
+ /* PT_NOTE for build-id. Scripted images skip the build-id entirely. */
+ if (!scripted) {
+ phdrs[pi].p_type = PT_NOTE;
+ phdrs[pi].p_flags = PF_R;
+ phdrs[pi].p_offset = build_id_off;
+ phdrs[pi].p_vaddr = build_id_addr;
+ phdrs[pi].p_paddr = build_id_addr;
+ phdrs[pi].p_filesz = BUILD_ID_NOTE_BYTES;
+ phdrs[pi].p_memsz = BUILD_ID_NOTE_BYTES;
+ phdrs[pi].p_align = 4;
+ pi++;
+ }
+ /* PT_TLS describing the .tdata template + .tbss zero-fill.
+ * vaddr/file_offset point at the same bytes the matching
+ * PT_LOAD already covers — the loader uses PT_TLS to size
+ * each thread's TLS block and to seed it from .tdata. */
+ if (has_tls) {
+ phdrs[pi].p_type = PT_TLS;
+ phdrs[pi].p_flags = PF_R;
+ phdrs[pi].p_offset = img->tls_vaddr;
+ phdrs[pi].p_vaddr = img_base + img->tls_vaddr;
+ phdrs[pi].p_paddr = phdrs[pi].p_vaddr;
+ phdrs[pi].p_filesz = img->tls_filesz;
+ phdrs[pi].p_memsz = img->tls_memsz;
+ phdrs[pi].p_align = img->tls_align ? img->tls_align : 1u;
+ pi++;
+ }
+ /* Dynamic phdrs. PT_INTERP and PT_DYNAMIC point at the matching
+ * sections (which layout_dyn placed in the ro/rw_dyn segments).
+ * PT_GNU_STACK marks the stack as non-executable (filesz=0). */
+ if (pie && img->dyn) {
+ LinkDynState* dyn = img->dyn;
+ const LinkSection* sec_interp = &img->sections[dyn->sec_interp - 1];
+ const LinkSection* sec_dynamic = &img->sections[dyn->sec_dynamic - 1];
+ phdrs[pi].p_type = PT_INTERP;
+ phdrs[pi].p_flags = PF_R;
+ phdrs[pi].p_offset = sec_interp->file_offset;
+ phdrs[pi].p_vaddr = img_base + sec_interp->vaddr;
+ phdrs[pi].p_paddr = phdrs[pi].p_vaddr;
+ phdrs[pi].p_filesz = sec_interp->size;
+ phdrs[pi].p_memsz = sec_interp->size;
+ phdrs[pi].p_align = 1;
+ pi++;
+ phdrs[pi].p_type = PT_DYNAMIC;
+ phdrs[pi].p_flags = PF_R | PF_W;
+ phdrs[pi].p_offset = sec_dynamic->file_offset;
+ phdrs[pi].p_vaddr = img_base + sec_dynamic->vaddr;
+ phdrs[pi].p_paddr = phdrs[pi].p_vaddr;
+ phdrs[pi].p_filesz = sec_dynamic->size;
+ phdrs[pi].p_memsz = sec_dynamic->size;
+ phdrs[pi].p_align = 8;
+ pi++;
+ phdrs[pi].p_type = PT_GNU_STACK;
+ phdrs[pi].p_flags = PF_R | PF_W;
+ phdrs[pi].p_offset = 0;
+ phdrs[pi].p_vaddr = 0;
+ phdrs[pi].p_paddr = 0;
+ phdrs[pi].p_filesz = 0;
+ phdrs[pi].p_memsz = 0;
+ phdrs[pi].p_align = 16;
+ pi++;
+ /* PT_GNU_RELRO would mark the read-only-after-relocation span
+ * here. Phase 6 leaves it out — it's an optimization the loader
+ * can live without, and our ro_seg already lives in a PF_R
+ * PT_LOAD that's never made writable. */
+ } else if (pie) {
+ /* dyn was nominally requested but layout_dyn early-out — no
+ * imports and no DSO inputs. The image still needs a PT_GNU_STACK
+ * for kernels that demand it; INTERP/DYNAMIC are skipped. */
+ (void)0;
+ }
+ (void)pi;
+ }
+
+ /* ---- build ehdr ---- */
+ Ehdr64 ehdr;
+ memset(&ehdr, 0, sizeof(ehdr));
+ ehdr.e_ident[0] = ELFMAG0;
+ ehdr.e_ident[1] = ELFMAG1;
+ ehdr.e_ident[2] = ELFMAG2;
+ ehdr.e_ident[3] = ELFMAG3;
+ ehdr.e_ident[4] = ELFCLASS64;
+ ehdr.e_ident[5] = ELFDATA2LSB;
+ ehdr.e_ident[6] = EV_CURRENT;
+ ehdr.e_ident[7] = ELFOSABI_NONE;
+ ehdr.e_type = pie ? ET_DYN : ET_EXEC;
+ ehdr.e_machine = (u16)e_machine;
+ ehdr.e_version = EV_CURRENT;
+ ehdr.e_entry = img_base + LinkSyms_at(&img->syms, img->entry_sym - 1)->vaddr;
+ ehdr.e_phoff = sizeof(Ehdr64);
+ ehdr.e_shoff = shdr_off;
+ ehdr.e_flags = 0;
+ ehdr.e_ehsize = sizeof(Ehdr64);
+ ehdr.e_phentsize = sizeof(Phdr64);
+ ehdr.e_phnum = (u16)nphdr_total;
+ ehdr.e_shentsize = sizeof(Shdr64);
+ ehdr.e_shnum = (u16)nshdr;
+ ehdr.e_shstrndx = (u16)shndx_shstrtab;
+
+ /* ---- write ehdr, phdrs, build-id note, pad ---- */
+ u64 cur_off;
+ write_bytes(w, &ehdr, sizeof(ehdr));
+ write_bytes(w, phdrs, sizeof(Phdr64) * nphdr_total);
+ cur_off = sizeof(ehdr) + sizeof(Phdr64) * nphdr_total;
+
+ /* .note.gnu.build-id wire format:
+ * u32 namesz = 4 ("GNU\0")
+ * u32 descsz = 16
+ * u32 type = NT_GNU_BUILD_ID (3)
+ * "GNU\0"
+ * <16 bytes of build-id>
+ *
+ * Scripted images don't carry build-id; they have no PT_NOTE phdr to
+ * point at it and the file payload would just be dead bytes. */
+ if (!scripted) {
+ u8 nh[12];
+ u32 v;
+ v = NOTE_NAME_GNU_LEN;
+ nh[0] = (u8)v;
+ nh[1] = (u8)(v >> 8);
+ nh[2] = (u8)(v >> 16);
+ nh[3] = (u8)(v >> 24);
+ v = BUILD_ID_DESC_LEN;
+ nh[4] = (u8)v;
+ nh[5] = (u8)(v >> 8);
+ nh[6] = (u8)(v >> 16);
+ nh[7] = (u8)(v >> 24);
+ v = NOTE_BUILD_ID_TYPE;
+ nh[8] = (u8)v;
+ nh[9] = (u8)(v >> 8);
+ nh[10] = (u8)(v >> 16);
+ nh[11] = (u8)(v >> 24);
+ write_bytes(w, nh, sizeof nh);
+ write_bytes(w, NOTE_NAME_GNU "\0", NOTE_NAME_GNU_LEN);
+ write_bytes(w, build_id, BUILD_ID_DESC_LEN);
+ cur_off += BUILD_ID_NOTE_BYTES;
+ }
+
+ /* Pad to first segment file_offset (== headers_load). */
+ {
+ u32 i;
+ for (i = 0; i < img->nsegments; ++i) {
+ const LinkSegment* seg = &img->segments[i];
+ if (seg->file_size == 0) continue;
+ if (cur_off < seg->file_offset) {
+ write_zeroes(w, (size_t)(seg->file_offset - cur_off));
+ cur_off = seg->file_offset;
+ }
+ write_bytes(w, img->segment_bytes[seg->id - 1], (size_t)seg->file_size);
+ cur_off += seg->file_size;
+ }
+ }
+
+ /* ---- write trailing non-alloc sections ---- */
+ if (cur_off < symtab_off) {
+ write_zeroes(w, (size_t)(symtab_off - cur_off));
+ cur_off = symtab_off;
+ }
+ {
+ u32 i;
+ for (i = 0; i < nsyms_emit; ++i) write_sym_rec(w, &recs[i]);
+ cur_off += symtab_size;
+ }
+ if (strtab.len) {
+ write_bytes(w, strtab.data, strtab.len);
+ cur_off += strtab.len;
+ }
+ if (shstrtab.len) {
+ write_bytes(w, shstrtab.data, shstrtab.len);
+ cur_off += shstrtab.len;
+ }
+
+ /* ---- write section header table ---- */
+ if (cur_off < shdr_off) {
+ write_zeroes(w, (size_t)(shdr_off - cur_off));
+ cur_off = shdr_off;
+ }
+ {
+ Shdr64 sh;
+ u32 i;
+ /* shdr 0: NULL */
+ memset(&sh, 0, sizeof(sh));
+ write_bytes(w, &sh, sizeof(sh));
+ /* Locate dyn-section names (interned earlier in layout_dyn) so
+ * we can override sh_type / sh_link / sh_info / sh_entsize for
+ * .dynsym / .dynstr / .gnu.hash / .rela.dyn / .rela.plt /
+ * .dynamic. The sh_link cross-references (e.g., .dynsym ->
+ * .dynstr) need the matching shdr indices, which we look up by
+ * comparing OutShdr.name to the same Sym values. */
+ Sym n_dynsym = 0, n_dynstr = 0, n_gnuhash = 0;
+ Sym n_reladyn = 0, n_relaplt = 0, n_dynamic = 0;
+ Sym n_gotplt = 0;
+ if (pie && img->dyn) {
+ n_dynsym = pool_intern_slice(c->global, SLICE_LIT(".dynsym"));
+ n_dynstr = pool_intern_slice(c->global, SLICE_LIT(".dynstr"));
+ n_gnuhash = pool_intern_slice(c->global, SLICE_LIT(".gnu.hash"));
+ n_reladyn = pool_intern_slice(c->global, SLICE_LIT(".rela.dyn"));
+ n_relaplt = pool_intern_slice(c->global, SLICE_LIT(".rela.plt"));
+ n_dynamic = pool_intern_slice(c->global, SLICE_LIT(".dynamic"));
+ n_gotplt = pool_intern_slice(c->global, SLICE_LIT(".got.plt"));
+ }
+ /* Two-pass: first find dynsym/dynstr/gotplt indices for sh_link
+ * fixups, then emit. */
+ u32 idx_dynsym = 0, idx_dynstr = 0, idx_gotplt = 0;
+ if (pie && img->dyn) {
+ for (i = 0; i < noutshdr; ++i) {
+ Sym nm = outshdrs[i].name;
+ u32 ix = outshdrs[i].shdr_idx;
+ if (nm == n_dynsym)
+ idx_dynsym = ix;
+ else if (nm == n_dynstr)
+ idx_dynstr = ix;
+ else if (nm == n_gotplt)
+ idx_gotplt = ix;
+ }
+ }
+ /* per-name output shdrs */
+ for (i = 0; i < noutshdr; ++i) {
+ const OutShdr* o = &outshdrs[i];
+ memset(&sh, 0, sizeof(sh));
+ sh.sh_name = outshdr_name_off[i];
+ sh.sh_type = sec_sem_to_sht(o->sem);
+ sh.sh_flags = sec_flags_to_shf(o->flags);
+ sh.sh_addr = img_base + o->vaddr;
+ sh.sh_offset = o->file_offset;
+ sh.sh_size = o->size;
+ sh.sh_link = 0;
+ sh.sh_info = 0;
+ sh.sh_addralign = o->align ? o->align : 1;
+ sh.sh_entsize = (o->sem == SSEM_INIT_ARRAY || o->sem == SSEM_FINI_ARRAY ||
+ o->sem == SSEM_PREINIT_ARRAY)
+ ? 8
+ : 0;
+ /* Dyn-section overrides: sh_type / sh_link / sh_info / entsize. */
+ if (pie && img->dyn) {
+ if (o->name == n_dynsym) {
+ sh.sh_type = SHT_DYNSYM;
+ sh.sh_link = idx_dynstr;
+ sh.sh_info = img->dyn->first_global;
+ sh.sh_entsize = 24;
+ } else if (o->name == n_dynstr) {
+ sh.sh_type = SHT_STRTAB;
+ } else if (o->name == n_gnuhash) {
+ sh.sh_type = SHT_GNU_HASH;
+ sh.sh_link = idx_dynsym;
+ } else if (o->name == n_reladyn) {
+ sh.sh_type = SHT_RELA;
+ sh.sh_link = idx_dynsym;
+ sh.sh_entsize = 24;
+ } else if (o->name == n_relaplt) {
+ sh.sh_type = SHT_RELA;
+ sh.sh_link = idx_dynsym;
+ sh.sh_info = idx_gotplt;
+ sh.sh_entsize = 24;
+ sh.sh_flags |= SHF_INFO_LINK;
+ } else if (o->name == n_dynamic) {
+ sh.sh_type = SHT_DYNAMIC;
+ sh.sh_link = idx_dynstr;
+ sh.sh_entsize = 16;
+ } else if (o->name == n_gotplt) {
+ sh.sh_entsize = 8;
+ }
+ }
+ write_bytes(w, &sh, sizeof(sh));
+ }
+ /* shdr: .note.gnu.build-id (allocatable; in headers PT_LOAD) */
+ memset(&sh, 0, sizeof(sh));
+ sh.sh_name = sh_name_buildid;
+ sh.sh_type = SHT_NOTE;
+ sh.sh_flags = SHF_ALLOC;
+ sh.sh_addr = build_id_addr;
+ sh.sh_offset = build_id_off;
+ sh.sh_size = BUILD_ID_NOTE_BYTES;
+ sh.sh_addralign = 4;
+ write_bytes(w, &sh, sizeof(sh));
+ /* shdr: .symtab */
+ memset(&sh, 0, sizeof(sh));
+ sh.sh_name = sh_name_symtab;
+ sh.sh_type = SHT_SYMTAB;
+ sh.sh_flags = 0;
+ sh.sh_addr = 0;
+ sh.sh_offset = symtab_off;
+ sh.sh_size = symtab_size;
+ sh.sh_link = shndx_strtab;
+ sh.sh_info = first_global_idx;
+ sh.sh_addralign = 8;
+ sh.sh_entsize = ELF64_SYM_SIZE;
+ write_bytes(w, &sh, sizeof(sh));
+ /* shdr: .strtab */
+ memset(&sh, 0, sizeof(sh));
+ sh.sh_name = sh_name_strtab;
+ sh.sh_type = SHT_STRTAB;
+ sh.sh_offset = strtab_off;
+ sh.sh_size = strtab_size;
+ sh.sh_addralign = 1;
+ write_bytes(w, &sh, sizeof(sh));
+ /* shdr: .shstrtab */
+ memset(&sh, 0, sizeof(sh));
+ sh.sh_name = sh_name_shstrtab;
+ sh.sh_type = SHT_STRTAB;
+ sh.sh_offset = shstrtab_off;
+ sh.sh_size = shstrtab_size;
+ sh.sh_addralign = 1;
+ write_bytes(w, &sh, sizeof(sh));
+ }
+
+ heap->free(heap, phdrs, sizeof(Phdr64) * nphdr_total);
+ heap->free(heap, recs, sizeof(*recs) * (LinkSyms_count(&img->syms) + 1u));
+ heap->free(heap, outshdrs, sizeof(*outshdrs) * outshdr_cap);
+ if (outshdr_name_off)
+ heap->free(heap, outshdr_name_off, sizeof(u32) * (noutshdr + 1u));
+ strb_fini(&strtab);
+ strb_fini(&shstrtab);
+}
diff --git a/src/obj/elf/link_dyn.c b/src/obj/elf/link_dyn.c
@@ -0,0 +1,992 @@
+/* Phase 4 of dynamic linking: synthesize the dyn-link tables and
+ * sections an ET_DYN ELF exe needs to be loadable by a real runtime
+ * loader (musl ld-musl-aarch64.so.1).
+ *
+ * Inputs (computed by earlier passes):
+ * - LinkSymbol entries with `imported = 1` (set by resolve_undefs's
+ * DSO-search path; their dso_input_id names the providing DSO).
+ * - LinkInputs of kind LINK_INPUT_DSO_BYTES carrying SONAMEs.
+ *
+ * Outputs (deposited on LinkImage.dyn):
+ * - .interp PT_INTERP target string
+ * - .dynsym + .dynstr symbol table + name pool
+ * - .gnu.hash GNU-style hash for the loader
+ * - .rela.dyn GLOB_DAT (data imports) + space for
+ * R_AARCH64_RELATIVE records that
+ * Phase 6 emit fills in
+ * - .rela.plt JUMP_SLOT records (one per imported func)
+ * - .plt allocated, body NOT emitted (Phase 5)
+ * - .got.plt 3 reserved slots + 1 per PLT slot,
+ * allocated, body NOT emitted
+ * - .dynamic PT_DYNAMIC body, populated
+ *
+ * The .plt body / GOT-slot fill / CALL26 reloc rewriting are Phase 5;
+ * they're called out at the relevant allocation site so the missing
+ * pieces are obvious to anyone reading the output. The static-exe path
+ * is unaffected — layout_dyn early-outs when emit_pie is 0.
+ *
+ * Allocator pattern follows layout_iplt (link_layout.c): grow segments
+ * + sections via realloc, then page-align each new segment after the
+ * existing image span. Synthetic sections carry input_id == LINK_INPUT_NONE
+ * so downstream passes (emit_reloc_records, GC) leave them alone.
+ */
+
+#include <string.h>
+
+#include "core/bytes.h"
+#include "core/heap.h"
+#include "core/pool.h"
+#include "core/slice.h"
+#include "core/util.h"
+#include "core/vec.h"
+#include "link/link.h"
+#include "link/link_arch.h"
+#include "link/link_internal.h"
+#include "obj/elf/elf.h"
+#include "obj/format.h"
+
+/* Zeroed SrcLoc handed to compiler_panic by the helpers in this file. */
+static SrcLoc no_loc(void) {
+  return (SrcLoc){0, 0, 0};
+}
+
+/* ---- small allocators (mirror layout_iplt's helpers) ---- */
+
+/* Grow img->segments / segment_bytes / segment_bytes_cap by nseg slots each.
+ * Returns the first new index; img->nsegments itself is not updated here. */
+static u32 dyn_alloc_segments(LinkImage* img, u32 nseg) {
+  Heap* heap = img->heap;
+  u32 old_n = img->nsegments, new_n = old_n + nseg;
+  LinkSegment* segs = (LinkSegment*)heap->realloc(
+      heap, img->segments, sizeof(*img->segments) * old_n,
+      sizeof(*img->segments) * new_n, _Alignof(LinkSegment));
+  u8** bufs = (u8**)heap->realloc(
+      heap, img->segment_bytes, sizeof(*img->segment_bytes) * old_n,
+      sizeof(*img->segment_bytes) * new_n, _Alignof(u8*));
+  size_t* caps = (size_t*)heap->realloc(
+      heap, img->segment_bytes_cap, sizeof(*img->segment_bytes_cap) * old_n,
+      sizeof(*img->segment_bytes_cap) * new_n, _Alignof(size_t));
+  if (!(segs && bufs && caps))
+    compiler_panic(img->c, no_loc(), "link: oom on dyn segments");
+  img->segments = segs;
+  img->segment_bytes = bufs;
+  img->segment_bytes_cap = caps;
+  return old_n;
+}
+
+/* Grow img->sections by nsec slots; returns the index of the first new one.
+ * img->nsections is not updated here. Panics via compiler_panic on OOM. */
+static u32 dyn_alloc_sections(LinkImage* img, u32 nsec) {
+  u32 old_n = img->nsections;
+  LinkSection* grown = (LinkSection*)img->heap->realloc(
+      img->heap, img->sections, sizeof(*img->sections) * old_n,
+      sizeof(*img->sections) * (old_n + nsec), _Alignof(LinkSection));
+  if (!grown) compiler_panic(img->c, no_loc(), "link: oom on dyn sections");
+  img->sections = grown;
+  return old_n;
+}
+
+/* ---- byte-builder for .dynstr / .gnu.hash ---- */
+
+typedef struct ByteBuf {
+  Heap* heap; /* allocator handed to VEC_GROW when growing data */
+  u8* data;   /* backing bytes; NULL right after bb_init */
+  u32 len;    /* bytes appended so far */
+  u32 cap;    /* capacity tracked through VEC_GROW */
+} ByteBuf;
+
+/* Reset b to an empty buffer that will allocate from h. No storage is
+ * acquired here; data starts out NULL with len and cap both 0. */
+static void bb_init(ByteBuf* b, Heap* h) {
+  ByteBuf empty = {.heap = h, .data = NULL, .len = 0, .cap = 0};
+  *b = empty;
+}
+/* Ensure cap >= need. NOTE(review): VEC_GROW's OOM result is discarded. */
+static void bb_reserve(ByteBuf* b, u32 need) {
+  if (b->cap < need) (void)VEC_GROW(b->heap, b->data, b->cap, need);
+}
+/* Append n raw bytes from src; returns the offset they were written at. */
+static u32 bb_append(ByteBuf* b, const void* src, u32 n) {
+  bb_reserve(b, b->len + n);
+  if (n != 0) memcpy(&b->data[b->len], src, n);
+  b->len += n;
+  return b->len - n; /* offset before this append */
+}
+/* Append the n-byte string s plus a NUL and return its offset (0 when n is
+ * 0). A linear scan first reuses any existing NUL-terminated run equal to s,
+ * including the tail of a longer string. Strtabs are small. */
+static u32 bb_append_str(ByteBuf* b, const char* s, u32 n) {
+  if (n == 0) return 0;
+  for (u32 i = 0; i + n < b->len; ++i) {
+    if (b->data[i + n] != 0) continue;
+    if (memcmp(&b->data[i], s, n) == 0) return i;
+  }
+  const u32 start = b->len;
+  bb_reserve(b, start + n + 1u);
+  memcpy(&b->data[start], s, n);
+  b->data[start + n] = 0;
+  b->len = start + n + 1u;
+  return start;
+}
+
+/* ---- GNU-hash computation (psABI v1 hash) ----
+ * Body layout:
+ * u32 nbuckets
+ * u32 symoffset (first hashed dynsym index)
+ * u32 bloom_size (in 64-bit words)
+ * u32 bloom_shift
+ * u64 bloom[bloom_size]
+ * u32 buckets[nbuckets]
+ * u32 chains[ndynsym - symoffset]
+ *
+ * For Phase 4 we keep this very small: nbuckets = max(1, n) rounded up
+ * to odd (n = number of hashed symbols), bloom_size = 1, bloom_shift = 6
+ * (64-bit ELFCLASS64). All hashed symbols (symoffset..ndynsym-1)
+ * participate in bloom + buckets + chains. Slots 0..symoffset-1 are
+ * STN_UNDEF + locals, which the loader doesn't hash. */
+
+/* GNU hash of s[0..n): start at 5381, then h = h * 33 + byte per byte. */
+static u32 gnu_hash_name(const char* s, u32 n) {
+  const u8* p = (const u8*)s;
+  u32 hash = 5381u;
+  while (n--) hash = (hash << 5) + hash + *p++;
+  return hash;
+}
+
+/* ---- partition: enumerate imports ----
+ *
+ * Walks LinkSyms and collects each `imported` symbol that's the
+ * canonical entry in img->globals (resolve_undefs may stamp `imported`
+ * onto multiple shadow slots of the same name; only the canonical one
+ * lands in dynsym). The two output arrays are LinkSymIds: funcs first
+ * (PLT-bound), then data (GOT-bound via GLOB_DAT). */
+
+typedef struct ImportLists {
+  LinkSymId* funcs; /* imports collect_imports classified as functions */
+  u32 nfuncs;       /* entries used in funcs */
+  LinkSymId* datas; /* every other import */
+  u32 ndatas;       /* entries used in datas */
+} ImportLists;
+
+/* 1 when the symbol's own kind is SK_FUNC or SK_IFUNC, else 0. Undef shadows
+ * mostly carry SK_UNDEF, so collect_imports also consults the providing DSO. */
+static int sym_is_func_import(const LinkSymbol* s) {
+  if (s->kind == SK_FUNC) return 1;
+  return s->kind == SK_IFUNC;
+}
+
+/* Classify an import through the symbol table of the DSO that provides it
+ * (s->dso_input_id): the importer's own record may only say SK_UNDEF. The
+ * first same-name entry whose kind is not SK_UNDEF decides. Returns 1 for
+ * SK_FUNC / SK_IFUNC, else 0 — also when the input id is unset or out of
+ * range, the input has no obj, or no such entry exists. */
+static int dso_export_is_func(Linker* l, const LinkSymbol* s) {
+  if (s->dso_input_id == LINK_INPUT_NONE ||
+      s->dso_input_id - 1u >= LinkInputs_count(&l->inputs))
+    return 0;
+  LinkInput* provider = LinkInputs_at(&l->inputs, s->dso_input_id - 1u);
+  if (!provider->obj) return 0;
+  int result = 0;
+  ObjSymEntry ent;
+  ObjSymIter* iter = obj_symiter_new(provider->obj);
+  while (obj_symiter_next(iter, &ent)) {
+    const ObjSym* cand = ent.sym;
+    if (cand && cand->name == s->name && cand->kind != SK_UNDEF) {
+      result = (cand->kind == SK_FUNC || cand->kind == SK_IFUNC);
+      break;
+    }
+  }
+  obj_symiter_free(iter);
+  return result;
+}
+
+/* Fill il with the LinkSymIds of every named symbol flagged `imported`.
+ * Functions (by the symbol's own kind, else by the providing DSO's export)
+ * go to il->funcs and everything else to il->datas, both in LinkSyms order.
+ * Panics via compiler_panic when a list cannot grow. */
+static void collect_imports(Linker* l, LinkImage* img, Heap* h,
+                            ImportLists* il) {
+  u32 func_cap = 0, data_cap = 0;
+  il->funcs = il->datas = NULL;
+  il->nfuncs = il->ndatas = 0;
+  for (u32 k = 0; k < LinkSyms_count(&img->syms); ++k) {
+    LinkSymbol* sym = LinkSyms_at(&img->syms, k);
+    if (!sym->imported || sym->name == 0) continue;
+    /* Shadow copies are skipped: img->globals names the entry to keep. */
+    LinkSymId owner = symhash_get(&img->globals, sym->name);
+    if (owner != LINK_SYM_NONE && owner != sym->id) continue;
+    if (sym_is_func_import(sym) || dso_export_is_func(l, sym)) {
+      if (VEC_GROW(h, il->funcs, func_cap, il->nfuncs + 1u))
+        compiler_panic(img->c, no_loc(), "link: oom on import-funcs");
+      il->funcs[il->nfuncs++] = sym->id;
+    } else {
+      if (VEC_GROW(h, il->datas, data_cap, il->ndatas + 1u))
+        compiler_panic(img->c, no_loc(), "link: oom on import-datas");
+      il->datas[il->ndatas++] = sym->id;
+    }
+  }
+}
+
+static void free_imports(Heap* h, ImportLists* il) { /* frees both lists */
+  if (il->funcs) h->free(h, il->funcs, sizeof(*il->funcs) * il->nfuncs);
+  if (il->datas) h->free(h, il->datas, sizeof(*il->datas) * il->ndatas);
+} /* NOTE(review): sizes are count-based; confirm vs VEC_GROW's capacity. */
+
+/* ---- DT_NEEDED set ----
+ * Fills dyn->needed / dyn->nneeded with the SONAMEs of:
+ *   (a) every input that satisfied at least one imported symbol, and
+ *   (b) every LINK_INPUT_DSO_BYTES input that has a SONAME.
+ * Entries are in input order so the loader sees deps in declaration order.
+ * Inputs without a SONAME are never listed. Panics on OOM. */
+static void collect_needed(Linker* l, LinkImage* img, LinkDynState* dyn) {
+  Heap* heap = img->heap;
+  const u32 ninputs = LinkInputs_count(&l->inputs);
+  const u32 map_bytes = ninputs ? ninputs : 1u; /* never ask for 0 bytes */
+  u32 nmarked = 0;
+  u8* marked = (u8*)heap->alloc(heap, map_bytes, 1);
+  if (!marked) compiler_panic(img->c, no_loc(), "link: oom on needed map");
+  memset(marked, 0, map_bytes);
+
+  /* (a) inputs that some imported symbol was resolved against. */
+  for (u32 k = 0; k < LinkSyms_count(&img->syms); ++k) {
+    const LinkSymbol* sym = LinkSyms_at(&img->syms, k);
+    if (!sym->imported || sym->dso_input_id == LINK_INPUT_NONE) continue;
+    if (sym->dso_input_id - 1u < ninputs) marked[sym->dso_input_id - 1u] = 1;
+  }
+  /* (b) every explicitly supplied DSO, even if no import landed on it —
+   * matches GNU ld without --as-needed. Phase 4 doesn't plumb --as-needed
+   * through to the resolver, so "needed" is the right baseline. */
+  for (u32 k = 0; k < ninputs; ++k) {
+    LinkInput* in = LinkInputs_at(&l->inputs, k);
+    if (in->kind == LINK_INPUT_DSO_BYTES && in->soname != 0) marked[k] = 1;
+  }
+  for (u32 k = 0; k < ninputs; ++k) nmarked += marked[k] ? 1u : 0u;
+
+  /* Sized for every marked input; only those with a SONAME get stored. */
+  dyn->needed = NULL;
+  if (nmarked) {
+    dyn->needed =
+        (Sym*)heap->alloc(heap, sizeof(Sym) * nmarked, _Alignof(Sym));
+    if (!dyn->needed)
+      compiler_panic(img->c, no_loc(), "link: oom on needed list");
+  }
+  dyn->nneeded = 0;
+  for (u32 k = 0; k < ninputs; ++k) {
+    LinkInput* in = LinkInputs_at(&l->inputs, k);
+    if (marked[k] && in->soname != 0) dyn->needed[dyn->nneeded++] = in->soname;
+  }
+  heap->free(heap, marked, map_bytes);
+}
+
+/* ---- dynsym + dynstr build ----
+ *
+ * Slot 0 is STN_UNDEF, an all-zero record. dynstr starts with a NUL byte,
+ * so st_name == 0 reads as the empty string.
+ *
+ * Slots 1..nimports hold the imports, functions first and then data. Each
+ * has st_shndx = SHN_UNDEF and zero st_value / st_size: the static linker
+ * has no value for an imported symbol; the loader fills it in at bind time.
+ *
+ * Alongside .dynsym this also sets up dyn->sym_dynidx (LinkSymId -> dynsym
+ * slot, 0 for symbols that are not imports) and the parallel
+ * dyn->sym_plt_vaddr table, which is only allocated and zeroed in this
+ * step.
+ *
+ * No `--export-dynamic` plumbing in Phase 4: only imports + the null slot
+ * land in .dynsym. Adding exports is mechanical (walk img->globals, append
+ * entries with st_shndx = matching .text/.data section index) but isn't on
+ * the test/musl path. */
+
+/* Write dynsym slot `slot` for imported symbol `id`: intern its name into
+ * dynstr, mark it STB_GLOBAL / SHN_UNDEF and record the slot in
+ * dyn->sym_dynidx[id]. Function imports are STT_FUNC; data imports map
+ * SK_TLS to STT_TLS, SK_NOTYPE to STT_NOTYPE and anything else to
+ * STT_OBJECT. */
+static void dynsym_emit_import_rec(LinkImage* img, LinkDynState* dyn,
+                                   ByteBuf* dynstr, LinkSymId id, u32 slot,
+                                   int is_func) {
+  const LinkSymbol* sym = LinkSyms_at(&img->syms, id - 1);
+  Slice name = pool_slice(img->c->global, sym->name);
+  DynSymRec* rec = &dyn->dynsym[slot];
+  u8 st_type = STT_OBJECT;
+  if (is_func)
+    st_type = STT_FUNC;
+  else if (sym->kind == SK_TLS)
+    st_type = STT_TLS;
+  else if (sym->kind == SK_NOTYPE)
+    st_type = STT_NOTYPE;
+
+  /* The record was zeroed by build_dynsym; every field is still set here. */
+  rec->st_name = bb_append_str(dynstr, name.s, (u32)name.len);
+  rec->st_info = ELF64_ST_INFO(STB_GLOBAL, st_type);
+  rec->st_other = STV_DEFAULT;
+  rec->st_shndx = SHN_UNDEF;
+  rec->st_value = 0;
+  rec->st_size = 0;
+  dyn->sym_dynidx[id] = slot;
+}
+
+/* Allocate and populate dyn->dynsym from the import lists in il, appending
+ * the symbol names to dynstr, and allocate the zeroed dyn->sym_dynidx /
+ * dyn->sym_plt_vaddr tables. Also sets dyn->ndynsym, dyn->sym_dynidx_size
+ * and dyn->first_global. Panics via compiler_panic on allocation failure. */
+static void build_dynsym(LinkImage* img, LinkDynState* dyn,
+                         const ImportLists* il, ByteBuf* dynstr) {
+  Heap* heap = img->heap;
+  const u32 ndynsym = 1u + (il->nfuncs + il->ndatas); /* +1: null slot */
+  const u8 nul = 0;
+  size_t bytes;
+  u32 slot, k;
+
+  /* The table itself, zero-filled so slot 0 is already STN_UNDEF. */
+  dyn->ndynsym = ndynsym;
+  bytes = sizeof(*dyn->dynsym) * ndynsym;
+  dyn->dynsym = (DynSymRec*)heap->alloc(heap, bytes, _Alignof(DynSymRec));
+  if (!dyn->dynsym) compiler_panic(img->c, no_loc(), "link: oom on dynsym");
+  memset(dyn->dynsym, 0, bytes);
+
+  /* dynstr leads with a NUL so st_name == 0 reads as the empty string. */
+  bb_append(dynstr, &nul, 1);
+
+  /* sym_dynidx is indexed by LinkSymId, hence the +1 on the symbol count. */
+  dyn->sym_dynidx_size = LinkSyms_count(&img->syms) + 1u;
+  bytes = sizeof(*dyn->sym_dynidx) * dyn->sym_dynidx_size;
+  dyn->sym_dynidx = (u32*)heap->alloc(heap, bytes, _Alignof(u32));
+  if (!dyn->sym_dynidx)
+    compiler_panic(img->c, no_loc(), "link: oom on sym_dynidx");
+  memset(dyn->sym_dynidx, 0, bytes);
+
+  /* Parallel array of the same length; only allocated and zeroed here. */
+  bytes = sizeof(*dyn->sym_plt_vaddr) * dyn->sym_dynidx_size;
+  dyn->sym_plt_vaddr = (u64*)heap->alloc(heap, bytes, _Alignof(u64));
+  if (!dyn->sym_plt_vaddr)
+    compiler_panic(img->c, no_loc(), "link: oom on sym_plt_vaddr");
+  memset(dyn->sym_plt_vaddr, 0, bytes);
+
+  /* All imports have STB_GLOBAL so first_global is right after the single
+   * STN_UNDEF slot. (When local exports land via --export-dynamic, this
+   * needs to grow.) */
+  dyn->first_global = 1u;
+
+  /* Slots are handed out in list order: functions first, then data. */
+  slot = 1u;
+  for (k = 0; k < il->nfuncs; ++k)
+    dynsym_emit_import_rec(img, dyn, dynstr, il->funcs[k], slot++, 1);
+  for (k = 0; k < il->ndatas; ++k)
+    dynsym_emit_import_rec(img, dyn, dynstr, il->datas[k], slot++, 0);
+}
+
+/* ---- .gnu.hash builder ----
+ *
+ * Hashed range is [first_global, ndynsym) — slot 0 (STN_UNDEF) is
+ * unhashed. Layout matches loader expectations (musl, glibc, FreeBSD).
+ *
+ * Bucket count: max(1, hashed_count), rounded up to odd so the
+ * mod operation distributes more uniformly. Bloom is 1 word for
+ * Phase 4 — a real implementation would scale with hashed_count, but
+ * 1 word with shift=6 still satisfies the loader's correctness check
+ * (any bit set is "maybe present"; false-positives only cost a chain
+ * scan). */
+
+static void build_gnu_hash(Heap* h, LinkImage* img, LinkDynState* dyn,
+                           const ByteBuf* dynstr) {
+  /* Serializes the .gnu.hash image for dynsym slots [first_global, ndynsym)
+   * into a fresh heap buffer and stores it in dyn->gnu_hash/gnu_hash_len.
+   * Symbol names are read from `dynstr` through each record's st_name.
+   * Panics via compiler_panic on allocation failure. Every word, including
+   * buckets and chains, is written with the little-endian writers so the
+   * bytes do not depend on the host byte order. */
+  u32 hashed = (dyn->ndynsym > dyn->first_global)
+                   ? (dyn->ndynsym - dyn->first_global)
+                   : 0u;
+  /* One bucket per hashed symbol (at least one), rounded up to odd. */
+  u32 nbuckets = (hashed ? hashed : 1u) | 1u;
+  u32 bloom_size = 1u; /* 64-bit word */
+  u32 bloom_shift = 6u;
+  u32 sym_offset = dyn->first_global;
+  u32 hdr_bytes = 16u; /* nbuckets/symoff/bloomsz/bloomshift */
+  u32 bloom_bytes = bloom_size * 8u;
+  u32 buckets_bytes = nbuckets * 4u;
+  u32 chains_bytes = hashed * 4u;
+  u32 total = hdr_bytes + bloom_bytes + buckets_bytes + chains_bytes;
+
+  u8* buf = (u8*)h->alloc(h, total ? total : 1u, 4);
+  if (!buf) compiler_panic(img->c, no_loc(), "link: oom on .gnu.hash");
+  memset(buf, 0, total);
+
+  wr_u32_le(buf + 0, nbuckets);
+  wr_u32_le(buf + 4, sym_offset);
+  wr_u32_le(buf + 8, bloom_size);
+  wr_u32_le(buf + 12, bloom_shift);
+
+  if (hashed) {
+    u32 i;
+    u64 bloom = 0;
+    u8* buckets = buf + hdr_bytes + bloom_bytes;
+    u8* chains = buckets + buckets_bytes;
+    u32* hashes = (u32*)h->alloc(h, sizeof(u32) * hashed, _Alignof(u32));
+    if (!hashes)
+      compiler_panic(img->c, no_loc(), "link: oom on .gnu.hash hashes");
+    for (i = 0; i < hashed; ++i) {
+      const DynSymRec* r = &dyn->dynsym[sym_offset + i];
+      const char* name = (const char*)dynstr->data + r->st_name;
+      size_t n = name ? slice_from_cstr(name).len : 0;
+      hashes[i] = gnu_hash_name(name, (u32)n);
+    }
+
+    /* Bloom filter (single 64-bit word): set bit H % 64 and bit
+     * (H >> shift) % 64 for every hashed symbol. */
+    for (i = 0; i < hashed; ++i) {
+      u32 h1 = hashes[i] % 64u;
+      u32 h2 = (hashes[i] >> bloom_shift) % 64u;
+      bloom |= ((u64)1 << h1) | ((u64)1 << h2);
+    }
+    wr_u64_le(buf + hdr_bytes, bloom);
+
+    /* Buckets: each holds the dynsym index (`sym_offset + i`) of the first
+     * symbol that hashes there; 0 means empty. A slot is still empty while
+     * all four of its bytes are zero (buf was zero-filled above). */
+    for (i = 0; i < hashed; ++i) {
+      u8* slot = buckets + 4u * (size_t)(hashes[i] % nbuckets);
+      if ((slot[0] | slot[1] | slot[2] | slot[3]) == 0)
+        wr_u32_le(slot, sym_offset + i);
+    }
+    /* Chains: chain[i] is the hash with the LSB cleared; the LSB is set
+     * when symbol i is the last hashed symbol or symbol i+1 falls in a
+     * different bucket, which terminates the loader's chain walk.
+     * NOTE: this assumes symbols sharing a bucket are contiguous in
+     * .dynsym; that ordering is not enforced here (known shortcut). */
+    for (i = 0; i < hashed; ++i) {
+      u32 v = hashes[i] & ~1u;
+      int last = (i + 1 == hashed) ||
+                 ((hashes[i + 1] % nbuckets) != (hashes[i] % nbuckets));
+      if (last) v |= 1u;
+      wr_u32_le(chains + 4u * (size_t)i, v);
+    }
+    h->free(h, hashes, sizeof(u32) * hashed);
+  }
+
+  dyn->gnu_hash = buf;
+  dyn->gnu_hash_len = total;
+}
+
+/* ---- .dynamic body builder ----
+ *
+ * Computed at layout time so the size is known before segments are
+ * placed. Each entry is two u64s (d_tag, d_un.d_val|d_un.d_ptr).
+ * Final entry is DT_NULL. The d_ptr fields that point at other
+ * synthetic sections are filled with image-relative vaddrs; the emit
+ * pass adds load-base / IMAGE_BASE only when ET_EXEC. */
+
+typedef struct DynEntry {
+ u64 tag; /* d_tag */
+ u64 val; /* either d_val or d_ptr; emit just writes 8 bytes */
+} DynEntry;
+
+static u32 count_dynamic_entries(const LinkDynState* dyn) {
+ /* Returns the number of Elf64_Dyn slots the .dynamic section needs:
+ * one DT_NEEDED per dependency, plus
+ * always: DT_STRTAB DT_STRSZ DT_SYMTAB DT_SYMENT DT_GNU_HASH
+ * DT_FLAGS_1 (DF_1_NOW for eager binding), DT_NULL terminator
+ * with .rela.dyn capacity: DT_RELA DT_RELASZ DT_RELAENT
+ * with imported functions / a PLT: DT_PLTGOT DT_PLTRELSZ DT_PLTREL DT_JMPREL
+ * NOTE(review): the always-present list names six tags plus DT_NULL, yet
+ * only 6 slots are added below — confirm against the .dynamic emitter. */
+ u32 n = dyn->nneeded;
+ n += 6; /* 5 fixed + DT_NULL */
+ if (dyn->cap_rela_dyn) n += 3; /* DT_RELA + DT_RELASZ + DT_RELAENT */
+ if (dyn->nrela_plt) n += 4; /* PLT-only entries */
+ return n;
+}
+
+/* ---- main entry ----
+ * layout_dyn: no-op unless l->emit_pie. Otherwise allocates img->dyn, builds
+ * .dynsym/.dynstr/.gnu.hash, and appends the synthetic dyn segments (ro,
+ * optional rx .plt, rw) plus their LinkSection records to img. */
+
+void layout_dyn(Linker* l, LinkImage* img) {
+ Heap* h = img->heap;
+ LinkDynState* dyn;
+ ImportLists imports;
+ ByteBuf dynstr;
+ u64 page;
+ const LinkArchDesc* arch;
+ const ObjElfArchOps* elf_arch;
+
+ if (!l->emit_pie) return;
+
+ arch = link_arch_desc_for(l->c);
+ if (!arch)
+ compiler_panic(img->c, no_loc(), "link: layout_dyn: no arch descriptor");
+ {
+ const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_ELF);
+ elf_arch = fmt && fmt->elf_arch ? fmt->elf_arch(l->c->target.arch) : NULL;
+ if (!elf_arch)
+ compiler_panic(img->c, no_loc(),
+ "link: layout_dyn: no ELF arch descriptor");
+ }
+
+ dyn = (LinkDynState*)h->alloc(h, sizeof(*dyn), _Alignof(LinkDynState));
+ if (!dyn) compiler_panic(img->c, no_loc(), "link: oom on dyn state");
+ memset(dyn, 0, sizeof(*dyn));
+ img->dyn = dyn;
+ img->pie = 1;
+
+ /* PT_INTERP path. Default to the canonical musl loader matching the
+ * target arch (per-arch table in src/arch/<arch>/link.c) when the caller
+ * didn't set one. Drivers like cfree-cc always override via
+ * link_set_interp_path; this default is correctness for direct
+ * libcfree consumers. glibc users have to set their interp
+ * explicitly — we don't pick a default for them. */
+ dyn->interp_path =
+ l->interp_path
+ ? l->interp_path
+ : pool_intern_slice(l->c->global,
+ slice_from_cstr(elf_arch->default_musl_interp));
+
+ /* Step 1: enumerate imports + DT_NEEDED. */
+ collect_imports(l, img, h, &imports);
+ collect_needed(l, img, dyn);
+
+ /* Step 2: build .dynstr + .dynsym. .dynstr must also carry the
+ * DT_NEEDED soname strings the .dynamic body references; intern
+ * them after the import names so build_dynsym's de-dup also covers
+ * any name that happens to collide with a soname. */
+ bb_init(&dynstr, h);
+ build_dynsym(img, dyn, &imports, &dynstr);
+ {
+ u32 ni;
+ for (ni = 0; ni < dyn->nneeded; ++ni) {
+ Slice s_s = pool_slice(l->c->global, dyn->needed[ni]);
+ const char* s = s_s.s;
+ size_t slen = s_s.len;
+ if (s && slen) (void)bb_append_str(&dynstr, s, (u32)slen);
+ }
+ }
+ dyn->dynstr = dynstr.data;
+ dyn->dynstr_len = dynstr.len;
+
+ /* Step 3: .gnu.hash. */
+ build_gnu_hash(h, img, dyn, &dynstr);
+
+ /* Step 4: pre-size all the synthetic sections.
+ * .interp: strlen + 1
+ * .dynsym: 24 * ndynsym
+ * .dynstr: dynstr_len
+ * .gnu.hash: gnu_hash_len
+ * .rela.dyn: 24 * cap_rela_dyn — the exact GLOB_DAT + RELATIVE count
+ * computed from img->relocs just below; emit fills them.
+ * .rela.plt: 24 * nfuncs
+ * .plt: arch->plt0_size + arch->plt_entry_size * nfuncs (PLT0 + per-slot)
+ * .got.plt: 8 * (3 + nfuncs)
+ * .dynamic: 16 * count_dynamic_entries
+ */
+ dyn->nplt = imports.nfuncs;
+ dyn->nrela_plt = imports.nfuncs;
+ dyn->rela_plt = imports.nfuncs
+ ? (DynRela*)h->alloc(h, sizeof(DynRela) * imports.nfuncs,
+ _Alignof(DynRela))
+ : NULL;
+ if (imports.nfuncs && !dyn->rela_plt)
+ compiler_panic(img->c, no_loc(), "link: oom on rela_plt");
+
+ /* RELA dyn: GLOB_DAT (one per imported abs-relocated symbol) +
+ * RELATIVE (one per PIE internal abs reloc against a defined sym).
+ * Phase 5 emits these dynamically during reloc-apply; pre-count the
+ * exact total here (img->relocs and the resolve-time `imported` flags
+ * are already settled by the time layout_dyn runs) so the section
+ * isn't padded with hundreds of trailing R_*_NONE records. */
+ u32 cap_rel = 0;
+ {
+ u32 ri;
+ for (ri = 0; ri < LinkRelocs_count(&img->relocs); ++ri) {
+ const LinkRelocApply* r = LinkRelocs_at(&img->relocs, ri);
+ const LinkSymbol* tgt = LinkSyms_at(&img->syms, r->target - 1);
+ if (r->kind != R_ABS32 && r->kind != R_ABS64) continue;
+ if (tgt->imported) {
+ cap_rel++; /* GLOB_DAT */
+ } else if (tgt->defined && tgt->kind != SK_ABS) {
+ cap_rel++; /* RELATIVE */
+ }
+ }
+ }
+ dyn->cap_rela_dyn = cap_rel;
+ dyn->rela_dyn =
+ dyn->cap_rela_dyn
+ ? (DynRela*)h->alloc(h, sizeof(DynRela) * dyn->cap_rela_dyn,
+ _Alignof(DynRela))
+ : NULL;
+ if (dyn->cap_rela_dyn && !dyn->rela_dyn)
+ compiler_panic(img->c, no_loc(), "link: oom on rela_dyn");
+ dyn->nrela_dyn = 0;
+
+ Slice interp_s = pool_slice(l->c->global, dyn->interp_path);
+ const char* interp_str = interp_s.s;
+ size_t namelen = interp_s.len;
+ u64 interp_bytes = (u64)namelen + 1u;
+ u64 dynsym_bytes = (u64)dyn->ndynsym * ELF64_SYM_SIZE;
+ u64 dynstr_bytes = (u64)dyn->dynstr_len;
+ u64 gnuhash_bytes = (u64)dyn->gnu_hash_len;
+ /* rela.dyn / rela.plt are sized for full capacity so the section's
+ * file_size and the PT_LOAD/.rela.dyn shdr sh_size add up; emit only writes
+ * what's populated. Trailing zero records are harmless (R_AARCH64_NONE). */
+ u64 rela_dyn_bytes = (u64)dyn->cap_rela_dyn * ELF64_RELA_SIZE;
+ u64 rela_plt_bytes = (u64)dyn->nrela_plt * ELF64_RELA_SIZE;
+ u64 plt_bytes =
+ (u64)(imports.nfuncs
+ ? arch->plt0_size + arch->plt_entry_size * imports.nfuncs
+ : 0u);
+ u64 gotplt_bytes = (u64)(imports.nfuncs ? 8u * (3u + imports.nfuncs) : 0u);
+ dyn->ndyn_entries = count_dynamic_entries(dyn);
+ u64 dynamic_bytes = (u64)dyn->ndyn_entries * ELF64_DYN_SIZE;
+
+ /* Step 5: place segments, page-aligned after the existing image
+ * span. New segments:
+ * ro_seg (PF_R) — .interp + .dynsym + .dynstr + .gnu.hash +
+ * .rela.dyn + .rela.plt
+ * rx_seg (PF_R+X)— .plt (only when imports.nfuncs > 0)
+ * rw_seg (PF_R+W)— .got.plt + .dynamic
+ *
+ * .dynamic lives in rw_seg because glibc's loader patches DT_*
+ * d_un.d_ptr fields in-place at startup (elf_get_dynamic_info
+ * adjusts STRTAB/SYMTAB/etc. by l_addr); a PF_R-only segment
+ * causes SEGV_ACCERR. musl's loader doesn't do this rewrite, but
+ * the RW placement is conventional and works for both.
+ */
+ page = 0x4000u; /* keep aligned with layout_page_size default */
+ {
+ /* Mirrors layout_page_size (duplicated rather than exposed): prefer the
+ * configured execmem's page size; the value is only used for alignment. */
+ const CfreeExecMem* m = (l && l->jit_host) ? l->jit_host->execmem : NULL;
+ if (m && m->page_size) page = (u64)m->page_size;
+ }
+
+ u64 base_vaddr = 0;
+ u32 i;
+ for (i = 0; i < img->nsegments; ++i) {
+ u64 end = img->segments[i].vaddr + img->segments[i].mem_size;
+ if (end > base_vaddr) base_vaddr = end;
+ }
+ base_vaddr = ALIGN_UP(base_vaddr, page);
+
+ /* Pack ro section offsets (relative to ro_seg.vaddr). .interp sits at
+ * offset 0; every following section start is rounded up to 8 bytes. */
+ u64 off = 0;
+ u64 interp_off = off;
+ off = ALIGN_UP(off + interp_bytes, 8u);
+ u64 dynsym_off = off;
+ off = ALIGN_UP(off + dynsym_bytes, 8u);
+ u64 dynstr_off = off;
+ off = ALIGN_UP(off + dynstr_bytes, 8u);
+ u64 gnuhash_off = off;
+ off = ALIGN_UP(off + gnuhash_bytes, 8u);
+ u64 rela_dyn_off = off;
+ off = ALIGN_UP(off + rela_dyn_bytes, 8u);
+ u64 rela_plt_off = off;
+ off = ALIGN_UP(off + rela_plt_bytes, 8u);
+ u64 ro_seg_size = off;
+
+ /* When no PLT is needed, suppress the RX/.plt segment entirely. */
+ int has_plt = imports.nfuncs > 0;
+
+ /* Pack rw_seg offsets: .got.plt (when has_plt) followed by .dynamic. */
+ u64 rw_off = 0;
+ u64 gotplt_off = rw_off;
+ if (has_plt) rw_off = ALIGN_UP(rw_off + gotplt_bytes, 8u);
+ u64 dynamic_off = rw_off;
+ rw_off = ALIGN_UP(rw_off + dynamic_bytes, 8u);
+ u64 rw_seg_size = rw_off;
+
+ u64 ro_vaddr = base_vaddr;
+ u64 rx_vaddr = ALIGN_UP(ro_vaddr + ro_seg_size, page);
+ u64 rw_vaddr = ALIGN_UP(rx_vaddr + (has_plt ? plt_bytes : 0u), page);
+
+ /* rw_seg always exists (it carries .dynamic). */
+ u32 nseg = 2u + (has_plt ? 1u : 0u);
+ u32 seg_base = dyn_alloc_segments(img, nseg);
+ u32 ro_seg_idx = seg_base + 0u;
+ u32 rx_seg_idx = has_plt ? seg_base + 1u : 0u;
+ u32 rw_seg_idx = seg_base + (has_plt ? 2u : 1u);
+
+ LinkSegment* ro_seg = &img->segments[ro_seg_idx];
+ memset(ro_seg, 0, sizeof(*ro_seg));
+ ro_seg->id = (LinkSegmentId)(ro_seg_idx + 1u);
+ ro_seg->flags = SF_ALLOC; /* PF_R */
+ ro_seg->file_offset = ro_vaddr;
+ ro_seg->vaddr = ro_vaddr;
+ ro_seg->file_size = ro_seg_size;
+ ro_seg->mem_size = ro_seg_size;
+ ro_seg->align = (u32)page;
+ ro_seg->nsections = 6;
+ img->segment_bytes[ro_seg_idx] =
+ ro_seg_size ? (u8*)h->alloc(h, (size_t)ro_seg_size, 16) : NULL;
+ img->segment_bytes_cap[ro_seg_idx] = (size_t)ro_seg_size;
+ if (ro_seg_size && !img->segment_bytes[ro_seg_idx])
+ compiler_panic(img->c, no_loc(), "link: oom on ro dyn segment");
+ if (ro_seg_size)
+ memset(img->segment_bytes[ro_seg_idx], 0, (size_t)ro_seg_size);
+
+ if (has_plt) {
+ LinkSegment* rx_seg = &img->segments[rx_seg_idx];
+ memset(rx_seg, 0, sizeof(*rx_seg));
+ rx_seg->id = (LinkSegmentId)(rx_seg_idx + 1u);
+ rx_seg->flags = SF_ALLOC | SF_EXEC;
+ rx_seg->file_offset = rx_vaddr;
+ rx_seg->vaddr = rx_vaddr;
+ rx_seg->file_size = plt_bytes;
+ rx_seg->mem_size = plt_bytes;
+ rx_seg->align = (u32)page;
+ rx_seg->nsections = 1;
+ img->segment_bytes[rx_seg_idx] = (u8*)h->alloc(h, (size_t)plt_bytes, 16);
+ img->segment_bytes_cap[rx_seg_idx] = (size_t)plt_bytes;
+ if (!img->segment_bytes[rx_seg_idx])
+ compiler_panic(img->c, no_loc(), "link: oom on .plt segment");
+ memset(img->segment_bytes[rx_seg_idx], 0, (size_t)plt_bytes);
+ /* Stash plt / got.plt vaddrs now — the PLT body emit just below
+ * reads them, and the post-shift fixup in shift_image_addresses
+ * (link_elf.c) keys on these fields too. */
+ dyn->plt_vaddr = rx_vaddr;
+ dyn->plt_size = plt_bytes;
+ dyn->got_plt_vaddr = rw_vaddr;
+ dyn->got_plt_size = gotplt_bytes;
+ /* PLT body emit: the descriptor owns the psABI-specific bytes. */
+ if (!arch->emit_plt0 || !arch->emit_plt_entry)
+ compiler_panic(l->c, no_loc(), "link: PLT emit not configured");
+ {
+ u8* plt_b = img->segment_bytes[rx_seg_idx];
+ u32 ki;
+ arch->emit_plt0(plt_b, dyn->plt_vaddr, dyn->got_plt_vaddr);
+ for (ki = 0; ki < imports.nfuncs; ++ki) {
+ u64 entry_vaddr = dyn->plt_vaddr + arch->plt0_size +
+ (u64)arch->plt_entry_size * (u64)ki;
+ u64 slot_vaddr = dyn->got_plt_vaddr + 8u * (3u + ki);
+ u8* p =
+ plt_b + arch->plt0_size + (size_t)arch->plt_entry_size * (size_t)ki;
+ arch->emit_plt_entry(p, entry_vaddr, slot_vaddr);
+ }
+ }
+ }
+ /* rw_seg always exists — it carries .dynamic, plus .got.plt when
+ * imports are present. */
+ {
+ LinkSegment* rw_seg = &img->segments[rw_seg_idx];
+ memset(rw_seg, 0, sizeof(*rw_seg));
+ rw_seg->id = (LinkSegmentId)(rw_seg_idx + 1u);
+ rw_seg->flags = SF_ALLOC | SF_WRITE;
+ rw_seg->file_offset = rw_vaddr;
+ rw_seg->vaddr = rw_vaddr;
+ rw_seg->file_size = rw_seg_size;
+ rw_seg->mem_size = rw_seg_size;
+ rw_seg->align = (u32)page;
+ rw_seg->nsections = has_plt ? 2u : 1u;
+ img->segment_bytes[rw_seg_idx] = (u8*)h->alloc(h, (size_t)rw_seg_size, 16);
+ img->segment_bytes_cap[rw_seg_idx] = (size_t)rw_seg_size;
+ if (!img->segment_bytes[rw_seg_idx])
+ compiler_panic(img->c, no_loc(), "link: oom on rw dyn segment");
+ /* Zero-initialize. .got.plt[0] (&.dynamic) is filled later, after
+ * shift_image_addresses has bumped dyn->dynamic_vaddr. .dynamic
+ * body is built post-shift in link_emit_elf. Loader
+ * patches all .got.plt slots from .rela.plt before user code
+ * under DF_1_NOW. */
+ memset(img->segment_bytes[rw_seg_idx], 0, (size_t)rw_seg_size);
+ }
+ img->nsegments += nseg;
+
+ /* Step 6: synthetic LinkSection entries. Order in img->sections
+ * matches the loader-friendly file order and feeds emit's
+ * outshdr-merge pass. */
+ u32 nsec = 7u + (has_plt ? 2u : 0u);
+ u32 sec_base = dyn_alloc_sections(img, nsec);
+
+ /* helper: populate a fresh LinkSection for a segment-internal range */
+ /* Inline because the args differ enough (sem, name) per slot. */
+ Sym name_interp = pool_intern_slice(l->c->global, SLICE_LIT(".interp"));
+ Sym name_dynsym = pool_intern_slice(l->c->global, SLICE_LIT(".dynsym"));
+ Sym name_dynstr = pool_intern_slice(l->c->global, SLICE_LIT(".dynstr"));
+ Sym name_gnu_hash = pool_intern_slice(l->c->global, SLICE_LIT(".gnu.hash"));
+ Sym name_rela_dyn = pool_intern_slice(l->c->global, SLICE_LIT(".rela.dyn"));
+ Sym name_rela_plt = pool_intern_slice(l->c->global, SLICE_LIT(".rela.plt"));
+ Sym name_dynamic = pool_intern_slice(l->c->global, SLICE_LIT(".dynamic"));
+ Sym name_plt = pool_intern_slice(l->c->global, SLICE_LIT(".plt"));
+ Sym name_got_plt = pool_intern_slice(l->c->global, SLICE_LIT(".got.plt"));
+
+#define INIT_SEC(IDX, NAME, SEG_IDX, OFF_IN_SEG, SIZE, ALIGN, FLAGS, SEM) \
+ do { \
+ LinkSection* ls = &img->sections[sec_base + (IDX)]; \
+ memset(ls, 0, sizeof(*ls)); \
+ ls->id = (LinkSectionId)(sec_base + (IDX) + 1u); \
+ ls->input_id = LINK_INPUT_NONE; \
+ ls->obj_section_id = OBJ_SEC_NONE; \
+ ls->segment_id = img->segments[(SEG_IDX)].id; \
+ ls->input_offset = (OFF_IN_SEG); \
+ ls->file_offset = img->segments[(SEG_IDX)].file_offset + (OFF_IN_SEG); \
+ ls->vaddr = img->segments[(SEG_IDX)].vaddr + (OFF_IN_SEG); \
+ ls->size = (SIZE); \
+ ls->flags = (FLAGS); \
+ ls->align = (ALIGN); \
+ ls->name = (NAME); \
+ ls->sem = (SEM); \
+ } while (0)
+
+ INIT_SEC(0, name_interp, ro_seg_idx, interp_off, interp_bytes, 1, SF_ALLOC,
+ SSEM_PROGBITS);
+ INIT_SEC(1, name_dynsym, ro_seg_idx, dynsym_off, dynsym_bytes, 8, SF_ALLOC,
+ SSEM_PROGBITS);
+ INIT_SEC(2, name_dynstr, ro_seg_idx, dynstr_off, dynstr_bytes, 1, SF_ALLOC,
+ SSEM_PROGBITS);
+ INIT_SEC(3, name_gnu_hash, ro_seg_idx, gnuhash_off, gnuhash_bytes, 8,
+ SF_ALLOC, SSEM_PROGBITS);
+ INIT_SEC(4, name_rela_dyn, ro_seg_idx, rela_dyn_off, rela_dyn_bytes, 8,
+ SF_ALLOC, SSEM_PROGBITS);
+ INIT_SEC(5, name_rela_plt, ro_seg_idx, rela_plt_off, rela_plt_bytes, 8,
+ SF_ALLOC, SSEM_PROGBITS);
+ INIT_SEC(6, name_dynamic, rw_seg_idx, dynamic_off, dynamic_bytes, 8,
+ SF_ALLOC | SF_WRITE, SSEM_PROGBITS);
+
+ dyn->sec_interp = (LinkSectionId)(sec_base + 0 + 1u);
+ dyn->sec_dynsym = (LinkSectionId)(sec_base + 1 + 1u);
+ dyn->sec_dynstr = (LinkSectionId)(sec_base + 2 + 1u);
+ dyn->sec_gnu_hash = (LinkSectionId)(sec_base + 3 + 1u);
+ dyn->sec_rela_dyn = (LinkSectionId)(sec_base + 4 + 1u);
+ dyn->sec_rela_plt = (LinkSectionId)(sec_base + 5 + 1u);
+ dyn->sec_dynamic = (LinkSectionId)(sec_base + 6 + 1u);
+ dyn->dynamic_vaddr = img->segments[rw_seg_idx].vaddr + dynamic_off;
+ dyn->dynamic_size = dynamic_bytes;
+
+ if (has_plt) {
+ INIT_SEC(7, name_plt, rx_seg_idx, 0, plt_bytes, 16, SF_ALLOC | SF_EXEC,
+ SSEM_PROGBITS);
+ INIT_SEC(8, name_got_plt, rw_seg_idx, gotplt_off, gotplt_bytes, 8,
+ SF_ALLOC | SF_WRITE, SSEM_PROGBITS);
+ dyn->sec_plt = (LinkSectionId)(sec_base + 7 + 1u);
+ dyn->sec_got_plt = (LinkSectionId)(sec_base + 8 + 1u);
+ }
+#undef INIT_SEC
+
+ img->nsections += nsec;
+
+ /* Step 7: copy .interp / .dynsym / .dynstr / .gnu.hash bytes into
+ * the ro segment. .dynamic body is built during emit (it embeds
+ * runtime vaddrs that PIE keeps image-relative; emit just reads
+ * the section ids' final vaddrs). */
+ u8* ro_bytes = img->segment_bytes[ro_seg_idx];
+
+ /* .interp */
+ if (interp_bytes && ro_bytes)
+ memcpy(ro_bytes + interp_off, interp_str, (size_t)interp_bytes);
+
+ /* .dynsym: serialize DynSymRec to ELF64 wire layout. */
+ {
+ u32 si;
+ for (si = 0; si < dyn->ndynsym; ++si) {
+ u8* p = ro_bytes + dynsym_off + (u64)si * ELF64_SYM_SIZE;
+ const DynSymRec* r = &dyn->dynsym[si];
+ wr_u32_le(p + 0, r->st_name);
+ p[4] = r->st_info;
+ p[5] = r->st_other;
+ wr_u16_le(p + 6, r->st_shndx);
+ wr_u64_le(p + 8, r->st_value);
+ wr_u64_le(p + 16, r->st_size);
+ }
+ }
+
+ /* .dynstr */
+ if (dynstr_bytes && ro_bytes && dyn->dynstr)
+ memcpy(ro_bytes + dynstr_off, dyn->dynstr, dyn->dynstr_len);
+
+ /* .gnu.hash */
+ if (gnuhash_bytes && ro_bytes && dyn->gnu_hash)
+ memcpy(ro_bytes + gnuhash_off, dyn->gnu_hash, dyn->gnu_hash_len);
+
+ /* .rela.plt: emit JUMP_SLOT records, one per imported function, and
+ * stash each import's PLT-entry vaddr in `sym_plt_vaddr` so the
+ * apply pass can redirect CALL26/JUMP26 against the import. The
+ * record's r_offset addresses the .got.plt slot the PLT stub reads
+ * through; the loader patches that slot to the resolved runtime
+ * address before user code runs (DF_1_NOW, BIND_NOW). Bytes are
+ * written here at pre-shift vaddrs; link_emit re-serializes them
+ * after shift_image_addresses bumps the dyn vaddrs by headers_load. */
+ {
+ u32 ki;
+ for (ki = 0; ki < imports.nfuncs; ++ki) {
+ LinkSymId lsid = imports.funcs[ki];
+ u32 dynidx = dyn->sym_dynidx[lsid];
+ u64 slot_vaddr = dyn->got_plt_vaddr + 8u * (3u + ki);
+ u64 plt_entry_vaddr = dyn->plt_vaddr + arch->plt0_size +
+ (u64)arch->plt_entry_size * (u64)ki;
+ DynRela* r = &dyn->rela_plt[ki];
+ r->r_offset = slot_vaddr;
+ r->r_info = ELF64_R_INFO((u64)dynidx, elf_arch->r_jump_slot);
+ r->r_addend = 0;
+ /* Serialize into segment bytes (will be re-serialized post-shift). */
+ u8* p = ro_bytes + rela_plt_off + (u64)ki * ELF64_RELA_SIZE;
+ wr_u64_le(p + 0, r->r_offset);
+ wr_u64_le(p + 8, r->r_info);
+ wr_u64_le(p + 16, (u64)r->r_addend);
+ /* sym_plt_vaddr is consulted by apply_all_relocs. */
+ dyn->sym_plt_vaddr[lsid] = plt_entry_vaddr;
+ }
+ }
+
+ /* .rela.dyn entries (GLOB_DAT for imports referenced via .got, and
+ * RELATIVE for PIE internal abs fixups) are emitted by
+ * apply_all_relocs as it walks every relocation. layout_dyn
+ * leaves .rela.dyn empty here; the bytes are written post-shift in
+ * link_emit_elf. */
+
+ /* .got.plt prelude: for BIND_NOW we leave the body zero — the
+ * loader patches every slot from .rela.plt before user code. Some
+ * loaders still inspect slot 0 (&.dynamic) at startup; provide it
+ * so glibc-style loaders don't fault. The loader writes the link_map
+ * cookie into slot 1 at load time. */
+ if (has_plt) {
+ u8* gp_bytes = img->segment_bytes[rw_seg_idx];
+ if (gp_bytes && gotplt_bytes >= 8u) {
+ wr_u64_le(gp_bytes, dyn->dynamic_vaddr);
+ /* Slots 1, 2, and per-PLT slots stay zero until the loader
+ * fills them. Phase 5 would prefill the per-PLT slots with
+ * the address of PLT0 to support lazy binding. */
+ }
+ }
+
+ /* The .dynamic body is built later, after segment shifts are
+ * applied during emit (link_elf.c). emit_dynamic_body takes the
+ * post-shift vaddrs of every other dyn section and writes one
+ * DT_* entry per index. */
+
+ /* Synthesize linker-defined symbols that reference the .dynamic
+ * vaddr. Scrt1.o on Linux loads `_DYNAMIC` via ADRP+ADD, and
+ * libc_nonshared.a's atexit shim takes `__dso_handle` as the
+ * per-image identity (we use the .dynamic vaddr — any stable
+ * per-image address satisfies the contract since the shim only
+ * passes it through to __cxa_atexit, which the program-side glibc
+ * just stashes). */
+ link_define_boundary(l, img, "_DYNAMIC", dyn->dynamic_vaddr);
+ link_define_boundary(l, img, "__dso_handle", dyn->dynamic_vaddr);
+
+ free_imports(h, &imports);
+}
+
+/* ---- cleanup ---- */
+
+void link_dyn_state_free(LinkImage* img) {
+ Heap* h = img->heap;
+ LinkDynState* dyn = img->dyn;
+ if (!dyn) return;
+ if (dyn->dynsym) h->free(h, dyn->dynsym, sizeof(*dyn->dynsym) * dyn->ndynsym);
+ if (dyn->dynstr) h->free(h, dyn->dynstr, dyn->dynstr_len);
+ if (dyn->gnu_hash) h->free(h, dyn->gnu_hash, dyn->gnu_hash_len);
+ if (dyn->rela_dyn)
+ h->free(h, dyn->rela_dyn, sizeof(*dyn->rela_dyn) * dyn->cap_rela_dyn);
+ if (dyn->rela_plt)
+ h->free(h, dyn->rela_plt, sizeof(*dyn->rela_plt) * dyn->nrela_plt);
+ if (dyn->needed) h->free(h, dyn->needed, sizeof(*dyn->needed) * dyn->nneeded);
+ if (dyn->sym_dynidx)
+ h->free(h, dyn->sym_dynidx,
+ sizeof(*dyn->sym_dynidx) * dyn->sym_dynidx_size);
+ if (dyn->sym_plt_vaddr)
+ h->free(h, dyn->sym_plt_vaddr,
+ sizeof(*dyn->sym_plt_vaddr) * dyn->sym_dynidx_size);
+ h->free(h, dyn, sizeof(*dyn));
+ img->dyn = NULL;
+}
diff --git a/src/obj/elf/read.c b/src/obj/elf/read.c
@@ -0,0 +1,694 @@
+/* ELF ET_REL reader. Parses a 64-bit little-endian relocatable object
+ * back into a fresh ObjBuilder. The post-finalize ObjBuilder shape is
+ * the canonical superset doc/DESIGN.md §5.5 promises: read_elf of an
+ * emit_elf output produces an ObjBuilder equivalent to the writer's
+ * input, modulo (a) section ordering and (b) STT_SECTION symbols
+ * synthesized by the writer.
+ *
+ * Scope: ELFCLASS64 little-endian ET_REL for any e_machine with registered
+ * ELF arch ops. Anything else produces a compiler_panic with a diagnostic. */
+
+#include <string.h>
+
+#include "core/heap.h"
+#include "core/pool.h"
+#include "core/slice.h"
+#include "obj/elf/elf.h"
+#include "obj/format.h"
+
+static SrcLoc no_loc(void) {
+  /* All-zero SrcLoc, passed to compiler_panic by the reader below. */
+  return (SrcLoc){0, 0, 0};
+}
+
+/* ---- shdr scratch struct ---- */
+
+typedef struct ShdrRec {
+ u32 sh_name; /* wire +0: name offset, looked up in the section-name strtab */
+ u32 sh_type; /* wire +4: SHT_* */
+ u64 sh_flags; /* wire +8: SHF_* bits */
+ u64 sh_addr; /* wire +16 */
+ u64 sh_offset; /* wire +24: file offset of the section body */
+ u64 sh_size; /* wire +32: body size in bytes */
+ u32 sh_link; /* wire +40: forwarded to obj_section_ex */
+ u32 sh_info; /* wire +44: forwarded to obj_section_ex */
+ u64 sh_addralign; /* wire +48: 0 is treated as alignment 1 by read_elf */
+ u64 sh_entsize; /* wire +56: forwarded to obj_section_ex */
+} ShdrRec;
+
+static void parse_shdr(const u8* p, ShdrRec* out) {
+  /* Decode one section header: each field is read at its fixed offset. */
+  const ShdrRec rec = {
+      .sh_name = elf_rd_u32(p + 0),    .sh_type = elf_rd_u32(p + 4),
+      .sh_flags = elf_rd_u64(p + 8),   .sh_addr = elf_rd_u64(p + 16),
+      .sh_offset = elf_rd_u64(p + 24), .sh_size = elf_rd_u64(p + 32),
+      .sh_link = elf_rd_u32(p + 40),   .sh_info = elf_rd_u32(p + 44),
+      .sh_addralign = elf_rd_u64(p + 48),
+      .sh_entsize = elf_rd_u64(p + 56),
+  };
+  *out = rec;
+}
+
+/* ---- mappers ---- */
+
+/* The SHF_* bits elf_flags_to_obj (below) maps to SecFlag — anything outside this mask is
+ * treated as opaque and stashed in Section.ext_flags by the caller so the
+ * emitter can write it back unchanged. Examples of bits left over:
+ * SHF_EXCLUDE (0x80000000) on .llvm_addrsig, SHF_COMPRESSED (0x800) on
+ * compressed .debug_*, SHF_INFO_LINK (0x40) on .rela.* sections. */
+#define ELF_KNOWN_FLAGS_MASK \
+ ((u64)(SHF_ALLOC | SHF_EXECINSTR | SHF_WRITE | SHF_TLS | SHF_MERGE | \
+ SHF_STRINGS | SHF_GROUP | SHF_LINK_ORDER | SHF_GNU_RETAIN))
+
+static u16 elf_flags_to_obj(u64 f) {
+  /* One row per SHF_* bit that has a SecFlag counterpart; every bit set in
+   * `f` contributes its SecFlag to the result. */
+  const struct { u64 shf; u16 sf; } map[] = {
+      {SHF_ALLOC, SF_ALLOC},     {SHF_EXECINSTR, SF_EXEC},
+      {SHF_WRITE, SF_WRITE},     {SHF_TLS, SF_TLS},
+      {SHF_MERGE, SF_MERGE},     {SHF_STRINGS, SF_STRINGS},
+      {SHF_GROUP, SF_GROUP},     {SHF_LINK_ORDER, SF_LINK_ORDER},
+      {SHF_GNU_RETAIN, SF_RETAIN}};
+  u16 r = 0;
+  for (u32 k = 0; k < sizeof(map) / sizeof(map[0]); ++k)
+    if (f & map[k].shf) r |= map[k].sf;
+  return r;
+}
+
+/* Map ELF sh_type -> SecSem. Sets *known to 1 if the value is one of
+ * the canonical types the cfree model knows about; 0 means the caller
+ * fell through to the SSEM_PROGBITS fallback and should preserve the
+ * raw sh_type via Section.ext_type so emit_elf can write it back. */
+static u16 elf_type_to_sem(u32 t, int* known) {
+  /* Lookup table of the sh_type values with a dedicated SecSem. A hit
+   * reports *known = 1; a miss reports *known = 0 and falls back to
+   * SSEM_PROGBITS. */
+  const struct {
+    u32 sht;
+    u16 sem;
+  } k_types[] = {
+      {SHT_PROGBITS, SSEM_PROGBITS},
+      {SHT_NOBITS, SSEM_NOBITS},
+      {SHT_SYMTAB, SSEM_SYMTAB},
+      {SHT_STRTAB, SSEM_STRTAB},
+      {SHT_RELA, SSEM_RELA},
+      {SHT_REL, SSEM_REL},
+      {SHT_NOTE, SSEM_NOTE},
+      {SHT_INIT_ARRAY, SSEM_INIT_ARRAY},
+      {SHT_FINI_ARRAY, SSEM_FINI_ARRAY},
+      {SHT_PREINIT_ARRAY, SSEM_PREINIT_ARRAY},
+      {SHT_GROUP, SSEM_GROUP},
+  };
+  u32 k;
+  for (k = 0; k < sizeof(k_types) / sizeof(k_types[0]); ++k) {
+    if (k_types[k].sht == t) {
+      *known = 1;
+      return k_types[k].sem;
+    }
+  }
+  *known = 0;
+  return SSEM_PROGBITS;
+}
+
+static u16 elf_kind_from_name(const char* name, u32 nlen, u64 sh_flags,
+                              u32 sh_type) {
+  /* SHT_NOBITS wins, then name prefixes in table order, then the flags. */
+  const struct { const char* pfx; u32 len; u16 kind; } by_name[] = {
+      {".text", 5, SEC_TEXT}, {".rodata", 7, SEC_RODATA}, {".data", 5, SEC_DATA},
+      {".bss", 4, SEC_BSS},   {".debug_", 7, SEC_DEBUG}};
+  if (sh_type == SHT_NOBITS) return SEC_BSS;
+  for (u32 k = 0; k < sizeof(by_name) / sizeof(by_name[0]); ++k)
+    if (nlen >= by_name[k].len &&
+        memcmp(name, by_name[k].pfx, by_name[k].len) == 0) return by_name[k].kind;
+  if (sh_flags & SHF_EXECINSTR) return SEC_TEXT;
+  if (sh_flags & SHF_WRITE) return SEC_DATA;
+  return (sh_flags & SHF_ALLOC) ? SEC_RODATA : SEC_OTHER;
+}
+
+static u16 elf_bind_to_obj(u32 b) {
+  /* Every binding other than STB_GLOBAL / STB_WEAK maps to SB_LOCAL. */
+  u16 bind = SB_LOCAL;
+  if (b == STB_GLOBAL) {
+    bind = SB_GLOBAL;
+  } else if (b == STB_WEAK) {
+    bind = SB_WEAK;
+  }
+  return bind;
+}
+
+static u16 elf_type_to_kind(u32 t, u16 shndx) {
+  u16 kind;
+  /* The section index decides first: undefined and common symbols are
+   * classified without looking at the type field. */
+  if (shndx == SHN_UNDEF) return SK_UNDEF;
+  if (shndx == SHN_COMMON) return SK_COMMON;
+  /* SHN_ABS also hosts STT_FILE and other typed symbols whose value is
+   * not an address, so SK_ABS is reserved for the typeless
+   * STT_NOTYPE-at-SHN_ABS case and typed symbols keep their type. */
+  if (t == STT_NOTYPE && shndx == SHN_ABS) return SK_ABS;
+  if (t == STT_FUNC) {
+    kind = SK_FUNC;
+  } else if (t == STT_OBJECT) {
+    kind = SK_OBJ;
+  } else if (t == STT_SECTION) {
+    kind = SK_SECTION;
+  } else if (t == STT_FILE) {
+    kind = SK_FILE;
+  } else if (t == STT_TLS) {
+    kind = SK_TLS;
+  } else if (t == STT_COMMON) {
+    kind = SK_COMMON;
+  } else if (t == STT_GNU_IFUNC) {
+    kind = SK_IFUNC;
+  } else {
+    /* Defined but typeless (STT_NOTYPE, e.g. AArch64 mapping symbols
+     * `$x` / `$d` or assembly labels) round-trips as SK_NOTYPE. */
+    kind = SK_NOTYPE;
+  }
+  return kind;
+}
+
+static u8 elf_other_to_vis(u32 other) {
+  u8 vis = SV_DEFAULT;
+  const u32 bits = other & 3; /* only the low two bits are inspected */
+  if (bits == STV_HIDDEN) {
+    vis = SV_HIDDEN;
+  } else if (bits == STV_PROTECTED) {
+    vis = SV_PROTECTED;
+  } else if (bits == STV_INTERNAL) {
+    vis = SV_INTERNAL;
+  }
+  return vis;
+}
+
+/* Bounds-checked C-string slice from a strtab section. Returns "" on
+ * out-of-range so callers don't have to special-case it. `len_out` is
+ * set to the result's byte length. */
+static const char* strtab_lookup(const u8* tab, u64 tab_size, u32 off,
+                                 u32* len_out) {
+  const char* str = "";
+  u32 count = 0;
+  if (off < tab_size) {
+    const u32 limit = (u32)(tab_size - off);
+    str = (const char*)(tab + off);
+    /* Stop at the terminator or at the end of the table. */
+    while (count < limit && str[count] != '\0') ++count;
+  }
+  *len_out = count;
+  return str;
+}
+
+ObjBuilder* read_elf(Compiler* c, const char* name, const u8* data,
+ size_t len) {
+ (void)name;
+
+ if (len < ELF64_EHDR_SIZE)
+ compiler_panic(c, no_loc(), "read_elf: input shorter than ELF header");
+
+ if (data[EI_MAG0] != ELFMAG0 || data[EI_MAG1] != ELFMAG1 ||
+ data[EI_MAG2] != ELFMAG2 || data[EI_MAG3] != ELFMAG3)
+ compiler_panic(c, no_loc(), "read_elf: bad ELF magic");
+
+ if (data[EI_CLASS] != ELFCLASS64)
+ compiler_panic(c, no_loc(), "read_elf: not ELFCLASS64 (got %u)",
+ data[EI_CLASS]);
+ if (data[EI_DATA] != ELFDATA2LSB)
+ compiler_panic(c, no_loc(), "read_elf: not ELFDATA2LSB (got %u)",
+ data[EI_DATA]);
+
+ u16 e_type = elf_rd_u16(data + 16);
+ if (e_type != ET_REL)
+ compiler_panic(
+ c, no_loc(),
+ "read_elf: only ET_REL inputs are accepted by read_elf "
+ "(got e_type=%u); use read_elf_dso for ET_DYN shared objects",
+ (u32)e_type);
+
+ u16 e_machine = elf_rd_u16(data + 18);
+ const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_ELF);
+ const ObjElfArchOps* arch =
+ fmt && fmt->elf_machine ? fmt->elf_machine(e_machine) : NULL;
+ u32 (*reloc_from)(u32);
+ if (!arch || !arch->reloc_from) {
+ compiler_panic(c, no_loc(), "read_elf: unsupported e_machine 0x%x",
+ (u32)e_machine);
+ }
+ reloc_from = arch->reloc_from;
+
+ u64 e_shoff = elf_rd_u64(data + 40);
+ u32 e_flags = elf_rd_u32(data + 48);
+ u16 e_shentsize = elf_rd_u16(data + 58);
+ u16 e_shnum = elf_rd_u16(data + 60);
+ u16 e_shstrndx = elf_rd_u16(data + 62);
+
+ if (e_shentsize != ELF64_SHDR_SIZE)
+ compiler_panic(c, no_loc(), "read_elf: unexpected e_shentsize %u",
+ (u32)e_shentsize);
+ if (e_shoff + (u64)e_shnum * ELF64_SHDR_SIZE > len)
+ compiler_panic(c, no_loc(), "read_elf: section header table out of range");
+ if (e_shstrndx >= e_shnum)
+ compiler_panic(c, no_loc(), "read_elf: e_shstrndx %u >= e_shnum %u",
+ (u32)e_shstrndx, (u32)e_shnum);
+
+ /* Parse all shdrs into scratch. */
+ ShdrRec* shdrs = arena_array(c->scratch, ShdrRec, e_shnum);
+ for (u32 i = 0; i < e_shnum; ++i)
+ parse_shdr(data + e_shoff + (u64)i * ELF64_SHDR_SIZE, &shdrs[i]);
+
+ const ShdrRec* shstr_sh = &shdrs[e_shstrndx];
+ if (shstr_sh->sh_offset + shstr_sh->sh_size > len)
+ compiler_panic(c, no_loc(), "read_elf: .shstrtab out of range");
+ const u8* shstrtab = data + shstr_sh->sh_offset;
+ u64 shstrtab_sz = shstr_sh->sh_size;
+
+ /* Build the ObjBuilder. */
+ ObjBuilder* ob = obj_new(c);
+ if (!ob) compiler_panic(c, no_loc(), "read_elf: obj_new failed");
+ obj_set_elf_e_flags(ob, e_flags);
+
+ /* elf_to_obj[shndx] -> ObjSecId, OBJ_SEC_NONE for skipped sections. */
+ u32* elf_to_obj = arena_zarray(c->scratch, u32, e_shnum);
+
+ /* Pass 1: create obj sections for every non-NULL shdr that carries
+ * load-bearing model state. SYMTAB / STRTAB / RELA / REL are
+ * consumed below for symbols and relocations and do NOT round-trip
+ * as obj sections — emit_elf re-synthesizes them from the
+ * ObjBuilder's symbols / strtab / relocs. The shstrtab is a STRTAB
+ * too, so it falls out the same way. */
+ for (u32 i = 1; i < e_shnum; ++i) {
+ const ShdrRec* sh = &shdrs[i];
+ if (sh->sh_type == SHT_NULL) continue;
+ if (sh->sh_type == SHT_SYMTAB) continue;
+ if (sh->sh_type == SHT_STRTAB) continue;
+ if (sh->sh_type == SHT_RELA) continue;
+ if (sh->sh_type == SHT_REL) continue;
+ /* SHT_GROUP is consumed below into an ObjGroup record (signature
+ * symbol + member ObjSecIds). emit_elf re-synthesizes the group
+ * section bytes from the ObjGroup, using current section indices
+ * — so the original section's raw body would be stale anyway. */
+ if (sh->sh_type == SHT_GROUP) continue;
+
+ u32 nlen;
+ const char* nm = strtab_lookup(shstrtab, shstrtab_sz, sh->sh_name, &nlen);
+ Sym sym = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen});
+
+ u16 sec_kind = elf_kind_from_name(nm, nlen, sh->sh_flags, sh->sh_type);
+ int type_known;
+ u16 sec_sem = elf_type_to_sem(sh->sh_type, &type_known);
+ u16 flags = elf_flags_to_obj(sh->sh_flags);
+ u32 align = sh->sh_addralign ? (u32)sh->sh_addralign : 1;
+
+ ObjSecId id =
+ obj_section_ex(ob, sym, (SecKind)sec_kind, (SecSem)sec_sem, flags,
+ align, (u32)sh->sh_entsize, sh->sh_link, sh->sh_info);
+ if (id == OBJ_SEC_NONE)
+ compiler_panic(c, no_loc(), "read_elf: obj_section_ex failed for '%.*s'",
+ SLICE_ARG(((Slice){.s = nm, .len = nlen})));
+ elf_to_obj[i] = id;
+
+ /* Preserve format-specific bits the canonical SecSem/SecFlag
+ * mapping can't represent so emit_elf can write them back
+ * verbatim. ext_type only set when the sh_type fell through
+ * to the "unknown" path. */
+ u32 leftover = (u32)(sh->sh_flags & ~ELF_KNOWN_FLAGS_MASK);
+ if (!type_known || leftover) {
+ obj_section_set_ext(ob, id, OBJ_EXT_ELF, type_known ? 0 : sh->sh_type,
+ leftover);
+ }
+
+ /* Body bytes. */
+ if (sh->sh_type == SHT_NOBITS) {
+ obj_reserve_bss(ob, id, (u32)sh->sh_size, align);
+ } else if (sh->sh_size) {
+ if (sh->sh_offset + sh->sh_size > len)
+ compiler_panic(c, no_loc(),
+ "read_elf: section '%.*s' bytes out of range",
+ SLICE_ARG(((Slice){.s = nm, .len = nlen})));
+      /* SYMTAB / STRTAB / RELA / REL / GROUP never reach this point:
+       * the loop skips them above, and emit_elf regenerates them from
+       * the ObjBuilder's symbols, relocs and groups on re-emit. Only
+       * sections that round-trip as obj sections get raw bytes copied.
+       */
+ obj_write(ob, id, data + sh->sh_offset, (size_t)sh->sh_size);
+ }
+ }
+
+ /* Pass 2: parse the .symtab into ObjSyms, building an
+ * elf_sym_idx -> ObjSymId table. There may be zero or one SYMTAB in
+ * an ET_REL; pick the first. */
+ u32 symtab_shndx = 0;
+ for (u32 i = 1; i < e_shnum; ++i) {
+ if (shdrs[i].sh_type == SHT_SYMTAB) {
+ symtab_shndx = i;
+ break;
+ }
+ }
+
+ u32 nsyms = 0;
+ u32* sym_elf_to_obj = NULL;
+
+ if (symtab_shndx) {
+ const ShdrRec* sh = &shdrs[symtab_shndx];
+ if (sh->sh_entsize != ELF64_SYM_SIZE)
+ compiler_panic(c, no_loc(), "read_elf: .symtab entsize %llu != %u",
+ (unsigned long long)sh->sh_entsize, (u32)ELF64_SYM_SIZE);
+ if (sh->sh_size % ELF64_SYM_SIZE)
+ compiler_panic(c, no_loc(),
+ "read_elf: .symtab size %llu not a multiple of %u",
+ (unsigned long long)sh->sh_size, (u32)ELF64_SYM_SIZE);
+ if (sh->sh_link >= e_shnum)
+ compiler_panic(c, no_loc(), "read_elf: .symtab sh_link %u out of range",
+ sh->sh_link);
+ const ShdrRec* str_sh = &shdrs[sh->sh_link];
+ if (str_sh->sh_offset + str_sh->sh_size > len)
+ compiler_panic(c, no_loc(), "read_elf: .strtab out of range");
+ const u8* strtab = data + str_sh->sh_offset;
+ u64 strtab_sz = str_sh->sh_size;
+
+ nsyms = (u32)(sh->sh_size / ELF64_SYM_SIZE);
+ sym_elf_to_obj = arena_zarray(c->scratch, u32, nsyms ? nsyms : 1);
+
+ const u8* base = data + sh->sh_offset;
+ for (u32 i = 1; i < nsyms; ++i) { /* skip index 0 */
+ const u8* p = base + (u64)i * ELF64_SYM_SIZE;
+ u32 st_name = elf_rd_u32(p + 0);
+ u8 st_info = p[4];
+ u8 st_other = p[5];
+ u16 st_shndx = elf_rd_u16(p + 6);
+ u64 st_value = elf_rd_u64(p + 8);
+ u64 st_size = elf_rd_u64(p + 16);
+
+ u32 nlen;
+ const char* nm = strtab_lookup(strtab, strtab_sz, st_name, &nlen);
+ Sym sn = nlen
+ ? pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen})
+ : 0;
+
+ u32 e_bind = ELF64_ST_BIND(st_info);
+ u32 e_type = ELF64_ST_TYPE(st_info);
+ u16 bind = elf_bind_to_obj(e_bind);
+ u16 kind = elf_type_to_kind(e_type, st_shndx);
+ u8 vis = elf_other_to_vis(st_other);
+
+ ObjSecId sec_id;
+ u64 value;
+ u64 cmnalign = 0;
+ if (st_shndx == SHN_UNDEF) {
+ sec_id = OBJ_SEC_NONE;
+ value = st_value;
+ } else if (st_shndx == SHN_ABS || st_shndx == SHN_COMMON) {
+ sec_id = OBJ_SEC_NONE;
+ value = st_value;
+ if (st_shndx == SHN_COMMON) cmnalign = st_value;
+ } else if (st_shndx < e_shnum) {
+ sec_id = elf_to_obj[st_shndx];
+ value = st_value;
+ } else {
+ compiler_panic(c, no_loc(), "read_elf: symbol shndx %u out of range",
+ (u32)st_shndx);
+ sec_id = OBJ_SEC_NONE;
+ value = 0; /* unreachable */
+ }
+
+ ObjSymId id =
+ obj_symbol_ex(ob, sn, (SymBind)bind, (SymVis)vis, (SymKind)kind,
+ sec_id, value, st_size, cmnalign);
+ obj_sym_mark_referenced(ob, id);
+ sym_elf_to_obj[i] = id;
+ }
+ }
+
+ /* Pass 3: parse each SHT_RELA / SHT_REL into ObjBuilder relocations
+ * targeting the section the rela header's sh_info points at. */
+ for (u32 i = 1; i < e_shnum; ++i) {
+ const ShdrRec* sh = &shdrs[i];
+ int is_rela = (sh->sh_type == SHT_RELA);
+ int is_rel = (sh->sh_type == SHT_REL);
+ if (!is_rela && !is_rel) continue;
+
+ u32 entsize = is_rela ? ELF64_RELA_SIZE : 16;
+ if (sh->sh_entsize != entsize)
+ compiler_panic(c, no_loc(), "read_elf: rela entsize %llu != %u",
+ (unsigned long long)sh->sh_entsize, entsize);
+ if (sh->sh_info == 0 || sh->sh_info >= e_shnum)
+ compiler_panic(c, no_loc(), "read_elf: rela sh_info %u out of range",
+ sh->sh_info);
+ ObjSecId target = elf_to_obj[sh->sh_info];
+ if (target == OBJ_SEC_NONE) continue;
+
+ u32 nrec = (u32)(sh->sh_size / entsize);
+ const u8* base = data + sh->sh_offset;
+ for (u32 j = 0; j < nrec; ++j) {
+ const u8* p = base + (u64)j * entsize;
+ u64 r_offset = elf_rd_u64(p + 0);
+ u64 r_info = elf_rd_u64(p + 8);
+ i64 r_addend = is_rela ? (i64)elf_rd_u64(p + 16) : 0;
+ u32 esym = ELF64_R_SYM(r_info);
+ u32 etype = ELF64_R_TYPE(r_info);
+
+ u32 kind = reloc_from(etype);
+ if (kind == (u32)-1)
+ compiler_panic(c, no_loc(),
+ "read_elf: unsupported reloc type %u for e_machine 0x%x",
+ etype, (u32)e_machine);
+
+ ObjSymId target_sym = OBJ_SYM_NONE;
+ if (esym && sym_elf_to_obj && esym < nsyms)
+ target_sym = sym_elf_to_obj[esym];
+
+ obj_reloc_ex(ob, target, (u32)r_offset, (RelocKind)kind, target_sym,
+ r_addend, is_rela ? 1 : 0, 0);
+ }
+ }
+
+ /* Pass 4: SHT_GROUP. Each GROUP section's body is a sequence of
+ * 4-byte LE indices: [flags, shndx, shndx, ...]. The signature is
+ * the symbol named by sh_link/sh_info convention (sh_link=symtab,
+ * sh_info=symbol index in that symtab). */
+ for (u32 i = 1; i < e_shnum; ++i) {
+ const ShdrRec* sh = &shdrs[i];
+ if (sh->sh_type != SHT_GROUP) continue;
+
+ if (sh->sh_size < 4 || (sh->sh_size % 4)) continue;
+ const u8* p = data + sh->sh_offset;
+ u32 flags = elf_rd_u32(p);
+ u32 nm_len;
+ const char* gnm =
+ strtab_lookup(shstrtab, shstrtab_sz, sh->sh_name, &nm_len);
+ Sym gname = pool_intern_slice(c->global, (Slice){.s = gnm, .len = nm_len});
+
+ ObjSymId signature = OBJ_SYM_NONE;
+ if (sym_elf_to_obj && sh->sh_info < nsyms)
+ signature = sym_elf_to_obj[sh->sh_info];
+
+ ObjGroupId gid = obj_group(ob, gname, signature, flags);
+ u32 n = (u32)(sh->sh_size / 4) - 1;
+ for (u32 j = 0; j < n; ++j) {
+ u32 shndx = elf_rd_u32(p + 4 + j * 4);
+ if (shndx < e_shnum && elf_to_obj[shndx] != OBJ_SEC_NONE)
+ obj_group_add_section(ob, gid, elf_to_obj[shndx]);
+ }
+ }
+
+ obj_finalize(ob);
+ return ob;
+}
+
+/* ---- ET_DYN (shared object) reader ----
+ *
+ * Produces an ObjBuilder containing only the DSO's exported symbols
+ * (parsed from .dynsym, not .symtab). The DSO's sections, relocations,
+ * and groups are skipped — DSOs contribute no bytes to the output
+ * image. The DT_SONAME (if any) is interned and returned via
+ * `*soname_out` so the caller can record DT_NEEDED at link time.
+ *
+ * Symbol shape: each defined dynsym entry produces an ObjSym whose
+ * (bind, kind, vis) match the source. `section_id` is OBJ_SEC_NONE —
+ * the symbol's value is its DSO-internal vaddr, not meaningful to the
+ * consuming linker, so we record `value=0`. The linker layer
+ * (resolve_undefs) only consults the name and the defined-ness flag.
+ *
+ * Undefined dynsym entries (st_shndx==SHN_UNDEF) are imports the DSO
+ * itself has against other libraries; they're not relevant to a
+ * consumer that's linking against this DSO and are dropped. */
+
+/* Locate the first program header whose p_type equals `want_type`.
+ *
+ *   data, len     the whole ELF image.
+ *   e_phoff, e_phentsize, e_phnum
+ *                 program-header table location, as read from the Ehdr.
+ *   out_offset    receives the header's p_offset field (bytes 8..15).
+ *   out_filesz    receives the header's p_filesz field (bytes 32..39).
+ *
+ * Returns 1 and fills both outputs on a match. Returns 0, leaving the
+ * outputs untouched, when the entry size is not ELF64_PHDR_SIZE, the
+ * table does not fit inside `len`, or no header has the wanted type. */
+static int parse_phdr(const u8* data, size_t len, u64 e_phoff, u16 e_phentsize,
+                      u16 e_phnum, u32 want_type, u64* out_offset,
+                      u64* out_filesz) {
+  if (e_phentsize != ELF64_PHDR_SIZE) return 0;
+
+  /* Range-check without forming e_phoff + table_sz: e_phoff comes
+   * straight from the file, and a value near UINT64_MAX would wrap the
+   * sum and slip past a plain `> len` comparison. */
+  u64 table_sz = (u64)e_phnum * ELF64_PHDR_SIZE;
+  if (e_phoff > (u64)len || table_sz > (u64)len - e_phoff) return 0;
+
+  for (u32 i = 0; i < e_phnum; ++i) {
+    const u8* p = data + e_phoff + (u64)i * ELF64_PHDR_SIZE;
+    if (elf_rd_u32(p + 0) != want_type) continue; /* p_type */
+    *out_offset = elf_rd_u64(p + 8);               /* p_offset */
+    *out_filesz = elf_rd_u64(p + 32);              /* p_filesz */
+    return 1;
+  }
+  return 0;
+}
+
+/* True when the byte range [off, off + size) lies inside a file of `len`
+ * bytes. Written so that off + size is never formed: both values come
+ * from untrusted headers and their sum can wrap around. */
+static int dso_range_ok(u64 off, u64 size, size_t len) {
+  return off <= (u64)len && size <= (u64)len - off;
+}
+
+/* Read an ET_DYN (shared object) image into an ObjBuilder that holds only
+ * the DSO's exported symbols.
+ *
+ *   c           compiler context: used for panics, the scratch arena and
+ *               the global string pool.
+ *   name        unused.
+ *   data, len   the whole ELF image.
+ *   soname_out  optional. Cleared to 0 on entry, then set to the interned
+ *               DT_SONAME string, or 0 when none was found.
+ *
+ * Returns the finalized ObjBuilder. Every defined, non-local, named
+ * .dynsym entry becomes a symbol in OBJ_SEC_NONE with value 0 and size 0.
+ * Malformed input is reported through compiler_panic (presumably
+ * non-returning — confirm against its declaration). */
+ObjBuilder* read_elf_dso(Compiler* c, const char* name, const u8* data,
+                         size_t len, Sym* soname_out) {
+  (void)name;
+  if (soname_out) *soname_out = 0;
+
+  /* ---- identification: ELF64, little-endian, ET_DYN, known machine ---- */
+  if (len < ELF64_EHDR_SIZE)
+    compiler_panic(c, no_loc(), "read_elf_dso: input shorter than ELF header");
+  if (data[EI_MAG0] != ELFMAG0 || data[EI_MAG1] != ELFMAG1 ||
+      data[EI_MAG2] != ELFMAG2 || data[EI_MAG3] != ELFMAG3)
+    compiler_panic(c, no_loc(), "read_elf_dso: bad ELF magic");
+  if (data[EI_CLASS] != ELFCLASS64)
+    compiler_panic(c, no_loc(), "read_elf_dso: not ELFCLASS64");
+  if (data[EI_DATA] != ELFDATA2LSB)
+    compiler_panic(c, no_loc(), "read_elf_dso: not ELFDATA2LSB");
+
+  u16 e_type = elf_rd_u16(data + 16);
+  if (e_type != ET_DYN)
+    compiler_panic(c, no_loc(), "read_elf_dso: expected ET_DYN, got e_type=%u",
+                   (u32)e_type);
+
+  u16 e_machine = elf_rd_u16(data + 18);
+  {
+    /* Only the existence of an arch entry matters here; no relocations
+     * are parsed for a DSO. */
+    const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_ELF);
+    const ObjElfArchOps* arch =
+        fmt && fmt->elf_machine ? fmt->elf_machine(e_machine) : NULL;
+    if (!arch)
+      compiler_panic(c, no_loc(), "read_elf_dso: unsupported e_machine 0x%x",
+                     (u32)e_machine);
+  }
+
+  u64 e_phoff = elf_rd_u64(data + 32);
+  u64 e_shoff = elf_rd_u64(data + 40);
+  u16 e_phentsize = elf_rd_u16(data + 54);
+  u16 e_phnum = elf_rd_u16(data + 56);
+  u16 e_shentsize = elf_rd_u16(data + 58);
+  u16 e_shnum = elf_rd_u16(data + 60);
+  u16 e_shstrndx = elf_rd_u16(data + 62);
+
+  /* ---- section header table ---- */
+  if (e_shentsize != ELF64_SHDR_SIZE)
+    compiler_panic(c, no_loc(), "read_elf_dso: unexpected e_shentsize %u",
+                   (u32)e_shentsize);
+  if (!dso_range_ok(e_shoff, (u64)e_shnum * ELF64_SHDR_SIZE, len))
+    compiler_panic(c, no_loc(),
+                   "read_elf_dso: section header table out of range");
+  if (e_shstrndx >= e_shnum)
+    compiler_panic(c, no_loc(), "read_elf_dso: e_shstrndx out of range");
+
+  ShdrRec* shdrs = arena_array(c->scratch, ShdrRec, e_shnum);
+  for (u32 i = 0; i < e_shnum; ++i)
+    parse_shdr(data + e_shoff + (u64)i * ELF64_SHDR_SIZE, &shdrs[i]);
+
+  /* Locate .dynsym (preferred over .symtab — a stripped DSO carries
+   * only .dynsym) and .dynamic. The first section of each type wins;
+   * index 0 doubles as "not found". */
+  u32 dynsym_idx = 0, dynamic_idx = 0;
+  for (u32 i = 1; i < e_shnum; ++i) {
+    if (shdrs[i].sh_type == SHT_DYNSYM && !dynsym_idx) dynsym_idx = i;
+    if (shdrs[i].sh_type == SHT_DYNAMIC && !dynamic_idx) dynamic_idx = i;
+  }
+
+  if (!dynsym_idx)
+    compiler_panic(c, no_loc(), "read_elf_dso: no SHT_DYNSYM in shared object");
+
+  /* ---- DT_SONAME ----
+   * Read from the .dynamic section, whose sh_link names the string
+   * table the DT_SONAME offset indexes into. */
+  Sym soname = 0;
+  if (dynamic_idx) {
+    const ShdrRec* dsh = &shdrs[dynamic_idx];
+    if (dsh->sh_link >= e_shnum)
+      compiler_panic(c, no_loc(),
+                     "read_elf_dso: .dynamic sh_link %u out of range",
+                     dsh->sh_link);
+    const ShdrRec* str_sh = &shdrs[dsh->sh_link];
+    if (!dso_range_ok(str_sh->sh_offset, str_sh->sh_size, len))
+      compiler_panic(c, no_loc(), "read_elf_dso: .dynamic strtab out of range");
+    const u8* dynstr = data + str_sh->sh_offset;
+    u64 dynstr_sz = str_sh->sh_size;
+
+    if (!dso_range_ok(dsh->sh_offset, dsh->sh_size, len))
+      compiler_panic(c, no_loc(), "read_elf_dso: .dynamic body out of range");
+    const u8* dynp = data + dsh->sh_offset;
+    u64 dynsz = dsh->sh_size;
+    /* DT entries are 16 bytes: (d_tag: u64, d_un: u64). */
+    for (u64 off = 0; off + 16 <= dynsz; off += 16) {
+      u64 tag = elf_rd_u64(dynp + off);
+      u64 val = elf_rd_u64(dynp + off + 8);
+      if (tag == DT_NULL) break;
+      if (tag == DT_SONAME) {
+        /* The offset is handed to strtab_lookup as a u32; a larger value
+         * would be truncated and could alias an unrelated string, so it
+         * is treated as "no SONAME" instead. */
+        if (val <= 0xffffffffu) {
+          u32 nlen;
+          const char* nm = strtab_lookup(dynstr, dynstr_sz, (u32)val, &nlen);
+          if (nlen)
+            soname =
+                pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen});
+        }
+        break;
+      }
+    }
+  } else if (e_phnum) {
+    /* No SHT_DYNAMIC. PT_DYNAMIC is located but deliberately not parsed:
+     * DT_STRTAB carries a vaddr, not a file offset, so the SONAME string
+     * cannot be resolved from here and `soname` stays 0. (Stripped DSOs
+     * without SHT_DYNAMIC are exceedingly rare in practice.) */
+    u64 dyn_off = 0, dyn_sz = 0;
+    (void)parse_phdr(data, len, e_phoff, e_phentsize, e_phnum, PT_DYNAMIC,
+                     &dyn_off, &dyn_sz);
+  }
+  if (soname_out) *soname_out = soname;
+
+  /* ---- .dynsym ---- */
+  const ShdrRec* sh = &shdrs[dynsym_idx];
+  if (sh->sh_entsize != ELF64_SYM_SIZE)
+    compiler_panic(c, no_loc(), "read_elf_dso: .dynsym entsize %llu != %u",
+                   (unsigned long long)sh->sh_entsize, (u32)ELF64_SYM_SIZE);
+  if (sh->sh_size % ELF64_SYM_SIZE)
+    compiler_panic(c, no_loc(),
+                   "read_elf_dso: .dynsym size not multiple of entry size");
+  /* The symbol loop below reads straight out of this range, so it must
+   * lie inside the file. */
+  if (!dso_range_ok(sh->sh_offset, sh->sh_size, len))
+    compiler_panic(c, no_loc(), "read_elf_dso: .dynsym out of range");
+  if (sh->sh_link >= e_shnum)
+    compiler_panic(c, no_loc(), "read_elf_dso: .dynsym sh_link out of range");
+  const ShdrRec* str_sh = &shdrs[sh->sh_link];
+  if (!dso_range_ok(str_sh->sh_offset, str_sh->sh_size, len))
+    compiler_panic(c, no_loc(), "read_elf_dso: .dynstr out of range");
+  const u8* strtab = data + str_sh->sh_offset;
+  u64 strtab_sz = str_sh->sh_size;
+
+  ObjBuilder* ob = obj_new(c);
+  if (!ob) compiler_panic(c, no_loc(), "read_elf_dso: obj_new failed");
+
+  u32 nsyms = (u32)(sh->sh_size / ELF64_SYM_SIZE);
+  const u8* base = data + sh->sh_offset;
+  for (u32 i = 1; i < nsyms; ++i) { /* skip index 0 */
+    const u8* p = base + (u64)i * ELF64_SYM_SIZE;
+    u32 st_name = elf_rd_u32(p + 0);
+    u8 st_info = p[4];
+    u8 st_other = p[5];
+    u16 st_shndx = elf_rd_u16(p + 6);
+
+    /* Skip the DSO's own undefined imports — they don't satisfy any
+     * undef in our consumer. Locals (STB_LOCAL) likewise aren't
+     * exported and would only confuse the resolver. */
+    if (st_shndx == SHN_UNDEF) continue;
+    u32 e_bind = ELF64_ST_BIND(st_info);
+    if (e_bind == STB_LOCAL) continue;
+
+    /* Nameless entries cannot be looked up by name; drop them. */
+    u32 nlen;
+    const char* nm = strtab_lookup(strtab, strtab_sz, st_name, &nlen);
+    if (!nlen) continue;
+    Sym sn = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen});
+
+    u32 e_type_field = ELF64_ST_TYPE(st_info);
+    u16 bind = elf_bind_to_obj(e_bind);
+    u16 kind = elf_type_to_kind(e_type_field, st_shndx);
+    u8 vis = elf_other_to_vis(st_other);
+
+    /* DSO exports land as defined symbols in OBJ_SEC_NONE with
+     * value=0. The consumer treats them as imports — see
+     * resolve_undefs in src/link/link_layout.c. */
+    {
+      ObjSymId did = obj_symbol_ex(ob, sn, (SymBind)bind, (SymVis)vis,
+                                   (SymKind)kind, OBJ_SEC_NONE, 0, 0, 0);
+      obj_sym_mark_referenced(ob, did);
+    }
+  }
+
+  obj_finalize(ob);
+  return ob;
+}
diff --git a/src/obj/elf/reloc_aarch64.c b/src/obj/elf/reloc_aarch64.c
@@ -0,0 +1,182 @@
+/* RelocKind <-> AArch64 ELF reloc-type mapping.
+ *
+ * Cfree's RelocKind enum is arch-agnostic at its top (R_ABS, R_REL, R_PC
+ * variants) and arch-specific in its lower entries. On AArch64, R_REL and
+ * R_PC collapse to ELF_R_AARCH64_PREL32 / ELF_R_AARCH64_PREL64 — both
+ * mean "PC-relative to the symbol" once the linker has resolved final
+ * addresses — so the reverse mapping always yields the R_PC variant.
+ *
+ * Unsupported input: elf_aarch64_reloc_to returns 0 (ELF_R_AARCH64_NONE)
+ * and elf_aarch64_reloc_from returns (u32)-1; callers are expected to panic. */
+
+#include "obj/elf/elf.h"
+
+/* Translate a cfree RelocKind into its AArch64 ELF relocation type.
+ *
+ * The mapping is a flat table scanned front to back. R_PC* and R_REL*
+ * share the PREL codes. A kind with no table entry yields
+ * ELF_R_AARCH64_NONE, the same value R_NONE maps to. */
+u32 elf_aarch64_reloc_to(u32 kind /* RelocKind */) {
+  static const struct {
+    u32 src; /* RelocKind */
+    u32 dst; /* ELF_R_AARCH64_* */
+  } map[] = {
+      /* arch-agnostic data relocations */
+      {R_NONE, ELF_R_AARCH64_NONE},
+      {R_ABS64, ELF_R_AARCH64_ABS64},
+      {R_ABS32, ELF_R_AARCH64_ABS32},
+      {R_PC64, ELF_R_AARCH64_PREL64},
+      {R_PC32, ELF_R_AARCH64_PREL32},
+      {R_REL64, ELF_R_AARCH64_PREL64},
+      {R_REL32, ELF_R_AARCH64_PREL32},
+      /* branches and PC-relative address formation */
+      {R_AARCH64_JUMP26, ELF_R_AARCH64_JUMP26},
+      {R_AARCH64_CALL26, ELF_R_AARCH64_CALL26},
+      {R_AARCH64_CONDBR19, ELF_R_AARCH64_CONDBR19},
+      {R_AARCH64_TSTBR14, ELF_R_AARCH64_TSTBR14},
+      {R_AARCH64_LD_PREL_LO19, ELF_R_AARCH64_LD_PREL_LO19},
+      {R_AARCH64_ADR_PREL_LO21, ELF_R_AARCH64_ADR_PREL_LO21},
+      {R_AARCH64_ADR_PREL_PG_HI21, ELF_R_AARCH64_ADR_PREL_PG_HI21},
+      {R_AARCH64_ADR_PREL_PG_HI21_NC, ELF_R_AARCH64_ADR_PREL_PG_HI21_NC},
+      {R_AARCH64_ADD_ABS_LO12_NC, ELF_R_AARCH64_ADD_ABS_LO12_NC},
+      {R_AARCH64_ABS16, ELF_R_AARCH64_ABS16},
+      {R_AARCH64_PREL16, ELF_R_AARCH64_PREL16},
+      /* load/store low-12 immediates */
+      {R_AARCH64_LDST8_ABS_LO12_NC, ELF_R_AARCH64_LDST8_ABS_LO12_NC},
+      {R_AARCH64_LDST16_ABS_LO12_NC, ELF_R_AARCH64_LDST16_ABS_LO12_NC},
+      {R_AARCH64_LDST32_ABS_LO12_NC, ELF_R_AARCH64_LDST32_ABS_LO12_NC},
+      {R_AARCH64_LDST64_ABS_LO12_NC, ELF_R_AARCH64_LDST64_ABS_LO12_NC},
+      {R_AARCH64_LDST128_ABS_LO12_NC, ELF_R_AARCH64_LDST128_ABS_LO12_NC},
+      /* GOT */
+      {R_AARCH64_ADR_GOT_PAGE, ELF_R_AARCH64_ADR_GOT_PAGE},
+      {R_AARCH64_LD64_GOT_LO12_NC, ELF_R_AARCH64_LD64_GOT_LO12_NC},
+      /* TLS local-exec */
+      {R_AARCH64_TLSLE_ADD_TPREL_HI12,
+       ELF_R_AARCH64_TLSLE_ADD_TPREL_HI12},
+      {R_AARCH64_TLSLE_ADD_TPREL_LO12,
+       ELF_R_AARCH64_TLSLE_ADD_TPREL_LO12},
+      {R_AARCH64_TLSLE_ADD_TPREL_LO12_NC,
+       ELF_R_AARCH64_TLSLE_ADD_TPREL_LO12_NC},
+      {R_AARCH64_TLSLE_LDST8_TPREL_LO12,
+       ELF_R_AARCH64_TLSLE_LDST8_TPREL_LO12},
+      {R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC,
+       ELF_R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC},
+      {R_AARCH64_TLSLE_LDST16_TPREL_LO12,
+       ELF_R_AARCH64_TLSLE_LDST16_TPREL_LO12},
+      {R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC,
+       ELF_R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC},
+      {R_AARCH64_TLSLE_LDST32_TPREL_LO12,
+       ELF_R_AARCH64_TLSLE_LDST32_TPREL_LO12},
+      {R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC,
+       ELF_R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC},
+      {R_AARCH64_TLSLE_LDST64_TPREL_LO12,
+       ELF_R_AARCH64_TLSLE_LDST64_TPREL_LO12},
+      {R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC,
+       ELF_R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC},
+      /* dynamic relocations */
+      {R_AARCH64_GLOB_DAT, ELF_R_AARCH64_GLOB_DAT},
+      {R_AARCH64_JUMP_SLOT, ELF_R_AARCH64_JUMP_SLOT},
+      {R_AARCH64_RELATIVE, ELF_R_AARCH64_RELATIVE},
+      {R_AARCH64_COPY, ELF_R_AARCH64_COPY},
+  };
+  const u32 count = (u32)(sizeof map / sizeof map[0]);
+
+  for (u32 idx = 0; idx < count; ++idx) {
+    if (map[idx].src == kind) return map[idx].dst;
+  }
+  return ELF_R_AARCH64_NONE;
+}
+
+/* Translate an AArch64 ELF relocation type into a cfree RelocKind.
+ *
+ * PREL64 / PREL32 come back as R_PC64 / R_PC32. An ELF type with no
+ * table entry yields the sentinel (u32)-1. */
+u32 elf_aarch64_reloc_from(u32 elf_type) {
+  static const struct {
+    u32 src; /* ELF_R_AARCH64_* */
+    u32 dst; /* RelocKind */
+  } map[] = {
+      /* arch-agnostic data relocations */
+      {ELF_R_AARCH64_NONE, R_NONE},
+      {ELF_R_AARCH64_ABS64, R_ABS64},
+      {ELF_R_AARCH64_ABS32, R_ABS32},
+      {ELF_R_AARCH64_PREL64, R_PC64},
+      {ELF_R_AARCH64_PREL32, R_PC32},
+      /* branches and PC-relative address formation */
+      {ELF_R_AARCH64_JUMP26, R_AARCH64_JUMP26},
+      {ELF_R_AARCH64_CALL26, R_AARCH64_CALL26},
+      {ELF_R_AARCH64_CONDBR19, R_AARCH64_CONDBR19},
+      {ELF_R_AARCH64_TSTBR14, R_AARCH64_TSTBR14},
+      {ELF_R_AARCH64_LD_PREL_LO19, R_AARCH64_LD_PREL_LO19},
+      {ELF_R_AARCH64_ADR_PREL_LO21, R_AARCH64_ADR_PREL_LO21},
+      {ELF_R_AARCH64_ADR_PREL_PG_HI21, R_AARCH64_ADR_PREL_PG_HI21},
+      {ELF_R_AARCH64_ADR_PREL_PG_HI21_NC, R_AARCH64_ADR_PREL_PG_HI21_NC},
+      {ELF_R_AARCH64_ADD_ABS_LO12_NC, R_AARCH64_ADD_ABS_LO12_NC},
+      {ELF_R_AARCH64_ABS16, R_AARCH64_ABS16},
+      {ELF_R_AARCH64_PREL16, R_AARCH64_PREL16},
+      /* load/store low-12 immediates */
+      {ELF_R_AARCH64_LDST8_ABS_LO12_NC, R_AARCH64_LDST8_ABS_LO12_NC},
+      {ELF_R_AARCH64_LDST16_ABS_LO12_NC, R_AARCH64_LDST16_ABS_LO12_NC},
+      {ELF_R_AARCH64_LDST32_ABS_LO12_NC, R_AARCH64_LDST32_ABS_LO12_NC},
+      {ELF_R_AARCH64_LDST64_ABS_LO12_NC, R_AARCH64_LDST64_ABS_LO12_NC},
+      {ELF_R_AARCH64_LDST128_ABS_LO12_NC, R_AARCH64_LDST128_ABS_LO12_NC},
+      /* GOT */
+      {ELF_R_AARCH64_ADR_GOT_PAGE, R_AARCH64_ADR_GOT_PAGE},
+      {ELF_R_AARCH64_LD64_GOT_LO12_NC, R_AARCH64_LD64_GOT_LO12_NC},
+      /* TLS local-exec */
+      {ELF_R_AARCH64_TLSLE_ADD_TPREL_HI12,
+       R_AARCH64_TLSLE_ADD_TPREL_HI12},
+      {ELF_R_AARCH64_TLSLE_ADD_TPREL_LO12,
+       R_AARCH64_TLSLE_ADD_TPREL_LO12},
+      {ELF_R_AARCH64_TLSLE_ADD_TPREL_LO12_NC,
+       R_AARCH64_TLSLE_ADD_TPREL_LO12_NC},
+      {ELF_R_AARCH64_TLSLE_LDST8_TPREL_LO12,
+       R_AARCH64_TLSLE_LDST8_TPREL_LO12},
+      {ELF_R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC,
+       R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC},
+      {ELF_R_AARCH64_TLSLE_LDST16_TPREL_LO12,
+       R_AARCH64_TLSLE_LDST16_TPREL_LO12},
+      {ELF_R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC,
+       R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC},
+      {ELF_R_AARCH64_TLSLE_LDST32_TPREL_LO12,
+       R_AARCH64_TLSLE_LDST32_TPREL_LO12},
+      {ELF_R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC,
+       R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC},
+      {ELF_R_AARCH64_TLSLE_LDST64_TPREL_LO12,
+       R_AARCH64_TLSLE_LDST64_TPREL_LO12},
+      {ELF_R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC,
+       R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC},
+      /* dynamic relocations */
+      {ELF_R_AARCH64_GLOB_DAT, R_AARCH64_GLOB_DAT},
+      {ELF_R_AARCH64_JUMP_SLOT, R_AARCH64_JUMP_SLOT},
+      {ELF_R_AARCH64_RELATIVE, R_AARCH64_RELATIVE},
+      {ELF_R_AARCH64_COPY, R_AARCH64_COPY},
+  };
+  const u32 count = (u32)(sizeof map / sizeof map[0]);
+
+  for (u32 idx = 0; idx < count; ++idx) {
+    if (map[idx].src == elf_type) return map[idx].dst;
+  }
+  return (u32)-1; /* sentinel */
+}
diff --git a/src/obj/elf/reloc_riscv64.c b/src/obj/elf/reloc_riscv64.c
@@ -0,0 +1,182 @@
+/* RelocKind <-> RISC-V ELF reloc-type mapping.
+ *
+ * Mirror of reloc_x86_64.c for the RISC-V LP64 ABI. The arch-
+ * agnostic R_ABS / R_PC RelocKind entries fan out to the native
+ * RISC-V codes; the RISC-V-specific encodings (HI20/LO12, BRANCH,
+ * JAL, CALL, PCREL_*, TPREL_*, ADD/SUB/SET, RELAX, ALIGN, RVC_*)
+ * live in the lower band as R_RV_*.
+ *
+ * Unsupported input: elf_riscv64_reloc_to returns ELF_R_RISCV_NONE and
+ * elf_riscv64_reloc_from returns (u32)-1; callers are expected to panic. */
+
+#include "obj/elf/elf.h"
+
+/* Translate a cfree RelocKind into its RISC-V ELF relocation type.
+ *
+ * The mapping is a flat table scanned front to back. The generic
+ * R_PLT32 kind is emitted as CALL_PLT. A kind with no table entry yields
+ * ELF_R_RISCV_NONE, the same value R_NONE maps to. */
+u32 elf_riscv64_reloc_to(u32 kind /* RelocKind */) {
+  static const struct {
+    u32 src; /* RelocKind */
+    u32 dst; /* ELF_R_RISCV_* */
+  } map[] = {
+      /* arch-agnostic data relocations */
+      {R_NONE, ELF_R_RISCV_NONE},
+      {R_ABS64, ELF_R_RISCV_64},
+      {R_ABS32, ELF_R_RISCV_32},
+      {R_PC32, ELF_R_RISCV_32_PCREL},
+      /* absolute HI20/LO12 pairs, branches, calls */
+      {R_RV_HI20, ELF_R_RISCV_HI20},
+      {R_RV_LO12_I, ELF_R_RISCV_LO12_I},
+      {R_RV_LO12_S, ELF_R_RISCV_LO12_S},
+      {R_RV_BRANCH, ELF_R_RISCV_BRANCH},
+      {R_RV_JAL, ELF_R_RISCV_JAL},
+      {R_RV_CALL, ELF_R_RISCV_CALL},
+      {R_PLT32, ELF_R_RISCV_CALL_PLT},
+      /* PC-relative, GOT and TLS */
+      {R_RV_PCREL_HI20, ELF_R_RISCV_PCREL_HI20},
+      {R_RV_PCREL_LO12_I, ELF_R_RISCV_PCREL_LO12_I},
+      {R_RV_PCREL_LO12_S, ELF_R_RISCV_PCREL_LO12_S},
+      {R_RV_GOT_HI20, ELF_R_RISCV_GOT_HI20},
+      {R_RV_TLS_GOT_HI20, ELF_R_RISCV_TLS_GOT_HI20},
+      {R_RV_TPREL_HI20, ELF_R_RISCV_TPREL_HI20},
+      {R_RV_TPREL_LO12_I, ELF_R_RISCV_TPREL_LO12_I},
+      {R_RV_TPREL_LO12_S, ELF_R_RISCV_TPREL_LO12_S},
+      {R_RV_TPREL_ADD, ELF_R_RISCV_TPREL_ADD},
+      /* in-place add / subtract */
+      {R_RV_ADD8, ELF_R_RISCV_ADD8},
+      {R_RV_ADD16, ELF_R_RISCV_ADD16},
+      {R_RV_ADD32, ELF_R_RISCV_ADD32},
+      {R_RV_ADD64, ELF_R_RISCV_ADD64},
+      {R_RV_SUB8, ELF_R_RISCV_SUB8},
+      {R_RV_SUB16, ELF_R_RISCV_SUB16},
+      {R_RV_SUB32, ELF_R_RISCV_SUB32},
+      {R_RV_SUB64, ELF_R_RISCV_SUB64},
+      /* alignment, compressed branches, relaxation */
+      {R_RV_ALIGN, ELF_R_RISCV_ALIGN},
+      {R_RV_RVC_BRANCH, ELF_R_RISCV_RVC_BRANCH},
+      {R_RV_RVC_JUMP, ELF_R_RISCV_RVC_JUMP},
+      {R_RV_RELAX, ELF_R_RISCV_RELAX},
+      /* SUB6 / SET* / ULEB128 */
+      {R_RV_SUB6, ELF_R_RISCV_SUB6},
+      {R_RV_SET6, ELF_R_RISCV_SET6},
+      {R_RV_SET8, ELF_R_RISCV_SET8},
+      {R_RV_SET16, ELF_R_RISCV_SET16},
+      {R_RV_SET32, ELF_R_RISCV_SET32},
+      {R_RV_SET_ULEB128, ELF_R_RISCV_SET_ULEB128},
+      {R_RV_SUB_ULEB128, ELF_R_RISCV_SUB_ULEB128},
+  };
+  const u32 count = (u32)(sizeof map / sizeof map[0]);
+
+  for (u32 idx = 0; idx < count; ++idx) {
+    if (map[idx].src == kind) return map[idx].dst;
+  }
+  return ELF_R_RISCV_NONE;
+}
+
+/* Translate a RISC-V ELF relocation type into a cfree RelocKind.
+ *
+ * CALL_PLT comes back as the generic R_PLT32 kind. An ELF type with no
+ * table entry yields the sentinel (u32)-1. */
+u32 elf_riscv64_reloc_from(u32 elf_type) {
+  static const struct {
+    u32 src; /* ELF_R_RISCV_* */
+    u32 dst; /* RelocKind */
+  } map[] = {
+      /* arch-agnostic data relocations */
+      {ELF_R_RISCV_NONE, R_NONE},
+      {ELF_R_RISCV_64, R_ABS64},
+      {ELF_R_RISCV_32, R_ABS32},
+      {ELF_R_RISCV_32_PCREL, R_PC32},
+      /* absolute HI20/LO12 pairs, branches, calls */
+      {ELF_R_RISCV_HI20, R_RV_HI20},
+      {ELF_R_RISCV_LO12_I, R_RV_LO12_I},
+      {ELF_R_RISCV_LO12_S, R_RV_LO12_S},
+      {ELF_R_RISCV_BRANCH, R_RV_BRANCH},
+      {ELF_R_RISCV_JAL, R_RV_JAL},
+      {ELF_R_RISCV_CALL, R_RV_CALL},
+      {ELF_R_RISCV_CALL_PLT, R_PLT32},
+      /* PC-relative, GOT and TLS */
+      {ELF_R_RISCV_PCREL_HI20, R_RV_PCREL_HI20},
+      {ELF_R_RISCV_PCREL_LO12_I, R_RV_PCREL_LO12_I},
+      {ELF_R_RISCV_PCREL_LO12_S, R_RV_PCREL_LO12_S},
+      {ELF_R_RISCV_GOT_HI20, R_RV_GOT_HI20},
+      {ELF_R_RISCV_TLS_GOT_HI20, R_RV_TLS_GOT_HI20},
+      {ELF_R_RISCV_TPREL_HI20, R_RV_TPREL_HI20},
+      {ELF_R_RISCV_TPREL_LO12_I, R_RV_TPREL_LO12_I},
+      {ELF_R_RISCV_TPREL_LO12_S, R_RV_TPREL_LO12_S},
+      {ELF_R_RISCV_TPREL_ADD, R_RV_TPREL_ADD},
+      /* in-place add / subtract */
+      {ELF_R_RISCV_ADD8, R_RV_ADD8},
+      {ELF_R_RISCV_ADD16, R_RV_ADD16},
+      {ELF_R_RISCV_ADD32, R_RV_ADD32},
+      {ELF_R_RISCV_ADD64, R_RV_ADD64},
+      {ELF_R_RISCV_SUB8, R_RV_SUB8},
+      {ELF_R_RISCV_SUB16, R_RV_SUB16},
+      {ELF_R_RISCV_SUB32, R_RV_SUB32},
+      {ELF_R_RISCV_SUB64, R_RV_SUB64},
+      /* alignment, compressed branches, relaxation */
+      {ELF_R_RISCV_ALIGN, R_RV_ALIGN},
+      {ELF_R_RISCV_RVC_BRANCH, R_RV_RVC_BRANCH},
+      {ELF_R_RISCV_RVC_JUMP, R_RV_RVC_JUMP},
+      {ELF_R_RISCV_RELAX, R_RV_RELAX},
+      /* SUB6 / SET* / ULEB128 */
+      {ELF_R_RISCV_SUB6, R_RV_SUB6},
+      {ELF_R_RISCV_SET6, R_RV_SET6},
+      {ELF_R_RISCV_SET8, R_RV_SET8},
+      {ELF_R_RISCV_SET16, R_RV_SET16},
+      {ELF_R_RISCV_SET32, R_RV_SET32},
+      {ELF_R_RISCV_SET_ULEB128, R_RV_SET_ULEB128},
+      {ELF_R_RISCV_SUB_ULEB128, R_RV_SUB_ULEB128},
+  };
+  const u32 count = (u32)(sizeof map / sizeof map[0]);
+
+  for (u32 idx = 0; idx < count; ++idx) {
+    if (map[idx].src == elf_type) return map[idx].dst;
+  }
+  return (u32)-1; /* sentinel */
+}
diff --git a/src/obj/elf/reloc_x86_64.c b/src/obj/elf/reloc_x86_64.c
@@ -0,0 +1,134 @@
+/* RelocKind <-> x86_64 ELF reloc-type mapping.
+ *
+ * Mirror of reloc_aarch64.c for the x86_64 SysV ABI. The arch-
+ * agnostic R_ABS / R_PC / R_REL RelocKind entries fan out to the
+ * native x86_64 codes; the x86_64-only encodings (R_X64_PC8, PLT32,
+ * GOTPCREL, dynamic-only entries) live in the lower band.
+ *
+ * Unsupported input: elf_x86_64_reloc_to returns ELF_R_X86_64_NONE and
+ * elf_x86_64_reloc_from returns (u32)-1; callers are expected to panic. */
+
+#include "obj/elf/elf.h"
+
+/* Translate a cfree RelocKind into its x86_64 ELF relocation type.
+ *
+ * The mapping is a flat table scanned front to back. R_REL* share the
+ * PC* codes with R_PC*, and both R_PLT32 and R_X64_PLT32 emit PLT32. A
+ * kind with no table entry yields ELF_R_X86_64_NONE, the same value
+ * R_NONE maps to. */
+u32 elf_x86_64_reloc_to(u32 kind /* RelocKind */) {
+  static const struct {
+    u32 src; /* RelocKind */
+    u32 dst; /* ELF_R_X86_64_* */
+  } map[] = {
+      /* absolute and PC-relative data relocations */
+      {R_NONE, ELF_R_X86_64_NONE},
+      {R_ABS64, ELF_R_X86_64_64},
+      {R_ABS32, ELF_R_X86_64_32},
+      {R_X64_32S, ELF_R_X86_64_32S},
+      {R_PC32, ELF_R_X86_64_PC32},
+      {R_PC64, ELF_R_X86_64_PC64},
+      {R_REL32, ELF_R_X86_64_PC32},
+      {R_REL64, ELF_R_X86_64_PC64},
+      {R_X64_PC8, ELF_R_X86_64_PC8},
+      /* PLT and GOT */
+      {R_PLT32, ELF_R_X86_64_PLT32},
+      {R_X64_PLT32, ELF_R_X86_64_PLT32},
+      {R_GOT32, ELF_R_X86_64_GOT32},
+      {R_X64_GOTPCREL, ELF_R_X86_64_GOTPCREL},
+      {R_X64_GOTPCRELX, ELF_R_X86_64_GOTPCRELX},
+      {R_X64_REX_GOTPCRELX, ELF_R_X86_64_REX_GOTPCRELX},
+      {R_X64_GOTPC32, ELF_R_X86_64_GOTPC32},
+      {R_X64_GOTOFF64, ELF_R_X86_64_GOTOFF64},
+      /* TLS */
+      {R_X64_TPOFF32, ELF_R_X86_64_TPOFF32},
+      {R_X64_TPOFF64, ELF_R_X86_64_TPOFF64},
+      {R_X64_DTPOFF32, ELF_R_X86_64_DTPOFF32},
+      {R_X64_DTPMOD64, ELF_R_X86_64_DTPMOD64},
+      {R_X64_DTPOFF64, ELF_R_X86_64_DTPOFF64},
+      {R_X64_TLSGD, ELF_R_X86_64_TLSGD},
+      {R_X64_TLSLD, ELF_R_X86_64_TLSLD},
+      {R_X64_GOTTPOFF, ELF_R_X86_64_GOTTPOFF},
+      /* dynamic relocations */
+      {R_X64_GLOB_DAT, ELF_R_X86_64_GLOB_DAT},
+      {R_X64_JUMP_SLOT, ELF_R_X86_64_JUMP_SLOT},
+      {R_X64_RELATIVE, ELF_R_X86_64_RELATIVE},
+      {R_X64_COPY, ELF_R_X86_64_COPY},
+  };
+  const u32 count = (u32)(sizeof map / sizeof map[0]);
+
+  for (u32 idx = 0; idx < count; ++idx) {
+    if (map[idx].src == kind) return map[idx].dst;
+  }
+  return ELF_R_X86_64_NONE;
+}
+
+/* Translate an x86_64 ELF relocation type into a cfree RelocKind.
+ *
+ * PC32 / PC64 come back as R_PC32 / R_PC64 and PLT32 as R_X64_PLT32. An
+ * ELF type with no table entry yields the sentinel (u32)-1. */
+u32 elf_x86_64_reloc_from(u32 elf_type) {
+  static const struct {
+    u32 src; /* ELF_R_X86_64_* */
+    u32 dst; /* RelocKind */
+  } map[] = {
+      /* absolute and PC-relative data relocations */
+      {ELF_R_X86_64_NONE, R_NONE},
+      {ELF_R_X86_64_64, R_ABS64},
+      {ELF_R_X86_64_32, R_ABS32},
+      {ELF_R_X86_64_32S, R_X64_32S},
+      {ELF_R_X86_64_PC32, R_PC32},
+      {ELF_R_X86_64_PC64, R_PC64},
+      {ELF_R_X86_64_PC8, R_X64_PC8},
+      /* PLT and GOT */
+      {ELF_R_X86_64_PLT32, R_X64_PLT32},
+      {ELF_R_X86_64_GOT32, R_GOT32},
+      {ELF_R_X86_64_GOTPCREL, R_X64_GOTPCREL},
+      {ELF_R_X86_64_GOTPCRELX, R_X64_GOTPCRELX},
+      {ELF_R_X86_64_REX_GOTPCRELX, R_X64_REX_GOTPCRELX},
+      {ELF_R_X86_64_GOTPC32, R_X64_GOTPC32},
+      {ELF_R_X86_64_GOTOFF64, R_X64_GOTOFF64},
+      /* TLS */
+      {ELF_R_X86_64_TPOFF32, R_X64_TPOFF32},
+      {ELF_R_X86_64_TPOFF64, R_X64_TPOFF64},
+      {ELF_R_X86_64_DTPOFF32, R_X64_DTPOFF32},
+      {ELF_R_X86_64_DTPMOD64, R_X64_DTPMOD64},
+      {ELF_R_X86_64_DTPOFF64, R_X64_DTPOFF64},
+      {ELF_R_X86_64_TLSGD, R_X64_TLSGD},
+      {ELF_R_X86_64_TLSLD, R_X64_TLSLD},
+      {ELF_R_X86_64_GOTTPOFF, R_X64_GOTTPOFF},
+      /* dynamic relocations */
+      {ELF_R_X86_64_GLOB_DAT, R_X64_GLOB_DAT},
+      {ELF_R_X86_64_JUMP_SLOT, R_X64_JUMP_SLOT},
+      {ELF_R_X86_64_RELATIVE, R_X64_RELATIVE},
+      {ELF_R_X86_64_COPY, R_X64_COPY},
+  };
+  const u32 count = (u32)(sizeof map / sizeof map[0]);
+
+  for (u32 idx = 0; idx < count; ++idx) {
+    if (map[idx].src == elf_type) return map[idx].dst;
+  }
+  return (u32)-1; /* sentinel */
+}
diff --git a/src/obj/elf_emit.c b/src/obj/elf_emit.c
@@ -1,751 +0,0 @@
-/* ELF ET_REL writer. Walks a finalized ObjBuilder and emits a 64-bit
- * little-endian relocatable object via the supplied Writer.
- *
- * Layout strategy:
- * 1. plan ELF section headers (one per obj section, plus synthesized
- * .symtab / .strtab / .shstrtab and one .rela.<name> per obj section
- * that carries relocations);
- * 2. build .symtab + .strtab content (locals first — STT_SECTION
- * synthesized for every input section, then ordinary locals, then
- * globals/weaks);
- * 3. build .rela.* content using the per-arch reloc map (selected
- * by Compiler.target.arch);
- * 4. build .shstrtab;
- * 5. assign file offsets sequentially, respecting per-section
- * addralign;
- * 6. write Ehdr, then each section's bytes (seeking to its sh_offset),
- * then the section header table.
- *
- * 64-bit little-endian only. Per-arch reloc tables (elf_reloc_<arch>.c)
- * supply the RelocKind -> ELF type mapping; e_machine is selected from
- * Compiler.target.arch. Big-endian / 32-bit ELF panic at entry.
- *
- * See doc/DESIGN.md §5.5 for the round-trip invariant: read_elf of this
- * output must produce an ObjBuilder shape-equivalent to the input,
- * modulo (a) section ordering and (b) the synthesized STT_SECTION
- * symbols (which are visible to read_elf but were not in the input). */
-
-#include <string.h>
-
-#include "arch/arch.h"
-#include "core/arena.h"
-#include "core/buf.h"
-#include "core/heap.h"
-#include "core/pool.h"
-#include "core/slice.h"
-#include "core/util.h"
-#include "obj/elf.h"
-
-static SrcLoc no_loc(void) {
- SrcLoc l = {0, 0, 0};
- return l;
-}
-
-/* ---- per-ELF-section plan record ---- */
-
-/* Internal section descriptor used during planning. Mirrors Elf64_Shdr
- * but with an explicit pointer to the source bytes (either an obj
- * Section's chunked Buf or a synthesized linear buffer). NOBITS sections
- * have no source bytes and consume no file space. */
-typedef struct ElfSec {
- /* Final shdr fields (little-endian-encoded at write time). */
- u32 sh_name; /* offset into shstrtab */
- u32 sh_type;
- u64 sh_flags;
- u64 sh_addr; /* always 0 for ET_REL */
- u64 sh_offset;
- u64 sh_size;
- u32 sh_link;
- u32 sh_info;
- u64 sh_addralign;
- u64 sh_entsize;
-
- /* Section name. The name string lives in scratch (synthesized) or in
- * the global pool (obj-section names); buf-source is set for sections
- * carrying obj-section bytes, raw_bytes for synthesized. */
- const char* name;
- u32 name_len;
-
- const Buf* obj_bytes; /* one of these three is set: */
- const u8* raw_bytes; /* */
- int is_nobits; /* */
-} ElfSec;
-
-/* ---- emit ---- */
-
-static u32 sec_flags_to_elf(u16 flags) {
- u64 r = 0;
- if (flags & SF_ALLOC) r |= SHF_ALLOC;
- if (flags & SF_EXEC) r |= SHF_EXECINSTR;
- if (flags & SF_WRITE) r |= SHF_WRITE;
- if (flags & SF_TLS) r |= SHF_TLS;
- if (flags & SF_MERGE) r |= SHF_MERGE;
- if (flags & SF_STRINGS) r |= SHF_STRINGS;
- if (flags & SF_GROUP) r |= SHF_GROUP;
- if (flags & SF_LINK_ORDER) r |= SHF_LINK_ORDER;
- if (flags & SF_RETAIN) r |= SHF_GNU_RETAIN;
- return (u32)r;
-}
-
-static u32 sec_sem_to_elf(u16 sem) {
- switch (sem) {
- case SSEM_PROGBITS:
- return SHT_PROGBITS;
- case SSEM_NOBITS:
- return SHT_NOBITS;
- case SSEM_SYMTAB:
- return SHT_SYMTAB;
- case SSEM_STRTAB:
- return SHT_STRTAB;
- case SSEM_RELA:
- return SHT_RELA;
- case SSEM_REL:
- return SHT_REL;
- case SSEM_NOTE:
- return SHT_NOTE;
- case SSEM_INIT_ARRAY:
- return SHT_INIT_ARRAY;
- case SSEM_FINI_ARRAY:
- return SHT_FINI_ARRAY;
- case SSEM_PREINIT_ARRAY:
- return SHT_PREINIT_ARRAY;
- case SSEM_GROUP:
- return SHT_GROUP;
- default:
- return SHT_PROGBITS;
- }
-}
-
-static u8 sym_bind_to_elf(u16 bind) {
- switch (bind) {
- case SB_LOCAL:
- return STB_LOCAL;
- case SB_GLOBAL:
- return STB_GLOBAL;
- case SB_WEAK:
- return STB_WEAK;
- default:
- return STB_LOCAL;
- }
-}
-
-static u8 sym_kind_to_elf(u16 kind) {
- switch (kind) {
- case SK_UNDEF:
- return STT_NOTYPE;
- case SK_FUNC:
- return STT_FUNC;
- case SK_OBJ:
- return STT_OBJECT;
- case SK_SECTION:
- return STT_SECTION;
- case SK_FILE:
- return STT_FILE;
- /* Tentative definitions: real ELF emitters (clang, gcc, GNU as)
- * write these as STT_OBJECT with shndx=SHN_COMMON. STT_COMMON is
- * a near-extinct convention that llvm-readelf renders as the
- * literal type name "COMMON" — emitting it breaks roundtrip
- * against any toolchain-produced .o. */
- case SK_COMMON:
- return STT_OBJECT;
- case SK_TLS:
- return STT_TLS;
- case SK_ABS:
- return STT_NOTYPE; /* SHN_ABS, NOTYPE */
- case SK_NOTYPE:
- return STT_NOTYPE;
- case SK_IFUNC:
- return STT_GNU_IFUNC;
- default:
- return STT_NOTYPE;
- }
-}
-
-static u8 sym_vis_to_elf(u8 vis) {
- switch (vis) {
- case SV_DEFAULT:
- return STV_DEFAULT;
- case SV_HIDDEN:
- return STV_HIDDEN;
- case SV_PROTECTED:
- return STV_PROTECTED;
- case SV_INTERNAL:
- return STV_INTERNAL;
- default:
- return STV_DEFAULT;
- }
-}
-
-static u16 sym_shndx(const ObjSym* s, const u32* obj_to_elf, u32 nsec) {
- if (s->kind == SK_COMMON) return (u16)SHN_COMMON;
- if (s->kind == SK_ABS) return (u16)SHN_ABS;
- /* STT_FILE conventionally carries SHN_ABS as its shndx — its value
- * field is not an address. Match clang/binutils. */
- if (s->kind == SK_FILE) return (u16)SHN_ABS;
- if (s->section_id == OBJ_SEC_NONE) return (u16)SHN_UNDEF;
- if (s->section_id >= nsec) return (u16)SHN_UNDEF;
- return (u16)obj_to_elf[s->section_id];
-}
-
-static const char* sym_to_str(Compiler* c, Sym n, u32* len_out) {
- Slice sl = pool_slice(c->global, n);
- const char* s = sl.s;
- if (!s) {
- *len_out = 0;
- return "";
- }
- *len_out = (u32)sl.len;
- return s;
-}
-
-/* Append `len` bytes of `s` followed by a single NUL to `b`, return
- * the offset at which `s` was placed.
- *
- * If `s` already exists at some offset (as a NUL-terminated substring
- * starting at any offset), reuse that offset — clang/binutils both
- * dedupe trivially identical strings, and matching the convention
- * keeps our strtab the same size as theirs. The dedupe is linear in
- * the strtab; section + symbol counts are small enough that this is
- * fine without a hash. */
-static u32 strtab_add(Buf* b, const char* s, u32 len) {
- /* Empty string: always at offset 0 (the leading NUL). */
- if (len == 0) return 0;
-
- /* Linear search for an existing copy. We must scan chunk-by-chunk
- * because Buf is segmented; flatten to a temp scratch buffer first
- * if non-empty and search there. For our tiny strtabs, the cost is
- * dominated by the writes anyway. */
- u32 total = buf_pos(b);
- if (total > len) {
- /* Flatten just to search — not optimal but the strtab here is
- * always small (low kilobytes at most). */
- u8 stack[256];
- u8* tmp =
- total <= sizeof stack ? stack : (u8*)b->heap->alloc(b->heap, total, 1);
- if (tmp) {
- buf_flatten(b, tmp);
- for (u32 i = 0; i + len < total; ++i) {
- if (tmp[i + len] == 0 && memcmp(tmp + i, s, len) == 0) {
- if (tmp != stack) b->heap->free(b->heap, tmp, total);
- return i;
- }
- }
- if (tmp != stack) b->heap->free(b->heap, tmp, total);
- }
- }
-
- u32 off = total;
- buf_write(b, s, len);
- {
- u8 z = 0;
- buf_write(b, &z, 1);
- }
- return off;
-}
-
-void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) {
- Heap* h = (Heap*)c->ctx->heap;
-
- /* Run the tombstone sweep before any iteration: cascades removed
- * sections into their defining symbols, drops dangling relocs,
- * compacts groups, and absorbs the historical UNDEF prune. After this
- * call every direct ID-based access below must skip entries whose
- * `removed` bit is set. */
- obj_sweep_dead(ob);
-
- /* ---- target validation ------------------------------------------ */
- const ArchImpl* arch = arch_for_compiler(c);
- const ArchElfOps* elf = arch ? arch->elf : NULL;
- u32 e_machine;
- u32 (*reloc_to)(u32);
- if (!elf || !elf->reloc_to) {
- compiler_panic(c, no_loc(), "emit_elf: unsupported target arch %u",
- (u32)c->target.arch);
- }
- e_machine = elf->e_machine;
- reloc_to = elf->reloc_to;
- if (c->target.big_endian) {
- compiler_panic(c, no_loc(), "emit_elf: big-endian ELF not supported");
- }
- if (c->target.ptr_size != 8) {
- compiler_panic(c, no_loc(), "emit_elf: ptr_size %u (expected 8)",
- (u32)c->target.ptr_size);
- }
-
- /* ---- pass 1: plan ELF section list ------------------------------ */
-
- u32 nobjsec = obj_section_count(ob);
-
- u32 nobjgrp = obj_group_count(ob);
- /* Upper bound on ELF section count:
- * 1 (SHN_UNDEF)
- * + nobjsec - 1 (one ELF entry per real obj section)
- * + nobjsec - 1 (worst case: a .rela.<name> per obj section)
- * + nobjgrp - 1 (one synthesized SHT_GROUP per ObjGroup)
- * + 3 (.symtab, .strtab, .shstrtab)
- */
- u32 max_secs =
- 1 + (nobjsec - 1) + (nobjsec - 1) + (nobjgrp ? nobjgrp - 1 : 0) + 3;
- if (max_secs < 4) max_secs = 4;
- ElfSec* secs = arena_array(c->scratch, ElfSec, max_secs);
- u32 nsecs = 0;
- memset(&secs[nsecs++], 0, sizeof secs[0]); /* index 0 = SHN_UNDEF */
-
- /* Map obj section id -> ELF section index. */
- u32* obj_to_elf = arena_zarray(c->scratch, u32, nobjsec);
-
- for (u32 i = 1; i < nobjsec; ++i) {
- const Section* s = obj_section_get(ob, i);
- if (s->removed) continue; /* tombstone — see obj_sweep_dead */
- ElfSec* es = &secs[nsecs];
- memset(es, 0, sizeof *es);
- u32 nlen;
- es->name = sym_to_str(c, s->name, &nlen);
- es->name_len = nlen;
- /* Honor format-specific overrides preserved by the reader for
- * sh_type/sh_flags bits the canonical SecSem/SecFlag enums
- * don't model (e.g. SHT_LLVM_ADDRSIG, SHF_EXCLUDE). */
- es->sh_type = (s->ext_kind == OBJ_EXT_ELF && s->ext_type)
- ? s->ext_type
- : sec_sem_to_elf(s->sem);
- es->sh_flags = sec_flags_to_elf(s->flags);
- if (s->ext_kind == OBJ_EXT_ELF) es->sh_flags |= s->ext_flags;
- es->sh_addr = 0;
- es->sh_addralign = s->align ? s->align : 1;
- es->sh_entsize = s->entsize;
- es->sh_link = 0;
- es->sh_info = 0;
- if (s->sem == SSEM_NOBITS) {
- es->is_nobits = 1;
- es->sh_size = s->bss_size;
- } else {
- es->obj_bytes = &s->bytes;
- es->sh_size = s->bytes.total;
- }
- obj_to_elf[i] = nsecs++;
- }
-
- /* ---- pass 2: build .symtab + .strtab content -------------------- */
-
- /* .strtab: leading NUL byte. Then a name per emitted symbol. */
- Buf strtab;
- buf_init(&strtab, h);
- {
- u8 z = 0;
- buf_write(&strtab, &z, 1);
- }
-
- /* The .symtab is built into a contiguous arena buffer of fixed-size
- * 24-byte records. We don't know the count up front; bound by
- * (nobjsec section symbols) + (obj symbol count). */
- u32 nobjsym = 0;
- {
- ObjSymIter* it = obj_symiter_new(ob);
- ObjSymEntry e;
- while (obj_symiter_next(it, &e)) ++nobjsym;
- obj_symiter_free(it);
- }
- u32 max_syms = 1 + (nobjsec - 1) + nobjsym;
- u8* symtab = (u8*)arena_alloc(c->scratch, (size_t)ELF64_SYM_SIZE * max_syms,
- _Alignof(u64));
- u32 nsyms = 0;
- memset(&symtab[nsyms * ELF64_SYM_SIZE], 0, ELF64_SYM_SIZE);
- nsyms = 1; /* index 0: STN_UNDEF */
-
-/* Helper to emit one Elf64_Sym record at index `idx` into symtab. */
-#define WRITE_SYM(idx, st_name, st_info, st_other, st_shndx, st_value, \
- st_size) \
- do { \
- u8* slot = &symtab[(idx) * ELF64_SYM_SIZE]; \
- slot[0] = (u8)((st_name)); \
- slot[1] = (u8)((st_name) >> 8); \
- slot[2] = (u8)((st_name) >> 16); \
- slot[3] = (u8)((st_name) >> 24); \
- slot[4] = (u8)((st_info)); \
- slot[5] = (u8)((st_other)); \
- slot[6] = (u8)((st_shndx)); \
- slot[7] = (u8)((st_shndx) >> 8); \
- for (int _b = 0; _b < 8; ++_b) \
- slot[8 + _b] = (u8)((u64)(st_value) >> (_b * 8)); \
- for (int _b = 0; _b < 8; ++_b) \
- slot[16 + _b] = (u8)((u64)(st_size) >> (_b * 8)); \
- } while (0)
-
- /* No automatic STT_SECTION synthesis. Section symbols are emitted
- * iff they are present in the input ObjBuilder (typically as
- * SK_SECTION ObjSyms preserved by read_elf, or added explicitly by
- * a hand-built caller that needs to reference a section by sym).
- * This matches clang's output: only sections referenced by section
- * symbols carry one. */
-
- /* Map obj symbol id -> elf symbol index. */
- u32* sym_to_elf = arena_zarray(c->scratch, u32, nobjsym + 2);
-
- /* Two passes over obj symbols: locals, then globals/weak. */
- for (int pass = 0; pass < 2; ++pass) {
- ObjSymIter* it = obj_symiter_new(ob);
- ObjSymEntry e;
- while (obj_symiter_next(it, &e)) {
- const ObjSym* s = e.sym;
- if (s->removed) continue; /* spurious-UNDEF prune + explicit removal */
- int is_local = (s->bind == SB_LOCAL);
- if ((pass == 0) != is_local) continue;
- u32 nlen;
- const char* nm = sym_to_str(c, s->name, &nlen);
- u32 nameoff = nlen ? strtab_add(&strtab, nm, nlen) : 0;
- u8 info =
- ELF64_ST_INFO(sym_bind_to_elf(s->bind), sym_kind_to_elf(s->kind));
- u8 other = sym_vis_to_elf(s->vis);
- u16 shndx = sym_shndx(s, obj_to_elf, nobjsec);
- u64 value = (s->kind == SK_COMMON) ? s->common_align : s->value;
- WRITE_SYM(nsyms, nameoff, info, other, shndx, value, s->size);
- sym_to_elf[e.id] = nsyms;
- nsyms++;
- }
- obj_symiter_free(it);
- }
-#undef WRITE_SYM
-
- /* sh_info on .symtab is the index of the first non-local symbol.
- * Locals = 1 (STN_UNDEF) + count of input-side LOCAL obj symbols. */
- u32 nlocals = 1;
- {
- ObjSymIter* it = obj_symiter_new(ob);
- ObjSymEntry e;
- while (obj_symiter_next(it, &e)) {
- if (e.sym->removed) continue;
- if (e.sym->bind == SB_LOCAL) ++nlocals;
- }
- obj_symiter_free(it);
- }
-
- /* Append .symtab + .strtab + .shstrtab planning records.
- * sh_link/sh_info for .symtab and .rela.* are filled in once we know
- * each section's elf index. */
- u32 idx_symtab = 0, idx_strtab = 0, idx_shstrtab = 0;
-
- /* ---- pass 2.5: synthesize SHT_GROUP sections from ObjGroups ----
- * Append one SHT_GROUP section per ObjGroup. The body is a 4-byte LE
- * flags word followed by the elf section index of each member.
- * Placed before relas so the file layout has data sections, then
- * groups, then relas/symtab/strtab — matching clang's ordering and
- * keeping data-section offsets independent of group presence. */
- u32* group_elf_idx =
- nobjgrp > 1 ? arena_array(c->scratch, u32, nobjgrp) : NULL;
- if (group_elf_idx) memset(group_elf_idx, 0, sizeof(u32) * nobjgrp);
- for (u32 gi = 1; gi < nobjgrp; ++gi) {
- const ObjGroup* g = obj_group_get(ob, gi);
- if (!g || g->removed) continue;
-
- u32 body_size = 4u + 4u * g->nsections;
- u8* body = (u8*)arena_alloc(c->scratch, body_size, _Alignof(u32));
- u32 gflags = g->flags ? g->flags : 1u; /* GRP_COMDAT default */
- body[0] = (u8)(gflags);
- body[1] = (u8)(gflags >> 8);
- body[2] = (u8)(gflags >> 16);
- body[3] = (u8)(gflags >> 24);
- for (u32 j = 0; j < g->nsections; ++j) {
- ObjSecId sid = g->sections[j];
- u32 eidx = (sid && sid < nobjsec) ? obj_to_elf[sid] : 0;
- u8* slot = body + 4 + j * 4;
- slot[0] = (u8)(eidx);
- slot[1] = (u8)(eidx >> 8);
- slot[2] = (u8)(eidx >> 16);
- slot[3] = (u8)(eidx >> 24);
- }
-
- u32 nlen;
- const char* gname = sym_to_str(c, g->name, &nlen);
- if (nlen == 0) {
- gname = ".group";
- nlen = 6;
- }
-
- ElfSec* es = &secs[nsecs];
- memset(es, 0, sizeof *es);
- es->name = gname;
- es->name_len = nlen;
- es->sh_type = SHT_GROUP;
- es->sh_flags = 0;
- es->sh_addralign = 4;
- es->sh_entsize = 4;
- es->sh_info = (g->signature && g->signature < nobjsym + 2)
- ? sym_to_elf[g->signature]
- : 0;
- /* sh_link patched below once idx_symtab is known. */
- es->raw_bytes = body;
- es->sh_size = body_size;
- group_elf_idx[gi] = nsecs;
- nsecs++;
- }
-
- /* ---- pass 3: build .rela.<name> contents ------------------------ */
-
- /* Allocate one .rela section per obj section that has any relocs. */
- u32 total_relocs = obj_reloc_total(ob);
-
- typedef struct RelaPlan {
- u32 obj_section; /* obj section the rela applies to */
- u8* bytes; /* arena-allocated rela bytes */
- u32 size; /* bytes count = nrelocs * 24 */
- } RelaPlan;
-
- RelaPlan* rela_plans = arena_zarray(c->scratch, RelaPlan, nobjsec);
- u32 nrela_plans = 0;
-
- for (u32 si = 1; si < nobjsec; ++si) {
- const Section* host = obj_section_get(ob, si);
- if (!host || host->removed) continue;
- u32 nr = obj_reloc_count(ob, si);
- if (!nr) continue;
- u8* buf = (u8*)arena_alloc(c->scratch, (size_t)ELF64_RELA_SIZE * nr,
- _Alignof(u64));
- u32 j = 0;
- for (u32 i = 0; i < total_relocs; ++i) {
- const Reloc* r = obj_reloc_at(ob, i);
- if (r->removed) continue;
- if (r->section_id != si) continue;
- u32 etype = reloc_to(r->kind);
- if (etype == ELF_R_AARCH64_NONE /* == ELF_R_X86_64_NONE == 0 */ &&
- r->kind != R_NONE) {
- compiler_panic(c, no_loc(),
- "emit_elf: unsupported relocation kind %u for arch %u",
- (u32)r->kind, (u32)c->target.arch);
- }
- u32 sym_elf_idx;
- if (r->sym == OBJ_SYM_NONE) {
- /* Reloc against a section: use the synthesized
- * STT_SECTION symbol if the obj reloc carries a
- * section_id-equivalent; otherwise 0. */
- sym_elf_idx = 0;
- } else {
- sym_elf_idx = sym_to_elf[r->sym];
- }
- u8* slot = &buf[j * ELF64_RELA_SIZE];
- for (int b = 0; b < 8; ++b) slot[b] = (u8)((u64)r->offset >> (b * 8));
- u64 info = ELF64_R_INFO(sym_elf_idx, etype);
- for (int b = 0; b < 8; ++b) slot[8 + b] = (u8)(info >> (b * 8));
- for (int b = 0; b < 8; ++b)
- slot[16 + b] = (u8)((u64)r->addend >> (b * 8));
- ++j;
- }
- rela_plans[nrela_plans].obj_section = si;
- rela_plans[nrela_plans].bytes = buf;
- rela_plans[nrela_plans].size = nr * ELF64_RELA_SIZE;
- nrela_plans++;
- }
-
- /* Append ElfSec entries for each .rela.<name>. Names are ".rela" +
- * the obj section name; allocate in scratch. */
- u32* rela_elf_idx = arena_array(c->scratch, u32, nrela_plans + 1);
- for (u32 ri = 0; ri < nrela_plans; ++ri) {
- u32 si = rela_plans[ri].obj_section;
- const Section* s = obj_section_get(ob, si);
- u32 base_len;
- const char* base = sym_to_str(c, s->name, &base_len);
- u32 nlen = 5 + base_len; /* ".rela" + base */
- char* nm = (char*)arena_alloc(c->scratch, nlen + 1, 1);
- memcpy(nm, ".rela", 5);
- memcpy(nm + 5, base, base_len);
- nm[nlen] = 0;
-
- ElfSec* es = &secs[nsecs];
- memset(es, 0, sizeof *es);
- es->name = nm;
- es->name_len = nlen;
- es->sh_type = SHT_RELA;
- es->sh_flags = SHF_INFO_LINK;
- es->sh_addralign = 8;
- es->sh_entsize = ELF64_RELA_SIZE;
- es->sh_info = obj_to_elf[si]; /* section the relas apply to */
- /* sh_link filled below once we know symtab's elf index. */
- es->raw_bytes = rela_plans[ri].bytes;
- es->sh_size = rela_plans[ri].size;
- rela_elf_idx[ri] = nsecs;
- nsecs++;
- }
-
- /* Append .symtab. */
- {
- ElfSec* es = &secs[nsecs];
- memset(es, 0, sizeof *es);
- es->name = ".symtab";
- es->name_len = 7;
- es->sh_type = SHT_SYMTAB;
- es->sh_flags = 0;
- es->sh_addralign = 8;
- es->sh_entsize = ELF64_SYM_SIZE;
- es->raw_bytes = symtab;
- es->sh_size = (u64)nsyms * ELF64_SYM_SIZE;
- es->sh_info = nlocals; /* first non-local symbol */
- idx_symtab = nsecs;
- nsecs++;
- }
-
- /* Patch sh_link on each .rela section now that we have idx_symtab. */
- for (u32 ri = 0; ri < nrela_plans; ++ri) {
- secs[rela_elf_idx[ri]].sh_link = idx_symtab;
- }
- /* SHT_GROUP also points its sh_link at .symtab (the symtab the
- * signature symbol's index in sh_info refers to). */
- for (u32 gi = 1; gi < nobjgrp; ++gi) {
- if (group_elf_idx && group_elf_idx[gi]) {
- secs[group_elf_idx[gi]].sh_link = idx_symtab;
- }
- }
-
- /* ---- pass 4: append section names to the same strtab and emit it.
- *
- * clang reuses .strtab for both symbol names and section names —
- * e_shstrndx and .symtab.sh_link both point at it. Match that
- * convention: continue appending into `strtab` (which already
- * contains the symbol names), then emit one STRTAB section. */
-
- /* secs[0] (SHN_UNDEF) carries name "" → offset 0. */
- secs[0].sh_name = 0;
- for (u32 i = 1; i < nsecs; ++i) {
- secs[i].sh_name = strtab_add(&strtab, secs[i].name, secs[i].name_len);
- }
-
- /* Append the .strtab section record itself; its own name lands in
- * the same buffer (so the strtab is self-describing). */
- {
- const char* nm = ".strtab";
- u32 nlen = 7;
- u32 nameoff = strtab_add(&strtab, nm, nlen);
- u32 sz = buf_pos(&strtab);
- u8* flat = (u8*)arena_alloc(c->scratch, sz, 1);
- buf_flatten(&strtab, flat);
- buf_fini(&strtab);
-
- ElfSec* es = &secs[nsecs];
- memset(es, 0, sizeof *es);
- es->name = nm;
- es->name_len = nlen;
- es->sh_name = nameoff;
- es->sh_type = SHT_STRTAB;
- es->sh_addralign = 1;
- es->raw_bytes = flat;
- es->sh_size = sz;
- idx_strtab = nsecs;
- idx_shstrtab = nsecs; /* same section serves both roles */
- nsecs++;
- }
- secs[idx_symtab].sh_link = idx_strtab;
-
- /* ---- pass 5: assign file offsets -------------------------------- */
-
- u64 cur = ELF64_EHDR_SIZE;
- for (u32 i = 1; i < nsecs; ++i) {
- ElfSec* es = &secs[i];
- if (es->is_nobits) {
- /* sh_offset for NOBITS is conventionally where the next
- * non-NOBITS section begins; we set it to cur without
- * advancing. */
- es->sh_offset = cur;
- continue;
- }
- u64 a = es->sh_addralign ? es->sh_addralign : 1;
- cur = ALIGN_UP(cur, a);
- es->sh_offset = cur;
- cur += es->sh_size;
- }
- cur = ALIGN_UP(cur, (u64)8);
- u64 e_shoff = cur;
-
- /* ---- pass 6: write Ehdr ----------------------------------------- */
-
- u8 ident[EI_NIDENT] = {0};
- ident[EI_MAG0] = ELFMAG0;
- ident[EI_MAG1] = ELFMAG1;
- ident[EI_MAG2] = ELFMAG2;
- ident[EI_MAG3] = ELFMAG3;
- ident[EI_CLASS] = ELFCLASS64;
- ident[EI_DATA] = ELFDATA2LSB;
- ident[EI_VERSION] = EV_CURRENT;
- /* SysV is the canonical OSABI for relocatable AArch64 .o; clang and
- * GNU ld both emit it for Linux targets. Linking does not key off
- * EI_OSABI for plain AArch64 ELF — it's e_machine that matters.
- *
- * Exception: GNU extensions (STT_GNU_IFUNC, SHF_GNU_RETAIN, ...)
- * require EI_OSABI=ELFOSABI_GNU. Clang sets it for any TU using a
- * GNU-flavored marker; we mirror that so roundtrip is byte-stable. */
- ident[EI_OSABI] = ELFOSABI_NONE;
- {
- ObjSymIter* it = obj_symiter_new(ob);
- ObjSymEntry e;
- u32 nsec = obj_section_count(ob), si;
- while (obj_symiter_next(it, &e)) {
- if (e.sym->removed) continue;
- if (e.sym->kind == SK_IFUNC) {
- ident[EI_OSABI] = ELFOSABI_GNU;
- break;
- }
- }
- obj_symiter_free(it);
- if (ident[EI_OSABI] != ELFOSABI_GNU) {
- for (si = 1; si < nsec; ++si) {
- const Section* sec = obj_section_get(ob, si);
- if (sec && !sec->removed && (sec->flags & SF_RETAIN)) {
- ident[EI_OSABI] = ELFOSABI_GNU;
- break;
- }
- }
- }
- }
- /* e_flags: prefer the value preserved from a prior read (round-trip);
- * else synthesize a sensible per-arch default. RV64 cfree targets the
- * Linux psABI's lp64d soft-relax convention (RVC + double-float ABI). */
- u32 e_flags;
- if (!obj_get_elf_e_flags(ob, &e_flags)) e_flags = elf->e_flags;
-
- cfree_writer_seek(w, 0);
- cfree_writer_write(w, ident, EI_NIDENT);
- elf_wr_u16(w, ET_REL);
- elf_wr_u16(w, (u16)e_machine);
- elf_wr_u32(w, EV_CURRENT);
- elf_wr_u64(w, 0); /* e_entry */
- elf_wr_u64(w, 0); /* e_phoff */
- elf_wr_u64(w, e_shoff); /* e_shoff */
- elf_wr_u32(w, e_flags); /* e_flags */
- elf_wr_u16(w, ELF64_EHDR_SIZE); /* e_ehsize */
- elf_wr_u16(w, 0); /* e_phentsize */
- elf_wr_u16(w, 0); /* e_phnum */
- elf_wr_u16(w, ELF64_SHDR_SIZE); /* e_shentsize */
- elf_wr_u16(w, (u16)nsecs); /* e_shnum */
- elf_wr_u16(w, (u16)idx_shstrtab); /* e_shstrndx */
-
- /* ---- pass 7: write each section's bytes ------------------------- */
-
- for (u32 i = 1; i < nsecs; ++i) {
- ElfSec* es = &secs[i];
- if (es->is_nobits || es->sh_size == 0) continue;
- cfree_writer_seek(w, es->sh_offset);
- if (es->obj_bytes) {
- u32 sz = es->obj_bytes->total;
- u8* tmp = (u8*)h->alloc(h, sz ? sz : 1, 1);
- if (sz) buf_flatten(es->obj_bytes, tmp);
- cfree_writer_write(w, tmp, sz);
- h->free(h, tmp, sz ? sz : 1);
- } else if (es->raw_bytes) {
- cfree_writer_write(w, es->raw_bytes, (size_t)es->sh_size);
- }
- }
-
- /* ---- pass 8: write section header table ------------------------- */
-
- cfree_writer_seek(w, e_shoff);
- for (u32 i = 0; i < nsecs; ++i) {
- const ElfSec* es = &secs[i];
- elf_wr_u32(w, es->sh_name);
- elf_wr_u32(w, es->sh_type);
- elf_wr_u64(w, es->sh_flags);
- elf_wr_u64(w, es->sh_addr);
- elf_wr_u64(w, es->sh_offset);
- elf_wr_u64(w, es->sh_size);
- elf_wr_u32(w, es->sh_link);
- elf_wr_u32(w, es->sh_info);
- elf_wr_u64(w, es->sh_addralign);
- elf_wr_u64(w, es->sh_entsize);
- }
-}
diff --git a/src/obj/elf_read.c b/src/obj/elf_read.c
@@ -1,684 +0,0 @@
-/* ELF ET_REL reader. Parses a 64-bit little-endian relocatable object
- * back into a fresh ObjBuilder. The post-finalize ObjBuilder shape is
- * the canonical superset doc/DESIGN.md §5.5 promises: read_elf of an
- * emit_elf output produces an ObjBuilder equivalent to the writer's
- * input, modulo (a) section ordering and (b) STT_SECTION symbols
- * synthesized by the writer.
- *
- * Scope: AArch64 little-endian. Other archs / endianness produce a
- * compiler_panic with a diagnostic. */
-
-#include <string.h>
-
-#include "arch/arch.h"
-#include "core/heap.h"
-#include "core/pool.h"
-#include "core/slice.h"
-#include "obj/elf.h"
-
-static SrcLoc no_loc(void) {
- SrcLoc l = {0, 0, 0};
- return l;
-}
-
-/* ---- shdr scratch struct ---- */
-
-typedef struct ShdrRec {
- u32 sh_name;
- u32 sh_type;
- u64 sh_flags;
- u64 sh_addr;
- u64 sh_offset;
- u64 sh_size;
- u32 sh_link;
- u32 sh_info;
- u64 sh_addralign;
- u64 sh_entsize;
-} ShdrRec;
-
-static void parse_shdr(const u8* p, ShdrRec* out) {
- out->sh_name = elf_rd_u32(p + 0);
- out->sh_type = elf_rd_u32(p + 4);
- out->sh_flags = elf_rd_u64(p + 8);
- out->sh_addr = elf_rd_u64(p + 16);
- out->sh_offset = elf_rd_u64(p + 24);
- out->sh_size = elf_rd_u64(p + 32);
- out->sh_link = elf_rd_u32(p + 40);
- out->sh_info = elf_rd_u32(p + 44);
- out->sh_addralign = elf_rd_u64(p + 48);
- out->sh_entsize = elf_rd_u64(p + 56);
-}
-
-/* ---- mappers ---- */
-
-/* The bits this function maps to SecFlag — anything outside this mask is
- * treated as opaque and stashed in Section.ext_flags by the caller so the
- * emitter can write it back unchanged. Examples of bits left over:
- * SHF_EXCLUDE (0x80000000) on .llvm_addrsig, SHF_COMPRESSED (0x800) on
- * compressed .debug_*, SHF_INFO_LINK (0x40) on .rela.* sections. */
-#define ELF_KNOWN_FLAGS_MASK \
- ((u64)(SHF_ALLOC | SHF_EXECINSTR | SHF_WRITE | SHF_TLS | SHF_MERGE | \
- SHF_STRINGS | SHF_GROUP | SHF_LINK_ORDER | SHF_GNU_RETAIN))
-
-static u16 elf_flags_to_obj(u64 f) {
- u16 r = 0;
- if (f & SHF_ALLOC) r |= SF_ALLOC;
- if (f & SHF_EXECINSTR) r |= SF_EXEC;
- if (f & SHF_WRITE) r |= SF_WRITE;
- if (f & SHF_TLS) r |= SF_TLS;
- if (f & SHF_MERGE) r |= SF_MERGE;
- if (f & SHF_STRINGS) r |= SF_STRINGS;
- if (f & SHF_GROUP) r |= SF_GROUP;
- if (f & SHF_LINK_ORDER) r |= SF_LINK_ORDER;
- if (f & SHF_GNU_RETAIN) r |= SF_RETAIN;
- return r;
-}
-
-/* Map ELF sh_type -> SecSem. Sets *known to 1 if the value is one of
- * the canonical types the cfree model knows about; 0 means the caller
- * fell through to the SSEM_PROGBITS fallback and should preserve the
- * raw sh_type via Section.ext_type so emit_elf can write it back. */
-static u16 elf_type_to_sem(u32 t, int* known) {
- *known = 1;
- switch (t) {
- case SHT_PROGBITS:
- return SSEM_PROGBITS;
- case SHT_NOBITS:
- return SSEM_NOBITS;
- case SHT_SYMTAB:
- return SSEM_SYMTAB;
- case SHT_STRTAB:
- return SSEM_STRTAB;
- case SHT_RELA:
- return SSEM_RELA;
- case SHT_REL:
- return SSEM_REL;
- case SHT_NOTE:
- return SSEM_NOTE;
- case SHT_INIT_ARRAY:
- return SSEM_INIT_ARRAY;
- case SHT_FINI_ARRAY:
- return SSEM_FINI_ARRAY;
- case SHT_PREINIT_ARRAY:
- return SSEM_PREINIT_ARRAY;
- case SHT_GROUP:
- return SSEM_GROUP;
- default:
- *known = 0;
- return SSEM_PROGBITS;
- }
-}
-
-static u16 elf_kind_from_name(const char* name, u32 nlen, u64 sh_flags,
- u32 sh_type) {
- if (sh_type == SHT_NOBITS) return SEC_BSS;
- if (nlen >= 5 && memcmp(name, ".text", 5) == 0) return SEC_TEXT;
- if (nlen >= 7 && memcmp(name, ".rodata", 7) == 0) return SEC_RODATA;
- if (nlen >= 5 && memcmp(name, ".data", 5) == 0) return SEC_DATA;
- if (nlen >= 4 && memcmp(name, ".bss", 4) == 0) return SEC_BSS;
- if (nlen >= 7 && memcmp(name, ".debug_", 7) == 0) return SEC_DEBUG;
- /* Fallback: classify by flags. */
- if (sh_flags & SHF_EXECINSTR) return SEC_TEXT;
- if (sh_flags & SHF_WRITE) return SEC_DATA;
- if (sh_flags & SHF_ALLOC) return SEC_RODATA;
- return SEC_OTHER;
-}
-
-static u16 elf_bind_to_obj(u32 b) {
- switch (b) {
- case STB_GLOBAL:
- return SB_GLOBAL;
- case STB_WEAK:
- return SB_WEAK;
- default:
- return SB_LOCAL;
- }
-}
-
-static u16 elf_type_to_kind(u32 t, u16 shndx) {
- if (shndx == SHN_UNDEF) return SK_UNDEF;
- if (shndx == SHN_COMMON) return SK_COMMON;
- /* SHN_ABS is the convention for STT_FILE and a few other defined
- * symbols whose value is not an address. Don't smother the type
- * with SK_ABS when the type field carries real information — only
- * fall through to SK_ABS for STT_NOTYPE-at-SHN_ABS. */
- if (shndx == SHN_ABS && t == STT_NOTYPE) return SK_ABS;
- switch (t) {
- case STT_FUNC:
- return SK_FUNC;
- case STT_OBJECT:
- return SK_OBJ;
- case STT_SECTION:
- return SK_SECTION;
- case STT_FILE:
- return SK_FILE;
- case STT_TLS:
- return SK_TLS;
- case STT_COMMON:
- return SK_COMMON;
- case STT_GNU_IFUNC:
- return SK_IFUNC;
- default:
- /* STT_NOTYPE on a defined symbol (e.g. AArch64 mapping symbols
- * `$x` / `$d`, or assembly labels) round-trips as SK_NOTYPE.
- * The linker keeps definedness keyed on SK_UNDEF; SK_NOTYPE is
- * "defined but typeless". */
- return SK_NOTYPE;
- }
-}
-
-static u8 elf_other_to_vis(u32 other) {
- switch (other & 3) {
- case STV_HIDDEN:
- return SV_HIDDEN;
- case STV_PROTECTED:
- return SV_PROTECTED;
- case STV_INTERNAL:
- return SV_INTERNAL;
- default:
- return SV_DEFAULT;
- }
-}
-
-/* Bounds-checked C-string slice from a strtab section. Returns "" on
- * out-of-range so callers don't have to special-case it. `len_out` is
- * set to the result's byte length. */
-static const char* strtab_lookup(const u8* tab, u64 tab_size, u32 off,
- u32* len_out) {
- if (off >= tab_size) {
- *len_out = 0;
- return "";
- }
- const char* s = (const char*)(tab + off);
- u32 max = (u32)(tab_size - off);
- u32 n = 0;
- while (n < max && s[n] != '\0') ++n;
- *len_out = n;
- return s;
-}
-
-ObjBuilder* read_elf(Compiler* c, const char* name, const u8* data,
- size_t len) {
- (void)name;
-
- if (len < ELF64_EHDR_SIZE)
- compiler_panic(c, no_loc(), "read_elf: input shorter than ELF header");
-
- if (data[EI_MAG0] != ELFMAG0 || data[EI_MAG1] != ELFMAG1 ||
- data[EI_MAG2] != ELFMAG2 || data[EI_MAG3] != ELFMAG3)
- compiler_panic(c, no_loc(), "read_elf: bad ELF magic");
-
- if (data[EI_CLASS] != ELFCLASS64)
- compiler_panic(c, no_loc(), "read_elf: not ELFCLASS64 (got %u)",
- data[EI_CLASS]);
- if (data[EI_DATA] != ELFDATA2LSB)
- compiler_panic(c, no_loc(), "read_elf: not ELFDATA2LSB (got %u)",
- data[EI_DATA]);
-
- u16 e_type = elf_rd_u16(data + 16);
- if (e_type != ET_REL)
- compiler_panic(
- c, no_loc(),
- "read_elf: only ET_REL inputs are accepted by read_elf "
- "(got e_type=%u); use read_elf_dso for ET_DYN shared objects",
- (u32)e_type);
-
- u16 e_machine = elf_rd_u16(data + 18);
- const ArchImpl* arch = arch_lookup_elf_machine(e_machine);
- u32 (*reloc_from)(u32);
- if (!arch || !arch->elf || !arch->elf->reloc_from) {
- compiler_panic(c, no_loc(), "read_elf: unsupported e_machine 0x%x",
- (u32)e_machine);
- }
- reloc_from = arch->elf->reloc_from;
-
- u64 e_shoff = elf_rd_u64(data + 40);
- u32 e_flags = elf_rd_u32(data + 48);
- u16 e_shentsize = elf_rd_u16(data + 58);
- u16 e_shnum = elf_rd_u16(data + 60);
- u16 e_shstrndx = elf_rd_u16(data + 62);
-
- if (e_shentsize != ELF64_SHDR_SIZE)
- compiler_panic(c, no_loc(), "read_elf: unexpected e_shentsize %u",
- (u32)e_shentsize);
- if (e_shoff + (u64)e_shnum * ELF64_SHDR_SIZE > len)
- compiler_panic(c, no_loc(), "read_elf: section header table out of range");
- if (e_shstrndx >= e_shnum)
- compiler_panic(c, no_loc(), "read_elf: e_shstrndx %u >= e_shnum %u",
- (u32)e_shstrndx, (u32)e_shnum);
-
- /* Parse all shdrs into scratch. */
- ShdrRec* shdrs = arena_array(c->scratch, ShdrRec, e_shnum);
- for (u32 i = 0; i < e_shnum; ++i)
- parse_shdr(data + e_shoff + (u64)i * ELF64_SHDR_SIZE, &shdrs[i]);
-
- const ShdrRec* shstr_sh = &shdrs[e_shstrndx];
- if (shstr_sh->sh_offset + shstr_sh->sh_size > len)
- compiler_panic(c, no_loc(), "read_elf: .shstrtab out of range");
- const u8* shstrtab = data + shstr_sh->sh_offset;
- u64 shstrtab_sz = shstr_sh->sh_size;
-
- /* Build the ObjBuilder. */
- ObjBuilder* ob = obj_new(c);
- if (!ob) compiler_panic(c, no_loc(), "read_elf: obj_new failed");
- obj_set_elf_e_flags(ob, e_flags);
-
- /* elf_to_obj[shndx] -> ObjSecId, OBJ_SEC_NONE for skipped sections. */
- u32* elf_to_obj = arena_zarray(c->scratch, u32, e_shnum);
-
- /* Pass 1: create obj sections for every non-NULL shdr that carries
- * load-bearing model state. SYMTAB / STRTAB / RELA / REL are
- * consumed below for symbols and relocations and do NOT round-trip
- * as obj sections — emit_elf re-synthesizes them from the
- * ObjBuilder's symbols / strtab / relocs. The shstrtab is a STRTAB
- * too, so it falls out the same way. */
- for (u32 i = 1; i < e_shnum; ++i) {
- const ShdrRec* sh = &shdrs[i];
- if (sh->sh_type == SHT_NULL) continue;
- if (sh->sh_type == SHT_SYMTAB) continue;
- if (sh->sh_type == SHT_STRTAB) continue;
- if (sh->sh_type == SHT_RELA) continue;
- if (sh->sh_type == SHT_REL) continue;
- /* SHT_GROUP is consumed below into an ObjGroup record (signature
- * symbol + member ObjSecIds). emit_elf re-synthesizes the group
- * section bytes from the ObjGroup, using current section indices
- * — so the original section's raw body would be stale anyway. */
- if (sh->sh_type == SHT_GROUP) continue;
-
- u32 nlen;
- const char* nm = strtab_lookup(shstrtab, shstrtab_sz, sh->sh_name, &nlen);
- Sym sym = pool_intern_slice(c->global, (Slice){ .s = nm, .len = nlen });
-
- u16 sec_kind = elf_kind_from_name(nm, nlen, sh->sh_flags, sh->sh_type);
- int type_known;
- u16 sec_sem = elf_type_to_sem(sh->sh_type, &type_known);
- u16 flags = elf_flags_to_obj(sh->sh_flags);
- u32 align = sh->sh_addralign ? (u32)sh->sh_addralign : 1;
-
- ObjSecId id =
- obj_section_ex(ob, sym, (SecKind)sec_kind, (SecSem)sec_sem, flags,
- align, (u32)sh->sh_entsize, sh->sh_link, sh->sh_info);
- if (id == OBJ_SEC_NONE)
- compiler_panic(c, no_loc(), "read_elf: obj_section_ex failed for '%.*s'",
- SLICE_ARG(((Slice){.s = nm, .len = nlen})));
- elf_to_obj[i] = id;
-
- /* Preserve format-specific bits the canonical SecSem/SecFlag
- * mapping can't represent so emit_elf can write them back
- * verbatim. ext_type only set when the sh_type fell through
- * to the "unknown" path. */
- u32 leftover = (u32)(sh->sh_flags & ~ELF_KNOWN_FLAGS_MASK);
- if (!type_known || leftover) {
- obj_section_set_ext(ob, id, OBJ_EXT_ELF, type_known ? 0 : sh->sh_type,
- leftover);
- }
-
- /* Body bytes. */
- if (sh->sh_type == SHT_NOBITS) {
- obj_reserve_bss(ob, id, (u32)sh->sh_size, align);
- } else if (sh->sh_size) {
- if (sh->sh_offset + sh->sh_size > len)
- compiler_panic(c, no_loc(),
- "read_elf: section '%.*s' bytes out of range",
- SLICE_ARG(((Slice){.s = nm, .len = nlen})));
- /* For SYMTAB/STRTAB/RELA we still copy the raw bytes — the
- * post-finalize shape contract says these sections are
- * present; emit_elf will regenerate them on re-emit, so the
- * preserved bytes are informational rather than load-bearing.
- */
- obj_write(ob, id, data + sh->sh_offset, (size_t)sh->sh_size);
- }
- }
-
- /* Pass 2: parse the .symtab into ObjSyms, building an
- * elf_sym_idx -> ObjSymId table. There may be zero or one SYMTAB in
- * an ET_REL; pick the first. */
- u32 symtab_shndx = 0;
- for (u32 i = 1; i < e_shnum; ++i) {
- if (shdrs[i].sh_type == SHT_SYMTAB) {
- symtab_shndx = i;
- break;
- }
- }
-
- u32 nsyms = 0;
- u32* sym_elf_to_obj = NULL;
-
- if (symtab_shndx) {
- const ShdrRec* sh = &shdrs[symtab_shndx];
- if (sh->sh_entsize != ELF64_SYM_SIZE)
- compiler_panic(c, no_loc(), "read_elf: .symtab entsize %llu != %u",
- (unsigned long long)sh->sh_entsize, (u32)ELF64_SYM_SIZE);
- if (sh->sh_size % ELF64_SYM_SIZE)
- compiler_panic(c, no_loc(),
- "read_elf: .symtab size %llu not a multiple of %u",
- (unsigned long long)sh->sh_size, (u32)ELF64_SYM_SIZE);
- if (sh->sh_link >= e_shnum)
- compiler_panic(c, no_loc(), "read_elf: .symtab sh_link %u out of range",
- sh->sh_link);
- const ShdrRec* str_sh = &shdrs[sh->sh_link];
- if (str_sh->sh_offset + str_sh->sh_size > len)
- compiler_panic(c, no_loc(), "read_elf: .strtab out of range");
- const u8* strtab = data + str_sh->sh_offset;
- u64 strtab_sz = str_sh->sh_size;
-
- nsyms = (u32)(sh->sh_size / ELF64_SYM_SIZE);
- sym_elf_to_obj = arena_zarray(c->scratch, u32, nsyms ? nsyms : 1);
-
- const u8* base = data + sh->sh_offset;
- for (u32 i = 1; i < nsyms; ++i) { /* skip index 0 */
- const u8* p = base + (u64)i * ELF64_SYM_SIZE;
- u32 st_name = elf_rd_u32(p + 0);
- u8 st_info = p[4];
- u8 st_other = p[5];
- u16 st_shndx = elf_rd_u16(p + 6);
- u64 st_value = elf_rd_u64(p + 8);
- u64 st_size = elf_rd_u64(p + 16);
-
- u32 nlen;
- const char* nm = strtab_lookup(strtab, strtab_sz, st_name, &nlen);
- Sym sn = nlen ? pool_intern_slice(c->global, (Slice){ .s = nm, .len = nlen }) : 0;
-
- u32 e_bind = ELF64_ST_BIND(st_info);
- u32 e_type = ELF64_ST_TYPE(st_info);
- u16 bind = elf_bind_to_obj(e_bind);
- u16 kind = elf_type_to_kind(e_type, st_shndx);
- u8 vis = elf_other_to_vis(st_other);
-
- ObjSecId sec_id;
- u64 value;
- u64 cmnalign = 0;
- if (st_shndx == SHN_UNDEF) {
- sec_id = OBJ_SEC_NONE;
- value = st_value;
- } else if (st_shndx == SHN_ABS || st_shndx == SHN_COMMON) {
- sec_id = OBJ_SEC_NONE;
- value = st_value;
- if (st_shndx == SHN_COMMON) cmnalign = st_value;
- } else if (st_shndx < e_shnum) {
- sec_id = elf_to_obj[st_shndx];
- value = st_value;
- } else {
- compiler_panic(c, no_loc(), "read_elf: symbol shndx %u out of range",
- (u32)st_shndx);
- sec_id = OBJ_SEC_NONE;
- value = 0; /* unreachable */
- }
-
- ObjSymId id =
- obj_symbol_ex(ob, sn, (SymBind)bind, (SymVis)vis, (SymKind)kind,
- sec_id, value, st_size, cmnalign);
- obj_sym_mark_referenced(ob, id);
- sym_elf_to_obj[i] = id;
- }
- }
-
- /* Pass 3: parse each SHT_RELA / SHT_REL into ObjBuilder relocations
- * targeting the section the rela header's sh_info points at. */
- for (u32 i = 1; i < e_shnum; ++i) {
- const ShdrRec* sh = &shdrs[i];
- int is_rela = (sh->sh_type == SHT_RELA);
- int is_rel = (sh->sh_type == SHT_REL);
- if (!is_rela && !is_rel) continue;
-
- u32 entsize = is_rela ? ELF64_RELA_SIZE : 16;
- if (sh->sh_entsize != entsize)
- compiler_panic(c, no_loc(), "read_elf: rela entsize %llu != %u",
- (unsigned long long)sh->sh_entsize, entsize);
- if (sh->sh_info == 0 || sh->sh_info >= e_shnum)
- compiler_panic(c, no_loc(), "read_elf: rela sh_info %u out of range",
- sh->sh_info);
- ObjSecId target = elf_to_obj[sh->sh_info];
- if (target == OBJ_SEC_NONE) continue;
-
- u32 nrec = (u32)(sh->sh_size / entsize);
- const u8* base = data + sh->sh_offset;
- for (u32 j = 0; j < nrec; ++j) {
- const u8* p = base + (u64)j * entsize;
- u64 r_offset = elf_rd_u64(p + 0);
- u64 r_info = elf_rd_u64(p + 8);
- i64 r_addend = is_rela ? (i64)elf_rd_u64(p + 16) : 0;
- u32 esym = ELF64_R_SYM(r_info);
- u32 etype = ELF64_R_TYPE(r_info);
-
- u32 kind = reloc_from(etype);
- if (kind == (u32)-1)
- compiler_panic(c, no_loc(),
- "read_elf: unsupported reloc type %u for e_machine 0x%x",
- etype, (u32)e_machine);
-
- ObjSymId target_sym = OBJ_SYM_NONE;
- if (esym && sym_elf_to_obj && esym < nsyms)
- target_sym = sym_elf_to_obj[esym];
-
- obj_reloc_ex(ob, target, (u32)r_offset, (RelocKind)kind, target_sym,
- r_addend, is_rela ? 1 : 0, 0);
- }
- }
-
- /* Pass 4: SHT_GROUP. Each GROUP section's body is a sequence of
- * 4-byte LE indices: [flags, shndx, shndx, ...]. The signature is
- * the symbol named by sh_link/sh_info convention (sh_link=symtab,
- * sh_info=symbol index in that symtab). */
- for (u32 i = 1; i < e_shnum; ++i) {
- const ShdrRec* sh = &shdrs[i];
- if (sh->sh_type != SHT_GROUP) continue;
-
- if (sh->sh_size < 4 || (sh->sh_size % 4)) continue;
- const u8* p = data + sh->sh_offset;
- u32 flags = elf_rd_u32(p);
- u32 nm_len;
- const char* gnm =
- strtab_lookup(shstrtab, shstrtab_sz, sh->sh_name, &nm_len);
- Sym gname = pool_intern_slice(c->global, (Slice){ .s = gnm, .len = nm_len });
-
- ObjSymId signature = OBJ_SYM_NONE;
- if (sym_elf_to_obj && sh->sh_info < nsyms)
- signature = sym_elf_to_obj[sh->sh_info];
-
- ObjGroupId gid = obj_group(ob, gname, signature, flags);
- u32 n = (u32)(sh->sh_size / 4) - 1;
- for (u32 j = 0; j < n; ++j) {
- u32 shndx = elf_rd_u32(p + 4 + j * 4);
- if (shndx < e_shnum && elf_to_obj[shndx] != OBJ_SEC_NONE)
- obj_group_add_section(ob, gid, elf_to_obj[shndx]);
- }
- }
-
- obj_finalize(ob);
- return ob;
-}
-
-/* ---- ET_DYN (shared object) reader ----
- *
- * Produces an ObjBuilder containing only the DSO's exported symbols
- * (parsed from .dynsym, not .symtab). The DSO's sections, relocations,
- * and groups are skipped — DSOs contribute no bytes to the output
- * image. The DT_SONAME (if any) is interned and returned via
- * `*soname_out` so the caller can record DT_NEEDED at link time.
- *
- * Symbol shape: each defined dynsym entry produces an ObjSym whose
- * (bind, kind, vis) match the source. `section_id` is OBJ_SEC_NONE —
- * the symbol's value is its DSO-internal vaddr, not meaningful to the
- * consuming linker, so we record `value=0`. The linker layer
- * (resolve_undefs) only consults the name and the defined-ness flag.
- *
- * Undefined dynsym entries (st_shndx==SHN_UNDEF) are imports the DSO
- * itself has against other libraries; they're not relevant to a
- * consumer that's linking against this DSO and are dropped. */
-
-static int parse_phdr(const u8* data, size_t len, u64 e_phoff, u16 e_phentsize,
- u16 e_phnum, u32 want_type, u64* out_offset,
- u64* out_filesz) {
- u32 i;
- if (e_phentsize != ELF64_PHDR_SIZE) return 0;
- if (e_phoff + (u64)e_phnum * ELF64_PHDR_SIZE > len) return 0;
- for (i = 0; i < e_phnum; ++i) {
- const u8* p = data + e_phoff + (u64)i * ELF64_PHDR_SIZE;
- u32 p_type = elf_rd_u32(p + 0);
- if (p_type != want_type) continue;
- *out_offset = elf_rd_u64(p + 8);
- *out_filesz = elf_rd_u64(p + 32);
- return 1;
- }
- return 0;
-}
-
-ObjBuilder* read_elf_dso(Compiler* c, const char* name, const u8* data,
- size_t len, Sym* soname_out) {
- (void)name;
- if (soname_out) *soname_out = 0;
-
- if (len < ELF64_EHDR_SIZE)
- compiler_panic(c, no_loc(), "read_elf_dso: input shorter than ELF header");
- if (data[EI_MAG0] != ELFMAG0 || data[EI_MAG1] != ELFMAG1 ||
- data[EI_MAG2] != ELFMAG2 || data[EI_MAG3] != ELFMAG3)
- compiler_panic(c, no_loc(), "read_elf_dso: bad ELF magic");
- if (data[EI_CLASS] != ELFCLASS64)
- compiler_panic(c, no_loc(), "read_elf_dso: not ELFCLASS64");
- if (data[EI_DATA] != ELFDATA2LSB)
- compiler_panic(c, no_loc(), "read_elf_dso: not ELFDATA2LSB");
-
- u16 e_type = elf_rd_u16(data + 16);
- if (e_type != ET_DYN)
- compiler_panic(c, no_loc(), "read_elf_dso: expected ET_DYN, got e_type=%u",
- (u32)e_type);
-
- u16 e_machine = elf_rd_u16(data + 18);
- if (!arch_lookup_elf_machine(e_machine))
- compiler_panic(c, no_loc(), "read_elf_dso: unsupported e_machine 0x%x",
- (u32)e_machine);
-
- u64 e_phoff = elf_rd_u64(data + 32);
- u64 e_shoff = elf_rd_u64(data + 40);
- u16 e_phentsize = elf_rd_u16(data + 54);
- u16 e_phnum = elf_rd_u16(data + 56);
- u16 e_shentsize = elf_rd_u16(data + 58);
- u16 e_shnum = elf_rd_u16(data + 60);
- u16 e_shstrndx = elf_rd_u16(data + 62);
-
- if (e_shentsize != ELF64_SHDR_SIZE)
- compiler_panic(c, no_loc(), "read_elf_dso: unexpected e_shentsize %u",
- (u32)e_shentsize);
- if (e_shoff + (u64)e_shnum * ELF64_SHDR_SIZE > len)
- compiler_panic(c, no_loc(),
- "read_elf_dso: section header table out of range");
- if (e_shstrndx >= e_shnum)
- compiler_panic(c, no_loc(), "read_elf_dso: e_shstrndx out of range");
-
- ShdrRec* shdrs = arena_array(c->scratch, ShdrRec, e_shnum);
- for (u32 i = 0; i < e_shnum; ++i)
- parse_shdr(data + e_shoff + (u64)i * ELF64_SHDR_SIZE, &shdrs[i]);
-
- /* Locate .dynsym (preferred over .symtab — a stripped DSO carries
- * only .dynsym) and its associated strtab via sh_link. */
- u32 dynsym_idx = 0, dynamic_idx = 0;
- for (u32 i = 1; i < e_shnum; ++i) {
- if (shdrs[i].sh_type == SHT_DYNSYM && !dynsym_idx) dynsym_idx = i;
- if (shdrs[i].sh_type == SHT_DYNAMIC && !dynamic_idx) dynamic_idx = i;
- }
-
- if (!dynsym_idx)
- compiler_panic(c, no_loc(), "read_elf_dso: no SHT_DYNSYM in shared object");
-
- /* Parse PT_DYNAMIC for DT_SONAME. The .dynamic section gives us the
- * dynstr to resolve the SONAME's offset; if there's no .dynamic
- * section we fall back to scanning the PT_DYNAMIC segment. */
- Sym soname = 0;
- if (dynamic_idx) {
- const ShdrRec* dsh = &shdrs[dynamic_idx];
- if (dsh->sh_link >= e_shnum)
- compiler_panic(c, no_loc(),
- "read_elf_dso: .dynamic sh_link %u out of range",
- dsh->sh_link);
- const ShdrRec* str_sh = &shdrs[dsh->sh_link];
- if (str_sh->sh_offset + str_sh->sh_size > len)
- compiler_panic(c, no_loc(), "read_elf_dso: .dynamic strtab out of range");
- const u8* dynstr = data + str_sh->sh_offset;
- u64 dynstr_sz = str_sh->sh_size;
-
- if (dsh->sh_offset + dsh->sh_size > len)
- compiler_panic(c, no_loc(), "read_elf_dso: .dynamic body out of range");
- const u8* dynp = data + dsh->sh_offset;
- u64 dynsz = dsh->sh_size;
- /* DT entries are 16 bytes: (d_tag: u64, d_un: u64). */
- for (u64 off = 0; off + 16 <= dynsz; off += 16) {
- u64 tag = elf_rd_u64(dynp + off);
- u64 val = elf_rd_u64(dynp + off + 8);
- if (tag == DT_NULL) break;
- if (tag == DT_SONAME) {
- u32 nlen;
- const char* nm = strtab_lookup(dynstr, dynstr_sz, (u32)val, &nlen);
- if (nlen) soname = pool_intern_slice(c->global, (Slice){ .s = nm, .len = nlen });
- break;
- }
- }
- } else if (e_phnum) {
- /* Fallback: walk PT_DYNAMIC straight from program headers. We
- * only need DT_SONAME, so skip if we can't find a strtab pointer
- * inline (DT_STRTAB carries a vaddr, not a file offset — stripped
- * DSOs without SHT_DYNAMIC are exceedingly rare in practice). */
- u64 dyn_off, dyn_sz;
- (void)parse_phdr(data, len, e_phoff, e_phentsize, e_phnum, PT_DYNAMIC,
- &dyn_off, &dyn_sz);
- }
- if (soname_out) *soname_out = soname;
-
- /* Now parse .dynsym. */
- const ShdrRec* sh = &shdrs[dynsym_idx];
- if (sh->sh_entsize != ELF64_SYM_SIZE)
- compiler_panic(c, no_loc(), "read_elf_dso: .dynsym entsize %llu != %u",
- (unsigned long long)sh->sh_entsize, (u32)ELF64_SYM_SIZE);
- if (sh->sh_size % ELF64_SYM_SIZE)
- compiler_panic(c, no_loc(),
- "read_elf_dso: .dynsym size not multiple of entry size");
- if (sh->sh_link >= e_shnum)
- compiler_panic(c, no_loc(), "read_elf_dso: .dynsym sh_link out of range");
- const ShdrRec* str_sh = &shdrs[sh->sh_link];
- if (str_sh->sh_offset + str_sh->sh_size > len)
- compiler_panic(c, no_loc(), "read_elf_dso: .dynstr out of range");
- const u8* strtab = data + str_sh->sh_offset;
- u64 strtab_sz = str_sh->sh_size;
-
- ObjBuilder* ob = obj_new(c);
- if (!ob) compiler_panic(c, no_loc(), "read_elf_dso: obj_new failed");
-
- u32 nsyms = (u32)(sh->sh_size / ELF64_SYM_SIZE);
- const u8* base = data + sh->sh_offset;
- for (u32 i = 1; i < nsyms; ++i) { /* skip index 0 */
- const u8* p = base + (u64)i * ELF64_SYM_SIZE;
- u32 st_name = elf_rd_u32(p + 0);
- u8 st_info = p[4];
- u8 st_other = p[5];
- u16 st_shndx = elf_rd_u16(p + 6);
-
- /* Skip the DSO's own undefined imports — they don't satisfy any
- * undef in our consumer. Locals (STB_LOCAL) likewise aren't
- * exported and would only confuse the resolver. */
- if (st_shndx == SHN_UNDEF) continue;
- u32 e_bind = ELF64_ST_BIND(st_info);
- if (e_bind == STB_LOCAL) continue;
-
- u32 nlen;
- const char* nm = strtab_lookup(strtab, strtab_sz, st_name, &nlen);
- if (!nlen) continue;
- Sym sn = pool_intern_slice(c->global, (Slice){ .s = nm, .len = nlen });
-
- u32 e_type_field = ELF64_ST_TYPE(st_info);
- u16 bind = elf_bind_to_obj(e_bind);
- u16 kind = elf_type_to_kind(e_type_field, st_shndx);
- u8 vis = elf_other_to_vis(st_other);
-
- /* DSO exports land as defined symbols in OBJ_SEC_NONE with
- * value=0. The consumer treats them as imports — see
- * resolve_undefs in src/link/link_layout.c. */
- {
- ObjSymId did = obj_symbol_ex(ob, sn, (SymBind)bind, (SymVis)vis,
- (SymKind)kind, OBJ_SEC_NONE, 0, 0, 0);
- obj_sym_mark_referenced(ob, did);
- }
- }
-
- obj_finalize(ob);
- return ob;
-}
diff --git a/src/obj/elf_reloc_aarch64.c b/src/obj/elf_reloc_aarch64.c
@@ -1,182 +0,0 @@
-/* RelocKind <-> AArch64 ELF reloc-type mapping.
- *
- * Cfree's RelocKind enum is arch-agnostic at its top (R_ABS, R_REL, R_PC
- * variants) and arch-specific in its lower entries. On AArch64, R_REL and
- * R_PC collapse to ELF_R_AARCH64_PREL32 / ELF_R_AARCH64_PREL64 — both
- * mean "PC-relative relative to the symbol" once the linker has resolved
- * final addresses.
- *
- * Returning 0 (ELF_R_AARCH64_NONE) for an unsupported kind is the signal
- * to the caller to either panic (emit) or panic (read with diagnostic). */
-
-#include "obj/elf.h"
-
-u32 elf_aarch64_reloc_to(u32 kind /* RelocKind */) {
- switch (kind) {
- case R_NONE:
- return ELF_R_AARCH64_NONE;
- case R_ABS64:
- return ELF_R_AARCH64_ABS64;
- case R_ABS32:
- return ELF_R_AARCH64_ABS32;
- case R_PC64:
- return ELF_R_AARCH64_PREL64;
- case R_PC32:
- return ELF_R_AARCH64_PREL32;
- case R_REL64:
- return ELF_R_AARCH64_PREL64;
- case R_REL32:
- return ELF_R_AARCH64_PREL32;
- case R_AARCH64_JUMP26:
- return ELF_R_AARCH64_JUMP26;
- case R_AARCH64_CALL26:
- return ELF_R_AARCH64_CALL26;
- case R_AARCH64_CONDBR19:
- return ELF_R_AARCH64_CONDBR19;
- case R_AARCH64_TSTBR14:
- return ELF_R_AARCH64_TSTBR14;
- case R_AARCH64_LD_PREL_LO19:
- return ELF_R_AARCH64_LD_PREL_LO19;
- case R_AARCH64_ADR_PREL_LO21:
- return ELF_R_AARCH64_ADR_PREL_LO21;
- case R_AARCH64_ADR_PREL_PG_HI21:
- return ELF_R_AARCH64_ADR_PREL_PG_HI21;
- case R_AARCH64_ADR_PREL_PG_HI21_NC:
- return ELF_R_AARCH64_ADR_PREL_PG_HI21_NC;
- case R_AARCH64_ADD_ABS_LO12_NC:
- return ELF_R_AARCH64_ADD_ABS_LO12_NC;
- case R_AARCH64_ABS16:
- return ELF_R_AARCH64_ABS16;
- case R_AARCH64_PREL16:
- return ELF_R_AARCH64_PREL16;
- case R_AARCH64_LDST8_ABS_LO12_NC:
- return ELF_R_AARCH64_LDST8_ABS_LO12_NC;
- case R_AARCH64_LDST16_ABS_LO12_NC:
- return ELF_R_AARCH64_LDST16_ABS_LO12_NC;
- case R_AARCH64_LDST32_ABS_LO12_NC:
- return ELF_R_AARCH64_LDST32_ABS_LO12_NC;
- case R_AARCH64_LDST64_ABS_LO12_NC:
- return ELF_R_AARCH64_LDST64_ABS_LO12_NC;
- case R_AARCH64_LDST128_ABS_LO12_NC:
- return ELF_R_AARCH64_LDST128_ABS_LO12_NC;
- case R_AARCH64_ADR_GOT_PAGE:
- return ELF_R_AARCH64_ADR_GOT_PAGE;
- case R_AARCH64_LD64_GOT_LO12_NC:
- return ELF_R_AARCH64_LD64_GOT_LO12_NC;
- case R_AARCH64_TLSLE_ADD_TPREL_HI12:
- return ELF_R_AARCH64_TLSLE_ADD_TPREL_HI12;
- case R_AARCH64_TLSLE_ADD_TPREL_LO12:
- return ELF_R_AARCH64_TLSLE_ADD_TPREL_LO12;
- case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
- return ELF_R_AARCH64_TLSLE_ADD_TPREL_LO12_NC;
- case R_AARCH64_TLSLE_LDST8_TPREL_LO12:
- return ELF_R_AARCH64_TLSLE_LDST8_TPREL_LO12;
- case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC:
- return ELF_R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC;
- case R_AARCH64_TLSLE_LDST16_TPREL_LO12:
- return ELF_R_AARCH64_TLSLE_LDST16_TPREL_LO12;
- case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC:
- return ELF_R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC;
- case R_AARCH64_TLSLE_LDST32_TPREL_LO12:
- return ELF_R_AARCH64_TLSLE_LDST32_TPREL_LO12;
- case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC:
- return ELF_R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC;
- case R_AARCH64_TLSLE_LDST64_TPREL_LO12:
- return ELF_R_AARCH64_TLSLE_LDST64_TPREL_LO12;
- case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
- return ELF_R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC;
- case R_AARCH64_GLOB_DAT:
- return ELF_R_AARCH64_GLOB_DAT;
- case R_AARCH64_JUMP_SLOT:
- return ELF_R_AARCH64_JUMP_SLOT;
- case R_AARCH64_RELATIVE:
- return ELF_R_AARCH64_RELATIVE;
- case R_AARCH64_COPY:
- return ELF_R_AARCH64_COPY;
- default:
- return ELF_R_AARCH64_NONE;
- }
-}
-
-u32 elf_aarch64_reloc_from(u32 elf_type) {
- switch (elf_type) {
- case ELF_R_AARCH64_NONE:
- return R_NONE;
- case ELF_R_AARCH64_ABS64:
- return R_ABS64;
- case ELF_R_AARCH64_ABS32:
- return R_ABS32;
- case ELF_R_AARCH64_PREL64:
- return R_PC64;
- case ELF_R_AARCH64_PREL32:
- return R_PC32;
- case ELF_R_AARCH64_JUMP26:
- return R_AARCH64_JUMP26;
- case ELF_R_AARCH64_CALL26:
- return R_AARCH64_CALL26;
- case ELF_R_AARCH64_CONDBR19:
- return R_AARCH64_CONDBR19;
- case ELF_R_AARCH64_TSTBR14:
- return R_AARCH64_TSTBR14;
- case ELF_R_AARCH64_LD_PREL_LO19:
- return R_AARCH64_LD_PREL_LO19;
- case ELF_R_AARCH64_ADR_PREL_LO21:
- return R_AARCH64_ADR_PREL_LO21;
- case ELF_R_AARCH64_ADR_PREL_PG_HI21:
- return R_AARCH64_ADR_PREL_PG_HI21;
- case ELF_R_AARCH64_ADR_PREL_PG_HI21_NC:
- return R_AARCH64_ADR_PREL_PG_HI21_NC;
- case ELF_R_AARCH64_ADD_ABS_LO12_NC:
- return R_AARCH64_ADD_ABS_LO12_NC;
- case ELF_R_AARCH64_ABS16:
- return R_AARCH64_ABS16;
- case ELF_R_AARCH64_PREL16:
- return R_AARCH64_PREL16;
- case ELF_R_AARCH64_LDST8_ABS_LO12_NC:
- return R_AARCH64_LDST8_ABS_LO12_NC;
- case ELF_R_AARCH64_LDST16_ABS_LO12_NC:
- return R_AARCH64_LDST16_ABS_LO12_NC;
- case ELF_R_AARCH64_LDST32_ABS_LO12_NC:
- return R_AARCH64_LDST32_ABS_LO12_NC;
- case ELF_R_AARCH64_LDST64_ABS_LO12_NC:
- return R_AARCH64_LDST64_ABS_LO12_NC;
- case ELF_R_AARCH64_LDST128_ABS_LO12_NC:
- return R_AARCH64_LDST128_ABS_LO12_NC;
- case ELF_R_AARCH64_ADR_GOT_PAGE:
- return R_AARCH64_ADR_GOT_PAGE;
- case ELF_R_AARCH64_LD64_GOT_LO12_NC:
- return R_AARCH64_LD64_GOT_LO12_NC;
- case ELF_R_AARCH64_TLSLE_ADD_TPREL_HI12:
- return R_AARCH64_TLSLE_ADD_TPREL_HI12;
- case ELF_R_AARCH64_TLSLE_ADD_TPREL_LO12:
- return R_AARCH64_TLSLE_ADD_TPREL_LO12;
- case ELF_R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
- return R_AARCH64_TLSLE_ADD_TPREL_LO12_NC;
- case ELF_R_AARCH64_TLSLE_LDST8_TPREL_LO12:
- return R_AARCH64_TLSLE_LDST8_TPREL_LO12;
- case ELF_R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC:
- return R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC;
- case ELF_R_AARCH64_TLSLE_LDST16_TPREL_LO12:
- return R_AARCH64_TLSLE_LDST16_TPREL_LO12;
- case ELF_R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC:
- return R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC;
- case ELF_R_AARCH64_TLSLE_LDST32_TPREL_LO12:
- return R_AARCH64_TLSLE_LDST32_TPREL_LO12;
- case ELF_R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC:
- return R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC;
- case ELF_R_AARCH64_TLSLE_LDST64_TPREL_LO12:
- return R_AARCH64_TLSLE_LDST64_TPREL_LO12;
- case ELF_R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
- return R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC;
- case ELF_R_AARCH64_GLOB_DAT:
- return R_AARCH64_GLOB_DAT;
- case ELF_R_AARCH64_JUMP_SLOT:
- return R_AARCH64_JUMP_SLOT;
- case ELF_R_AARCH64_RELATIVE:
- return R_AARCH64_RELATIVE;
- case ELF_R_AARCH64_COPY:
- return R_AARCH64_COPY;
- default:
- return (u32)-1; /* sentinel */
- }
-}
diff --git a/src/obj/elf_reloc_riscv64.c b/src/obj/elf_reloc_riscv64.c
@@ -1,182 +0,0 @@
-/* RelocKind <-> RISC-V ELF reloc-type mapping.
- *
- * Mirror of elf_reloc_x86_64.c for the RISC-V LP64 ABI. The arch-
- * agnostic R_ABS / R_PC RelocKind entries fan out to the native
- * RISC-V codes; the RISC-V-specific encodings (HI20/LO12, BRANCH,
- * JAL, CALL, PCREL_*, TPREL_*, ADD/SUB/SET, RELAX, ALIGN, RVC_*)
- * live in the lower band as R_RV_*.
- *
- * Returning ELF_R_RISCV_NONE for an unsupported kind is the signal
- * to the caller to either panic (emit) or panic (read with diagnostic). */
-
-#include "obj/elf.h"
-
-u32 elf_riscv64_reloc_to(u32 kind /* RelocKind */) {
- switch (kind) {
- case R_NONE:
- return ELF_R_RISCV_NONE;
- case R_ABS64:
- return ELF_R_RISCV_64;
- case R_ABS32:
- return ELF_R_RISCV_32;
- case R_PC32:
- return ELF_R_RISCV_32_PCREL;
- case R_RV_HI20:
- return ELF_R_RISCV_HI20;
- case R_RV_LO12_I:
- return ELF_R_RISCV_LO12_I;
- case R_RV_LO12_S:
- return ELF_R_RISCV_LO12_S;
- case R_RV_BRANCH:
- return ELF_R_RISCV_BRANCH;
- case R_RV_JAL:
- return ELF_R_RISCV_JAL;
- case R_RV_CALL:
- return ELF_R_RISCV_CALL;
- case R_PLT32:
- return ELF_R_RISCV_CALL_PLT;
- case R_RV_PCREL_HI20:
- return ELF_R_RISCV_PCREL_HI20;
- case R_RV_PCREL_LO12_I:
- return ELF_R_RISCV_PCREL_LO12_I;
- case R_RV_PCREL_LO12_S:
- return ELF_R_RISCV_PCREL_LO12_S;
- case R_RV_GOT_HI20:
- return ELF_R_RISCV_GOT_HI20;
- case R_RV_TLS_GOT_HI20:
- return ELF_R_RISCV_TLS_GOT_HI20;
- case R_RV_TPREL_HI20:
- return ELF_R_RISCV_TPREL_HI20;
- case R_RV_TPREL_LO12_I:
- return ELF_R_RISCV_TPREL_LO12_I;
- case R_RV_TPREL_LO12_S:
- return ELF_R_RISCV_TPREL_LO12_S;
- case R_RV_TPREL_ADD:
- return ELF_R_RISCV_TPREL_ADD;
- case R_RV_ADD8:
- return ELF_R_RISCV_ADD8;
- case R_RV_ADD16:
- return ELF_R_RISCV_ADD16;
- case R_RV_ADD32:
- return ELF_R_RISCV_ADD32;
- case R_RV_ADD64:
- return ELF_R_RISCV_ADD64;
- case R_RV_SUB8:
- return ELF_R_RISCV_SUB8;
- case R_RV_SUB16:
- return ELF_R_RISCV_SUB16;
- case R_RV_SUB32:
- return ELF_R_RISCV_SUB32;
- case R_RV_SUB64:
- return ELF_R_RISCV_SUB64;
- case R_RV_ALIGN:
- return ELF_R_RISCV_ALIGN;
- case R_RV_RVC_BRANCH:
- return ELF_R_RISCV_RVC_BRANCH;
- case R_RV_RVC_JUMP:
- return ELF_R_RISCV_RVC_JUMP;
- case R_RV_RELAX:
- return ELF_R_RISCV_RELAX;
- case R_RV_SUB6:
- return ELF_R_RISCV_SUB6;
- case R_RV_SET6:
- return ELF_R_RISCV_SET6;
- case R_RV_SET8:
- return ELF_R_RISCV_SET8;
- case R_RV_SET16:
- return ELF_R_RISCV_SET16;
- case R_RV_SET32:
- return ELF_R_RISCV_SET32;
- case R_RV_SET_ULEB128:
- return ELF_R_RISCV_SET_ULEB128;
- case R_RV_SUB_ULEB128:
- return ELF_R_RISCV_SUB_ULEB128;
- default:
- return ELF_R_RISCV_NONE;
- }
-}
-
-u32 elf_riscv64_reloc_from(u32 elf_type) {
- switch (elf_type) {
- case ELF_R_RISCV_NONE:
- return R_NONE;
- case ELF_R_RISCV_64:
- return R_ABS64;
- case ELF_R_RISCV_32:
- return R_ABS32;
- case ELF_R_RISCV_32_PCREL:
- return R_PC32;
- case ELF_R_RISCV_HI20:
- return R_RV_HI20;
- case ELF_R_RISCV_LO12_I:
- return R_RV_LO12_I;
- case ELF_R_RISCV_LO12_S:
- return R_RV_LO12_S;
- case ELF_R_RISCV_BRANCH:
- return R_RV_BRANCH;
- case ELF_R_RISCV_JAL:
- return R_RV_JAL;
- case ELF_R_RISCV_CALL:
- return R_RV_CALL;
- case ELF_R_RISCV_CALL_PLT:
- return R_PLT32;
- case ELF_R_RISCV_PCREL_HI20:
- return R_RV_PCREL_HI20;
- case ELF_R_RISCV_PCREL_LO12_I:
- return R_RV_PCREL_LO12_I;
- case ELF_R_RISCV_PCREL_LO12_S:
- return R_RV_PCREL_LO12_S;
- case ELF_R_RISCV_GOT_HI20:
- return R_RV_GOT_HI20;
- case ELF_R_RISCV_TLS_GOT_HI20:
- return R_RV_TLS_GOT_HI20;
- case ELF_R_RISCV_TPREL_HI20:
- return R_RV_TPREL_HI20;
- case ELF_R_RISCV_TPREL_LO12_I:
- return R_RV_TPREL_LO12_I;
- case ELF_R_RISCV_TPREL_LO12_S:
- return R_RV_TPREL_LO12_S;
- case ELF_R_RISCV_TPREL_ADD:
- return R_RV_TPREL_ADD;
- case ELF_R_RISCV_ADD8:
- return R_RV_ADD8;
- case ELF_R_RISCV_ADD16:
- return R_RV_ADD16;
- case ELF_R_RISCV_ADD32:
- return R_RV_ADD32;
- case ELF_R_RISCV_ADD64:
- return R_RV_ADD64;
- case ELF_R_RISCV_SUB8:
- return R_RV_SUB8;
- case ELF_R_RISCV_SUB16:
- return R_RV_SUB16;
- case ELF_R_RISCV_SUB32:
- return R_RV_SUB32;
- case ELF_R_RISCV_SUB64:
- return R_RV_SUB64;
- case ELF_R_RISCV_ALIGN:
- return R_RV_ALIGN;
- case ELF_R_RISCV_RVC_BRANCH:
- return R_RV_RVC_BRANCH;
- case ELF_R_RISCV_RVC_JUMP:
- return R_RV_RVC_JUMP;
- case ELF_R_RISCV_RELAX:
- return R_RV_RELAX;
- case ELF_R_RISCV_SUB6:
- return R_RV_SUB6;
- case ELF_R_RISCV_SET6:
- return R_RV_SET6;
- case ELF_R_RISCV_SET8:
- return R_RV_SET8;
- case ELF_R_RISCV_SET16:
- return R_RV_SET16;
- case ELF_R_RISCV_SET32:
- return R_RV_SET32;
- case ELF_R_RISCV_SET_ULEB128:
- return R_RV_SET_ULEB128;
- case ELF_R_RISCV_SUB_ULEB128:
- return R_RV_SUB_ULEB128;
- default:
- return (u32)-1; /* sentinel */
- }
-}
diff --git a/src/obj/elf_reloc_x86_64.c b/src/obj/elf_reloc_x86_64.c
@@ -1,134 +0,0 @@
-/* RelocKind <-> x86_64 ELF reloc-type mapping.
- *
- * Mirror of elf_reloc_aarch64.c for the x86_64 SysV ABI. The arch-
- * agnostic R_ABS / R_PC / R_REL RelocKind entries fan out to the
- * native x86_64 codes; the x86_64-only encodings (R_X64_PC8, PLT32,
- * GOTPCREL, dynamic-only entries) live in the lower band.
- *
- * Returning ELF_R_X86_64_NONE for an unsupported kind is the signal
- * to the caller to either panic (emit) or panic (read with diagnostic). */
-
-#include "obj/elf.h"
-
-u32 elf_x86_64_reloc_to(u32 kind /* RelocKind */) {
- switch (kind) {
- case R_NONE:
- return ELF_R_X86_64_NONE;
- case R_ABS64:
- return ELF_R_X86_64_64;
- case R_ABS32:
- return ELF_R_X86_64_32;
- case R_X64_32S:
- return ELF_R_X86_64_32S;
- case R_PC32:
- return ELF_R_X86_64_PC32;
- case R_PC64:
- return ELF_R_X86_64_PC64;
- case R_REL32:
- return ELF_R_X86_64_PC32;
- case R_REL64:
- return ELF_R_X86_64_PC64;
- case R_X64_PC8:
- return ELF_R_X86_64_PC8;
- case R_PLT32:
- case R_X64_PLT32:
- return ELF_R_X86_64_PLT32;
- case R_GOT32:
- return ELF_R_X86_64_GOT32;
- case R_X64_GOTPCREL:
- return ELF_R_X86_64_GOTPCREL;
- case R_X64_GOTPCRELX:
- return ELF_R_X86_64_GOTPCRELX;
- case R_X64_REX_GOTPCRELX:
- return ELF_R_X86_64_REX_GOTPCRELX;
- case R_X64_GOTPC32:
- return ELF_R_X86_64_GOTPC32;
- case R_X64_GOTOFF64:
- return ELF_R_X86_64_GOTOFF64;
- case R_X64_TPOFF32:
- return ELF_R_X86_64_TPOFF32;
- case R_X64_TPOFF64:
- return ELF_R_X86_64_TPOFF64;
- case R_X64_DTPOFF32:
- return ELF_R_X86_64_DTPOFF32;
- case R_X64_DTPMOD64:
- return ELF_R_X86_64_DTPMOD64;
- case R_X64_DTPOFF64:
- return ELF_R_X86_64_DTPOFF64;
- case R_X64_TLSGD:
- return ELF_R_X86_64_TLSGD;
- case R_X64_TLSLD:
- return ELF_R_X86_64_TLSLD;
- case R_X64_GOTTPOFF:
- return ELF_R_X86_64_GOTTPOFF;
- case R_X64_GLOB_DAT:
- return ELF_R_X86_64_GLOB_DAT;
- case R_X64_JUMP_SLOT:
- return ELF_R_X86_64_JUMP_SLOT;
- case R_X64_RELATIVE:
- return ELF_R_X86_64_RELATIVE;
- case R_X64_COPY:
- return ELF_R_X86_64_COPY;
- default:
- return ELF_R_X86_64_NONE;
- }
-}
-
-u32 elf_x86_64_reloc_from(u32 elf_type) {
- switch (elf_type) {
- case ELF_R_X86_64_NONE:
- return R_NONE;
- case ELF_R_X86_64_64:
- return R_ABS64;
- case ELF_R_X86_64_32:
- return R_ABS32;
- case ELF_R_X86_64_32S:
- return R_X64_32S;
- case ELF_R_X86_64_PC32:
- return R_PC32;
- case ELF_R_X86_64_PC64:
- return R_PC64;
- case ELF_R_X86_64_PC8:
- return R_X64_PC8;
- case ELF_R_X86_64_PLT32:
- return R_X64_PLT32;
- case ELF_R_X86_64_GOT32:
- return R_GOT32;
- case ELF_R_X86_64_GOTPCREL:
- return R_X64_GOTPCREL;
- case ELF_R_X86_64_GOTPCRELX:
- return R_X64_GOTPCRELX;
- case ELF_R_X86_64_REX_GOTPCRELX:
- return R_X64_REX_GOTPCRELX;
- case ELF_R_X86_64_GOTPC32:
- return R_X64_GOTPC32;
- case ELF_R_X86_64_GOTOFF64:
- return R_X64_GOTOFF64;
- case ELF_R_X86_64_TPOFF32:
- return R_X64_TPOFF32;
- case ELF_R_X86_64_TPOFF64:
- return R_X64_TPOFF64;
- case ELF_R_X86_64_DTPOFF32:
- return R_X64_DTPOFF32;
- case ELF_R_X86_64_DTPMOD64:
- return R_X64_DTPMOD64;
- case ELF_R_X86_64_DTPOFF64:
- return R_X64_DTPOFF64;
- case ELF_R_X86_64_TLSGD:
- return R_X64_TLSGD;
- case ELF_R_X86_64_TLSLD:
- return R_X64_TLSLD;
- case ELF_R_X86_64_GOTTPOFF:
- return R_X64_GOTTPOFF;
- case ELF_R_X86_64_GLOB_DAT:
- return R_X64_GLOB_DAT;
- case ELF_R_X86_64_JUMP_SLOT:
- return R_X64_JUMP_SLOT;
- case ELF_R_X86_64_RELATIVE:
- return R_X64_RELATIVE;
- case ELF_R_X86_64_COPY:
- return R_X64_COPY;
- default:
- return (u32)-1; /* sentinel */
- }
-}
diff --git a/src/obj/format.h b/src/obj/format.h
@@ -1,13 +1,13 @@
#ifndef CFREE_OBJ_FORMAT_H
#define CFREE_OBJ_FORMAT_H
-#include <stddef.h>
-
#include <cfree/object.h>
+#include <stddef.h>
#include "core/core.h"
typedef struct LinkImage LinkImage;
+typedef struct Linker Linker;
typedef ObjBuilder* (*ObjFormatReadFn)(Compiler*, const char* name,
const u8* data, size_t len);
@@ -16,6 +16,45 @@ typedef ObjBuilder* (*ObjFormatReadDsoFn)(Compiler*, const char* name,
Sym* soname_out);
typedef void (*ObjFormatEmitFn)(Compiler*, ObjBuilder*, Writer*);
typedef void (*ObjFormatLinkEmitFn)(LinkImage*, Writer*);
+typedef void (*ObjFormatLayoutDynFn)(Linker*, LinkImage*); /* type of ObjFormatImpl.layout_dyn */
+typedef void (*ObjFormatFreeDynFn)(LinkImage*); /* type of ObjFormatImpl.free_dyn */
+typedef void (*ObjFormatMachoStubFn)(u8* dst, u64 stub_vaddr,
+ u64 got_slot_vaddr); /* type of ObjMachoArchOps.emit_stub */
+typedef void (*ObjFormatCoffStubFn)(u8* dst, u64 stub_vaddr,
+ u64 iat_slot_vaddr); /* type of ObjCoffArchOps.emit_iat_stub */
+
+typedef struct ObjElfArchOps { /* per-architecture ELF parameters and reloc translators */
+ CfreeArchKind arch; /* architecture this entry describes */
+ u32 e_machine; /* presumably the ELF header e_machine value — TODO confirm */
+ u32 e_flags; /* presumably the ELF header e_flags value — TODO confirm */
+ const char* default_musl_interp; /* presumably the default musl interpreter path — TODO confirm */
+ u32 r_relative; /* presumably the wire type of the RELATIVE dynamic reloc — TODO confirm */
+ u32 r_glob_dat; /* presumably the wire type of the GLOB_DAT dynamic reloc — TODO confirm */
+ u32 r_jump_slot; /* presumably the wire type of the JUMP_SLOT dynamic reloc — TODO confirm */
+ u32 (*reloc_to)(u32 kind); /* reloc kind -> wire relocation type */
+ u32 (*reloc_from)(u32 wire_type); /* wire relocation type -> reloc kind */
+} ObjElfArchOps;
+
+typedef struct ObjMachoArchOps { /* per-architecture Mach-O parameters; emit_macho reads these */
+ CfreeArchKind arch; /* architecture this entry describes */
+ u32 cputype; /* written to mach_header_64.cputype by emit_macho */
+ u32 cpusubtype; /* written to mach_header_64.cpusubtype by emit_macho */
+ u32 stub_size; /* presumably bytes per stub produced by emit_stub — TODO confirm */
+ ObjFormatMachoStubFn emit_stub; /* not used by emit_macho; presumably a linker hook — TODO confirm */
+ u32 (*reloc_to)(u32 kind); /* reloc kind -> Mach-O r_type; emit_macho treats (u32)-1 as unsupported */
+ u32 (*reloc_pcrel)(u32 kind); /* low bit becomes r_pcrel in the packed reloc word */
+ u32 (*reloc_length)(u32 kind); /* low two bits become r_length in the packed reloc word */
+ u32 (*reloc_from)(u32 wire_type); /* Mach-O r_type -> reloc kind */
+} ObjMachoArchOps;
+
+typedef struct ObjCoffArchOps { /* per-architecture COFF parameters and reloc translators */
+ CfreeArchKind arch; /* architecture this entry describes */
+ u16 machine; /* presumably the COFF file-header Machine value — TODO confirm */
+ u32 stub_size; /* presumably bytes per stub produced by emit_iat_stub — TODO confirm */
+ ObjFormatCoffStubFn emit_iat_stub; /* stub writer; see ObjFormatCoffStubFn for the arguments */
+ u32 (*reloc_to)(u32 kind); /* reloc kind -> wire relocation type */
+ u32 (*reloc_from)(u32 wire_type); /* wire relocation type -> reloc kind */
+} ObjCoffArchOps;
typedef enum ObjFormatArchiveAction {
OBJ_FORMAT_ARCHIVE_KEEP = 0,
@@ -49,6 +88,15 @@ typedef struct ObjFormatImpl {
ObjFormatReadFn read;
ObjFormatReadDsoFn read_dso;
ObjFormatLinkEmitFn link_emit;
+ ObjFormatLayoutDynFn layout_dyn;
+ ObjFormatFreeDynFn free_dyn;
+
+ const ObjElfArchOps* (*elf_arch)(CfreeArchKind);
+ const ObjElfArchOps* (*elf_machine)(u32 e_machine);
+ const ObjMachoArchOps* (*macho_arch)(CfreeArchKind);
+ const ObjMachoArchOps* (*macho_cputype)(u32 cputype);
+ const ObjCoffArchOps* (*coff_arch)(CfreeArchKind);
+ const ObjCoffArchOps* (*coff_machine)(u16 machine);
/* Optional format-specific linker ingestion policy. */
int (*classify_obj_input)(Compiler*, ObjBuilder*, Sym* soname_out);
diff --git a/src/obj/macho/emit.c b/src/obj/macho/emit.c
@@ -0,0 +1,797 @@
+/* Mach-O MH_OBJECT writer. Walks a finalized ObjBuilder and emits a
+ * 64-bit little-endian relocatable object via the supplied Writer.
+ *
+ * Layout strategy (MH_OBJECT — everything in one anonymous segment):
+ * 1. plan Mach-O sections (one per non-symtab/strtab/rela ObjSection),
+ * mapping cfree section names to (segname, sectname) pairs;
+ * 2. partition ObjSyms into local / extdef / undef and assign final
+ * indices for LC_DYSYMTAB;
+ * 3. build per-section relocation tables via the per-arch translator
+ * (only aarch64 is wired today);
+ * 4. assign file offsets sequentially: header, load commands, section
+ * bytes, relocation tables, symbol table, string table;
+ * 5. write header → load commands → section bytes → relocs → symtab
+ * → strtab.
+ *
+ * 64-bit little-endian only. Big-endian / 32-bit panics at entry.
+ *
+ * Round-trip invariant: read_macho of
+ * this output must produce an ObjBuilder shape-equivalent to the input,
+ * modulo (a) Mach-O's mandatory (segname, sectname) pairing and (b)
+ * any synthesized N_SECT symbols. The (segname,sectname) form chosen
+ * here is the canonical post-roundtrip shape — read_macho stores the
+ * comma-joined "__SEG,__sect" form in Section.name so a re-emit
+ * produces the same bytes. */
+
+#include <string.h>
+
+#include "core/arena.h"
+#include "core/buf.h"
+#include "core/bytes.h"
+#include "core/heap.h"
+#include "core/pool.h"
+#include "core/slice.h"
+#include "core/util.h"
+#include "obj/format.h"
+#include "obj/macho/macho.h"
+
+/* All-zero SrcLoc for diagnostics that have no source position. */
+static SrcLoc no_loc(void) {
+  return (SrcLoc){0, 0, 0};
+}
+
+/* ---- LE writer helpers (Writer-based) ---- */
+
+static void wr_u32(Writer* w, u32 v) {
+  u8 le[4]; /* `v` in little-endian byte order */
+  wr_u32_le(le, v);
+  cfree_writer_write(w, le, sizeof le);
+}
+
+static void wr_u64(Writer* w, u64 v) {
+  u8 le[8]; /* `v` in little-endian byte order */
+  wr_u64_le(le, v);
+  cfree_writer_write(w, le, sizeof le);
+}
+
+static void wr_name16(Writer* w, const char* s, u32 len) {
+  /* Segment and section names occupy fixed 16-byte fields. The field is
+   * zero-filled up front and at most 16 bytes of `s` are copied over it:
+   * shorter names end up zero-padded, longer ones are truncated because
+   * the on-disk format leaves no room for a longer encoding. A full
+   * 16-byte name carries no terminating NUL. */
+  u8 field[16] = {0};
+  memcpy(field, s, len > 16 ? 16 : len);
+  cfree_writer_write(w, field, sizeof field);
+}
+
+/* ---- (segname,sectname) derivation ---- */
+
+/* Split a cfree section name into Mach-O (segname, sectname) pair.
+ * If `name` contains a comma, it is treated as already in
+ * "__SEG,__sect" form and split at the first comma. Otherwise we
+ * derive the pair from SecKind, ignoring `name` (the input was an
+ * ELF-shaped name like ".text" or ".rodata"). */
+typedef struct MSegSect {
+ char segname[16]; /* zero-padded; not NUL-terminated when all 16 bytes are used */
+ char sectname[16]; /* same fixed-width layout as segname */
+ u32 seg_len; /* bytes of segname in use, clamped to 16 */
+ u32 sect_len; /* bytes of sectname in use, clamped to 16 */
+} MSegSect;
+
+static void copy_fixed16(char* dst, u32* len_out, const char* src,
+                         u32 src_len) {
+  /* Clamp to the 16-byte field, zero the field, then overlay the name. */
+  *len_out = src_len > 16 ? 16 : src_len;
+  memset(dst, 0, 16);
+  memcpy(dst, src, *len_out);
+}
+
+static void name_to_seg_sect(const char* name, u32 nlen, u16 sec_kind,
+                             MSegSect* out) {
+  /* "__SEG,__sect" input: the text before the first comma is the segment
+   * name and the text after it is the section name. */
+  const char* comma = memchr(name, ',', nlen);
+  if (comma) {
+    u32 split = (u32)(comma - name);
+    copy_fixed16(out->segname, &out->seg_len, name, split);
+    copy_fixed16(out->sectname, &out->sect_len, comma + 1, nlen - split - 1);
+    return;
+  }
+
+  /* DWARF sections are the one comma-less case that still looks at
+   * `name`: they land in __DWARF and keep the input name with a single
+   * leading '.' dropped, so ".debug_info" is emitted as "debug_info".
+   * NOTE(review): Apple toolchains name these "__debug_info" etc. —
+   * confirm whether a "__" prefix was intended here. */
+  if (sec_kind == SEC_DEBUG) {
+    const char* dwarf = "__DWARF";
+    u32 skip = (nlen && name[0] == '.') ? 1u : 0u;
+    copy_fixed16(out->segname, &out->seg_len, dwarf,
+                 (u32)slice_from_cstr(dwarf).len);
+    copy_fixed16(out->sectname, &out->sect_len, name + skip, nlen - skip);
+    return;
+  }
+
+  /* Every other kind maps to a fixed pair chosen from SecKind alone;
+   * SEC_DATA and any kind not listed below use __DATA,__data. */
+  const char* seg = "__DATA";
+  const char* sect = "__data";
+  switch (sec_kind) {
+    case SEC_TEXT:
+      seg = "__TEXT";
+      sect = "__text";
+      break;
+    case SEC_RODATA:
+      seg = "__TEXT";
+      sect = "__const";
+      break;
+    case SEC_BSS:
+      sect = "__bss";
+      break;
+    default:
+      break;
+  }
+  u32 seg_n = (u32)slice_from_cstr(seg).len;
+  u32 sect_n = (u32)slice_from_cstr(sect).len;
+  copy_fixed16(out->segname, &out->seg_len, seg, seg_n);
+  copy_fixed16(out->sectname, &out->sect_len, sect, sect_n);
+}
+
+/* ---- per-section plan ---- */
+
+typedef struct MSec {
+ MSegSect ns; /* (segname, sectname) written into the section header */
+ u64 addr; /* assigned vmaddr within the segment */
+ u64 size; /* bytes (or bss size) */
+ u32 fileoff; /* 0 for zerofill */
+ u32 align; /* power-of-two; stored as log2 in section_64.align */
+ u32 reloff; /* 0 if no relocs */
+ u32 nreloc; /* number of entries in `relocs` */
+ u32 flags; /* S_TYPE | S_ATTR_* */
+ u32 entsize; /* copied from Section.entsize; emitted as reserved2 */
+ u32 obj_sec; /* originating ObjSecId */
+ int is_zerofill; /* 1 for SSEM_NOBITS / SEC_BSS sections (no file bytes) */
+ const Buf* obj_bytes; /* NULL when zerofill */
+ u8* relocs; /* arena-allocated; nreloc * 8 bytes */
+} MSec;
+
+static u32 log2_align(u32 a) { /* ceil(log2(a)); 0 when a is 0 or 1 */
+  u32 r = 0;
+  while (r < 32 && (1u << r) < a) ++r; /* stop before an undefined 32-bit shift */
+  return r;
+}
+
+static u32 section_flags_for(u16 sec_kind, u16 sec_flags, const char* sectname,
+                             u32 sect_len) {
+  int is_bss = sec_kind == SEC_BSS;
+
+  /* Thread-local sections get a bare S_TYPE and no S_ATTR_* bits: dyld
+   * dispatches its TLV-bootstrap pass off the type alone.
+   *   sectname starts with "__thread_vars" -> S_THREAD_LOCAL_VARIABLES
+   *   SEC_BSS kind                         -> S_THREAD_LOCAL_ZEROFILL
+   *   anything else                        -> S_THREAD_LOCAL_REGULAR */
+  if (sec_flags & SF_TLS) {
+    if (sect_len >= 13 && memcmp(sectname, "__thread_vars", 13) == 0) {
+      return S_THREAD_LOCAL_VARIABLES;
+    }
+    return is_bss ? S_THREAD_LOCAL_ZEROFILL : S_THREAD_LOCAL_REGULAR;
+  }
+
+  /* Non-TLS: start from S_REGULAR (0) and layer attributes/type on top. */
+  u32 flags = 0;
+  if (sec_kind == SEC_TEXT || (sec_flags & SF_EXEC)) {
+    flags |= S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS;
+  }
+  if (is_bss || (sect_len >= 5 && memcmp(sectname, "__bss", 5) == 0)) {
+    flags |= S_ZEROFILL;
+  }
+  /* String sections replace whatever S_TYPE was chosen so far. */
+  if (sec_flags & SF_STRINGS) flags = (flags & ~SECTION_TYPE) | S_CSTRING_LITERALS;
+  return flags;
+}
+
+/* ---- symbol partition ---- */
+
+typedef struct MSym {
+ ObjSymId obj_id; /* ObjSym this entry was built from */
+ u32 strx; /* offset in string table */
+ u8 n_type; /* N_EXT / N_PEXT bits plus N_UNDF, N_ABS or N_SECT */
+ u8 n_sect; /* 1-based Mach-O section index, or NO_SECT */
+ u16 n_desc; /* N_WEAK_* bits, common alignment, plus ObjSym.flags */
+ u64 n_value; /* section addr + symbol value for N_SECT; size for commons; 0 if undefined */
+} MSym;
+
+static int sym_is_undef(const ObjSym* s) { /* sectionless and neither ABS nor COMMON */
+  if (s->section_id != OBJ_SEC_NONE) return 0;
+  return s->kind != SK_ABS && s->kind != SK_COMMON;
+}
+
+static int sym_is_extdef(const ObjSym* s) {
+ if (sym_is_undef(s)) return 0;
+ return s->bind == SB_GLOBAL || s->bind == SB_WEAK;
+}
+
+/* emit_macho — write `ob` to `w` as a 64-bit little-endian MH_OBJECT;
+ * unsupported targets and relocations end in compiler_panic.
+ *
+ * String table: the byte at offset 0 is NUL (the empty string). Names are
+ * appended verbatim and NUL-terminated, with no dedupe (small symbol
+ * counts in v1) and no "_" prefix added at emit — see pass 3 below. */
+
+void emit_macho(Compiler* c, ObjBuilder* ob, Writer* w) {
+ Heap* h = (Heap*)c->ctx->heap;
+
+ /* Tombstone sweep first — strip/objcopy mutations and the historical
+ * UNDEF prune are both expressed via Section.removed / ObjSym.removed
+ * post-sweep. See obj_sweep_dead. */
+ obj_sweep_dead(ob);
+
+ /* ---- target validation ---------------------------------------- */
+ const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_MACHO);
+ const ObjMachoArchOps* macho =
+ fmt && fmt->macho_arch ? fmt->macho_arch(c->target.arch) : NULL;
+ u32 cputype, cpusubtype;
+ u32 (*reloc_to)(u32);
+ u32 (*reloc_pcrel)(u32);
+ u32 (*reloc_length)(u32);
+ if (!macho || !macho->reloc_to || !macho->reloc_pcrel ||
+ !macho->reloc_length) {
+ compiler_panic(c, no_loc(), "emit_macho: unsupported target arch %u",
+ (u32)c->target.arch);
+ }
+ cputype = macho->cputype;
+ cpusubtype = macho->cpusubtype;
+ reloc_to = macho->reloc_to;
+ reloc_pcrel = macho->reloc_pcrel;
+ reloc_length = macho->reloc_length;
+ if (c->target.big_endian) {
+ compiler_panic(c, no_loc(), "emit_macho: big-endian not supported");
+ }
+ if (c->target.ptr_size != 8) {
+ compiler_panic(c, no_loc(), "emit_macho: ptr_size %u (expected 8)",
+ (u32)c->target.ptr_size);
+ }
+
+ /* ---- pass 1: plan Mach-O sections ----------------------------- */
+ u32 nobjsec = obj_section_count(ob);
+ MSec* secs = arena_zarray(c->scratch, MSec, nobjsec ? nobjsec : 1);
+ u32* obj_to_msec = arena_zarray(c->scratch, u32, nobjsec ? nobjsec : 1);
+ u32 nsecs = 0;
+
+ for (u32 i = 1; i < nobjsec; ++i) {
+ const Section* s = obj_section_get(ob, i);
+ if (s->removed) continue; /* see obj_sweep_dead */
+ /* Skip ELF-style synthetic sections that read_elf would have
+ * filtered: SYMTAB / STRTAB / RELA / GROUP have no Mach-O
+ * representation as data sections. */
+ if (s->sem == SSEM_SYMTAB || s->sem == SSEM_STRTAB || s->sem == SSEM_RELA ||
+ s->sem == SSEM_REL || s->sem == SSEM_GROUP) {
+ continue;
+ }
+ Slice nm_s = pool_slice(c->global, s->name);
+ const char* nm = nm_s.s;
+ size_t nlen = nm_s.len;
+ MSec* m = &secs[nsecs];
+ name_to_seg_sect(nm ? nm : "", (u32)nlen, s->kind, &m->ns);
+ m->obj_sec = i;
+ m->align = s->align ? s->align : 1;
+ m->entsize = s->entsize;
+ /* Mach-O reader stashes the raw section.flags (S_TYPE | S_ATTR_*)
+ * in Section.ext_type when reading a Mach-O input. Use it
+ * verbatim so attribute bits like S_ATTR_NO_DEAD_STRIP /
+ * S_ATTR_LIVE_SUPPORT round-trip. Fall back to the kind-derived
+ * default for sections originating from non-Mach-O readers (e.g.
+ * cfree codegen). */
+ if (s->ext_kind == OBJ_EXT_MACHO && s->ext_type) {
+ m->flags = s->ext_type;
+ } else {
+ m->flags =
+ section_flags_for(s->kind, s->flags, m->ns.sectname, m->ns.sect_len);
+ }
+ if (s->sem == SSEM_NOBITS || s->kind == SEC_BSS) {
+ m->is_zerofill = 1;
+ m->size = s->bss_size;
+ m->obj_bytes = NULL;
+ /* Preserve S_THREAD_LOCAL_ZEROFILL when SF_TLS routed us there;
+ * a regular BSS section gets the plain S_ZEROFILL type. */
+ u32 stype = m->flags & SECTION_TYPE;
+ if (stype != S_THREAD_LOCAL_ZEROFILL)
+ m->flags = (m->flags & ~SECTION_TYPE) | S_ZEROFILL;
+ } else {
+ m->is_zerofill = 0;
+ m->size = s->bytes.total;
+ m->obj_bytes = &s->bytes;
+ }
+ obj_to_msec[i] = nsecs + 1; /* 1-based: matches Mach-O n_sect. */
+ nsecs++;
+ }
+
+ /* ---- pass 2: assign vmaddrs (segment-relative) and per-section
+ * flat-layout addresses. MH_OBJECT keeps everything in
+ * one segment with vmaddr=0; section addr fields are
+ * relative offsets within the segment.
+ *
+ * Two-pass to match the conventional Mach-O `MH_OBJECT` layout:
+ * non-zerofill sections come first in vmaddr order, then zerofill
+ * sections at the tail. Apple `as` and clang `-c` both lay out
+ * this way, and roundtripping must reproduce it so symbol n_values
+ * (which are segment-relative addresses) compare equal. */
+ u64 cur_addr = 0;
+ for (u32 i = 0; i < nsecs; ++i) {
+ MSec* m = &secs[i];
+ if (m->is_zerofill) continue;
+ cur_addr = ALIGN_UP(cur_addr, (u64)m->align);
+ m->addr = cur_addr;
+ cur_addr += m->size;
+ }
+ for (u32 i = 0; i < nsecs; ++i) {
+ MSec* m = &secs[i];
+ if (!m->is_zerofill) continue;
+ cur_addr = ALIGN_UP(cur_addr, (u64)m->align);
+ m->addr = cur_addr;
+ cur_addr += m->size;
+ }
+ u64 segment_vmsize = cur_addr;
+
+ /* ---- pass 3: partition symbols (locals, extdefs, undefs) ------ */
+ u32 nobjsym = 0;
+ {
+ ObjSymIter* it = obj_symiter_new(ob);
+ ObjSymEntry e;
+ while (obj_symiter_next(it, &e)) ++nobjsym;
+ obj_symiter_free(it);
+ }
+
+ MSym* msyms = arena_zarray(c->scratch, MSym, nobjsym + 1);
+ u32 nmsyms = 0;
+ u32* sym_obj_to_macho =
+ arena_zarray(c->scratch, u32, nobjsym + 2); /* obj_id -> 1-based mach idx; assumes ids < nobjsym + 2 — TODO confirm */
+
+ Buf strtab;
+ buf_init(&strtab, h);
+ /* Mach-O strtab convention: the first byte is " " (space) or NUL —
+ * llvm/Apple emit a single NUL. We start with NUL for offset 0. */
+ {
+ u8 z = 0;
+ buf_write(&strtab, &z, 1);
+ }
+
+ /* Emit in three passes so n_type/sect ordering matches LC_DYSYMTAB
+ * (locals, then extdefs, then undefs). */
+ for (int pass = 0; pass < 3; ++pass) {
+ ObjSymIter* it = obj_symiter_new(ob);
+ ObjSymEntry e;
+ while (obj_symiter_next(it, &e)) {
+ const ObjSym* s = e.sym;
+ if (s->removed) continue; /* spurious-UNDEF prune + explicit removal */
+ int undef = sym_is_undef(s);
+ int extdef = sym_is_extdef(s);
+ int local = !undef && !extdef;
+ int want =
+ (pass == 0 && local) || (pass == 1 && extdef) || (pass == 2 && undef);
+ if (!want) continue;
+ MSym* ms = &msyms[nmsyms];
+ ms->obj_id = e.id;
+
+ Slice nm_s = pool_slice(c->global, s->name);
+ const char* nm = nm_s.s;
+ size_t nlen = nm_s.len;
+ /* Mach-O symbol names are stored on disk verbatim — including
+ * the leading `_` Apple toolchains use for C-source-level
+ * symbols ("_main" for `int main()`). cfree treats the prefix
+ * as part of the on-disk name, not a transform applied at emit.
+ * Name-canonicalization for API callers (cfree_jit_lookup,
+ * link_set_entry) lives one layer up at the linker boundary
+ * (link.c), so emit/read stay byte-for-byte stable. */
+ if (nlen && nm) {
+ u32 off = buf_pos(&strtab);
+ buf_write(&strtab, nm, nlen);
+ u8 z = 0;
+ buf_write(&strtab, &z, 1);
+ ms->strx = off;
+ } else {
+ ms->strx = 0;
+ }
+
+ u8 type = 0;
+ if (extdef) type |= N_EXT;
+ if (s->vis == SV_HIDDEN || s->vis == SV_INTERNAL) {
+ /* Mach-O encodes hidden externals as N_PEXT|N_EXT. */
+ type |= N_PEXT;
+ }
+ u8 n_sect = NO_SECT;
+ u16 n_desc = 0;
+ u64 value = s->value;
+
+ if (undef) {
+ type |= N_UNDF;
+ /* Undefined symbols with non-LOCAL bind are external references
+ * (the common case — every `extern int x;`). Setting N_EXT
+ * matches what clang emits and what Apple `ld` expects. */
+ if (s->bind == SB_GLOBAL || s->bind == SB_WEAK) type |= N_EXT;
+ if (s->bind == SB_WEAK) n_desc |= N_WEAK_REF;
+ value = 0;
+ } else if (s->kind == SK_ABS) {
+ type |= N_ABS;
+ } else if (s->kind == SK_COMMON) {
+ /* Mach-O common symbols are N_UNDF|N_EXT with n_value=size and
+ * n_desc carrying log2(align) in the GET_COMM_ALIGN bits. */
+ type = N_UNDF | N_EXT;
+ value = s->size;
+ u32 a = s->common_align ? (u32)s->common_align : 1;
+ n_desc = (u16)(log2_align(a) << 8); /* GET_COMM_ALIGN field */
+ } else {
+ type |= N_SECT;
+ u32 ms_idx = (s->section_id < nobjsec) ? obj_to_msec[s->section_id] : 0;
+ n_sect = (u8)ms_idx; /* NOTE(review): truncates if more than 255 sections were planned */
+ if (n_sect && n_sect <= nsecs) {
+ value = secs[n_sect - 1].addr + s->value;
+ }
+ if (s->bind == SB_WEAK) n_desc |= N_WEAK_DEF;
+ }
+
+ /* OR in any pass-through n_desc bits the reader stashed in
+ * sym->flags (N_NO_DEAD_STRIP, etc.). The bits we already
+ * compute (N_WEAK_DEF / N_WEAK_REF and the common-alignment
+ * field) are already excluded by read_macho before stashing,
+ * so a plain OR can't double-count. */
+ n_desc |= s->flags;
+
+ ms->n_type = type;
+ ms->n_sect = n_sect;
+ ms->n_desc = n_desc;
+ ms->n_value = value;
+
+ sym_obj_to_macho[e.id] = nmsyms + 1; /* 1-based index, 0 = none. */
+ nmsyms++;
+ }
+ obj_symiter_free(it);
+ }
+
+ u32 nlocals = 0, nextdefs = 0, nundefs = 0; /* provisional tally; all three are reset and recounted below */
+ for (u32 i = 0; i < nmsyms; ++i) {
+ u8 t = msyms[i].n_type;
+ u8 ext = (t & N_EXT) != 0;
+ u8 typ = (u8)(t & N_TYPE);
+ if (typ == N_UNDF && ext) {
+ /* Could be undef or common — common has nonzero n_value. */
+ if (msyms[i].n_value != 0)
+ ++nextdefs; /* common is conventionally extdef-shaped */
+ else
+ ++nundefs;
+ } else if (ext) {
+ ++nextdefs;
+ } else {
+ ++nlocals;
+ }
+ }
+ /* Re-derive without the common fudge by counting partition pass: we
+ * already wrote them in (locals,extdefs,undefs) order, so the prefix
+ * counts are just the per-pass counts. Mirror the spurious-UNDEF
+ * prune from the emit loop above so the LC_DYSYMTAB index counts
+ * line up with the symbols we actually wrote. */
+ nlocals = 0;
+ nextdefs = 0;
+ nundefs = 0;
+ {
+ ObjSymIter* it = obj_symiter_new(ob);
+ ObjSymEntry e;
+ while (obj_symiter_next(it, &e)) {
+ const ObjSym* s = e.sym;
+ if (s->removed) continue;
+ int undef = sym_is_undef(s);
+ if (undef)
+ ++nundefs;
+ else if (sym_is_extdef(s))
+ ++nextdefs;
+ else
+ ++nlocals;
+ }
+ obj_symiter_free(it);
+ }
+
+ /* ---- pass 4: build per-section relocation tables -------------- */
+ u32 total_relocs = obj_reloc_total(ob);
+ for (u32 i = 0; i < nsecs; ++i) {
+ MSec* m = &secs[i];
+ u32 nr = obj_reloc_count(ob, m->obj_sec);
+ if (!nr) continue;
+ /* Worst case: each reloc may be preceded by an ARM64_RELOC_ADDEND
+ * pair entry. We size the buffer for that upper bound. */
+ u8* buf = (u8*)arena_alloc(c->scratch, (size_t)MACHO_RELOC_SIZE * nr * 2,
+ _Alignof(u32));
+ u32 j = 0;
+ for (u32 ri = 0; ri < total_relocs; ++ri) {
+ const Reloc* r = obj_reloc_at(ob, ri);
+ if (r->removed) continue;
+ if (r->section_id != m->obj_sec) continue;
+ if ((r->kind == R_RV_ADD8 || r->kind == R_RV_ADD16 ||
+ r->kind == R_RV_ADD32 || r->kind == R_RV_ADD64) &&
+ ri + 1u < total_relocs) {
+ const Reloc* sub = obj_reloc_at(ob, ri + 1u);
+ int paired = sub && sub->section_id == r->section_id &&
+ sub->offset == r->offset &&
+ ((r->kind == R_RV_ADD8 && sub->kind == R_RV_SUB8) ||
+ (r->kind == R_RV_ADD16 && sub->kind == R_RV_SUB16) ||
+ (r->kind == R_RV_ADD32 && sub->kind == R_RV_SUB32) ||
+ (r->kind == R_RV_ADD64 && sub->kind == R_RV_SUB64));
+ if (paired) {
+ u32 length = (r->kind == R_RV_ADD64) ? 3u
+ : (r->kind == R_RV_ADD32) ? 2u
+ : (r->kind == R_RV_ADD16) ? 1u
+ : 0u;
+ u32 add_idx;
+ u32 sub_idx;
+ u32 sub_type = c->target.arch == CFREE_ARCH_ARM_64
+ ? ARM64_RELOC_SUBTRACTOR
+ : X86_64_RELOC_SUBTRACTOR;
+ u32 unsigned_type = c->target.arch == CFREE_ARCH_ARM_64
+ ? ARM64_RELOC_UNSIGNED
+ : X86_64_RELOC_UNSIGNED;
+ if (r->sym == OBJ_SYM_NONE || sub->sym == OBJ_SYM_NONE) {
+ compiler_panic(c, no_loc(),
+ "emit_macho: symdiff reloc without symbol");
+ }
+ add_idx = sym_obj_to_macho[r->sym];
+ sub_idx = sym_obj_to_macho[sub->sym];
+ if (add_idx == 0 || sub_idx == 0) {
+ compiler_panic(c, no_loc(),
+ "emit_macho: symdiff reloc target not in symtab");
+ }
+ {
+ u8* slot = buf + (size_t)j * MACHO_RELOC_SIZE;
+ wr_u32_le(slot + 0, (u32)r->offset);
+ wr_u32_le(slot + 4, ((sub_idx - 1u) & 0x00ffffffu) |
+ (length << 25) | (1u << 27) |
+ ((sub_type & 0xfu) << 28));
+ ++j;
+ }
+ {
+ u8* slot = buf + (size_t)j * MACHO_RELOC_SIZE;
+ wr_u32_le(slot + 0, (u32)r->offset);
+ wr_u32_le(slot + 4, ((add_idx - 1u) & 0x00ffffffu) |
+ (length << 25) | (1u << 27) |
+ ((unsigned_type & 0xfu) << 28));
+ ++j;
+ }
+ ++ri;
+ continue;
+ }
+ }
+ u32 mtype = reloc_to(r->kind);
+ if (mtype == (u32)-1) {
+ compiler_panic(c, no_loc(),
+ "emit_macho: unsupported reloc kind %u for arch %u",
+ (u32)r->kind, (u32)c->target.arch);
+ }
+ u32 pcrel = reloc_pcrel(r->kind);
+ u32 length = reloc_length(r->kind);
+
+ /* Resolve target — extern always 1 in our model (every Reloc has
+ * an ObjSymId). Panic on relocs without a symbol — they would map to
+ * a section-relative reloc which the v1 cgtarget never emits. */
+ if (r->sym == OBJ_SYM_NONE) {
+ compiler_panic(c, no_loc(),
+ "emit_macho: reloc without symbol not supported "
+ "(sec=%u offset=%u kind=%u)",
+ (u32)r->section_id, (u32)r->offset, (u32)r->kind);
+ }
+ u32 mach_sym_idx = sym_obj_to_macho[r->sym];
+ if (mach_sym_idx == 0) {
+ compiler_panic(c, no_loc(),
+ "emit_macho: reloc target sym %u not in symtab",
+ (u32)r->sym);
+ }
+ u32 r_symbolnum = mach_sym_idx - 1; /* Mach-O uses 0-based. */
+
+ /* Non-zero addend: emit a leading ARM64_RELOC_ADDEND entry (skipped
+ * for UNSIGNED, which carries the addend inline in the patched bytes).
+ * NOTE(review): ARM64_* constants are used for every arch — confirm. */
+ if (r->addend != 0 && mtype != ARM64_RELOC_UNSIGNED) {
+ u8* slot = buf + (size_t)j * MACHO_RELOC_SIZE;
+ wr_u32_le(slot + 0, (u32)r->offset);
+ u32 packed = ((u32)(i64)r->addend & 0x00ffffffu) | (0u << 24) |
+ (length << 25) | (1u << 27) /*extern*/ |
+ (ARM64_RELOC_ADDEND << 28);
+ wr_u32_le(slot + 4, packed);
+ ++j;
+ }
+
+ u8* slot = buf + (size_t)j * MACHO_RELOC_SIZE;
+ wr_u32_le(slot + 0, (u32)r->offset);
+ u32 packed = (r_symbolnum & 0x00ffffffu) | ((pcrel & 1u) << 24) |
+ ((length & 3u) << 25) | (1u << 27) /*extern*/ |
+ ((mtype & 0xfu) << 28);
+ wr_u32_le(slot + 4, packed);
+ ++j;
+ }
+ m->relocs = buf;
+ m->nreloc = j;
+ }
+
+ /* ---- pass 5: assign file offsets ------------------------------ */
+ /* Layout after the load-command block:
+ * section bytes (in order, respecting align)
+ * relocation tables (per section, 4-aligned)
+ * symbol table (8-aligned)
+ * string table */
+ u32 nload_cmds =
+ 4; /* LC_SEGMENT_64 + LC_BUILD_VERSION + LC_SYMTAB + LC_DYSYMTAB */
+ u32 segcmd_size = MACHO_SEGCMD64_SIZE + nsecs * MACHO_SECT64_SIZE;
+ u32 build_version_size =
+ 24; /* fixed: cmd+cmdsize+platform+minos+sdk+ntools(0) */
+ u32 sizeofcmds = segcmd_size + build_version_size + MACHO_SYMTAB_CMD_SIZE +
+ MACHO_DYSYMTAB_CMD_SIZE;
+
+ u64 cur = MACHO_HDR64_SIZE + sizeofcmds;
+ u32 fileoff_first = (u32)cur;
+ for (u32 i = 0; i < nsecs; ++i) {
+ MSec* m = &secs[i];
+ if (m->is_zerofill) {
+ m->fileoff = 0;
+ continue;
+ }
+ cur = ALIGN_UP(cur, (u64)m->align);
+ m->fileoff = (u32)cur;
+ cur += m->size;
+ }
+
+ /* Reloc tables. */
+ for (u32 i = 0; i < nsecs; ++i) {
+ MSec* m = &secs[i];
+ if (!m->nreloc) {
+ m->reloff = 0;
+ continue;
+ }
+ cur = ALIGN_UP(cur, (u64)4);
+ m->reloff = (u32)cur;
+ cur += (u64)m->nreloc * MACHO_RELOC_SIZE;
+ }
+
+ cur = ALIGN_UP(cur, (u64)8);
+ u64 symoff = cur;
+ cur += (u64)nmsyms * MACHO_NLIST64_SIZE;
+ u64 stroff = cur;
+ u32 strtab_size = buf_pos(&strtab);
+ cur += strtab_size;
+
+ /* ---- pass 6: write the file ------------------------------------ */
+ cfree_writer_seek(w, 0);
+
+ /* mach_header_64 */
+ wr_u32(w, MH_MAGIC_64); /* magic */
+ wr_u32(w, cputype); /* cputype */
+ wr_u32(w, cpusubtype); /* cpusubtype */
+ wr_u32(w, MH_OBJECT); /* filetype */
+ wr_u32(w, nload_cmds); /* ncmds */
+ wr_u32(w, sizeofcmds); /* sizeofcmds */
+ wr_u32(w, 0); /* flags — MH_OBJECT carries none in v1 */
+ wr_u32(w, 0); /* reserved */
+
+ /* LC_SEGMENT_64 (anonymous, contains everything) */
+ wr_u32(w, LC_SEGMENT_64); /* cmd */
+ wr_u32(w, segcmd_size); /* cmdsize */
+ wr_name16(w, "", 0); /* segname: empty for MH_OBJECT */
+ wr_u64(w, 0); /* vmaddr */
+ wr_u64(w, segment_vmsize); /* vmsize */
+ wr_u64(w, fileoff_first); /* fileoff */
+ /* filesize = bytes covered by non-zerofill sections (post-section
+ * file offset minus the start). */
+ u64 filesize = 0;
+ for (u32 i = 0; i < nsecs; ++i) {
+ MSec* m = &secs[i];
+ if (m->is_zerofill) continue;
+ u64 end = (u64)m->fileoff + m->size;
+ u64 begin = m->fileoff;
+ if (end > filesize + fileoff_first) filesize = end - fileoff_first;
+ (void)begin; /* `begin` is otherwise unused */
+ }
+ wr_u64(w, filesize);
+ /* maxprot/initprot — VM_PROT_READ|WRITE|EXECUTE = 7 for object segs. */
+ wr_u32(w, 7); /* maxprot */
+ wr_u32(w, 7); /* initprot */
+ wr_u32(w, nsecs); /* nsects */
+ wr_u32(w, 0); /* flags */
+
+ /* sections inline within the segment command */
+ for (u32 i = 0; i < nsecs; ++i) {
+ MSec* m = &secs[i];
+ wr_name16(w, m->ns.sectname, m->ns.sect_len); /* sectname */
+ wr_name16(w, m->ns.segname, m->ns.seg_len); /* segname */
+ wr_u64(w, m->addr); /* addr */
+ wr_u64(w, m->size); /* size */
+ wr_u32(w, m->fileoff); /* offset */
+ wr_u32(w, log2_align(m->align)); /* align (log2) */
+ wr_u32(w, m->reloff); /* reloff */
+ wr_u32(w, m->nreloc); /* nreloc */
+ wr_u32(w, m->flags); /* flags */
+ wr_u32(w, 0); /* reserved1 */
+ wr_u32(w, m->entsize); /* reserved2 */
+ wr_u32(w, 0); /* reserved3 */
+ }
+
+ /* LC_BUILD_VERSION — platform=PLATFORM_MACOS(1), minos/sdk=14.0.0,
+ * ntools=0. The exact min-version isn't load-bearing for MH_OBJECT,
+ * but Apple's `ld` warns when it's missing. */
+ wr_u32(w, LC_BUILD_VERSION); /* cmd */
+ wr_u32(w, build_version_size); /* cmdsize */
+ wr_u32(w, 1); /* platform: PLATFORM_MACOS */
+ wr_u32(w, (14u << 16) | 0); /* minos: 14.0.0 */
+ wr_u32(w, (14u << 16) | 0); /* sdk: 14.0.0 */
+ wr_u32(w, 0); /* ntools */
+
+ /* LC_SYMTAB */
+ wr_u32(w, LC_SYMTAB); /* cmd */
+ wr_u32(w, MACHO_SYMTAB_CMD_SIZE); /* cmdsize */
+ wr_u32(w, (u32)symoff); /* symoff */
+ wr_u32(w, nmsyms); /* nsyms */
+ wr_u32(w, (u32)stroff); /* stroff */
+ wr_u32(w, strtab_size); /* strsize */
+
+ /* LC_DYSYMTAB */
+ wr_u32(w, LC_DYSYMTAB); /* cmd */
+ wr_u32(w, MACHO_DYSYMTAB_CMD_SIZE); /* cmdsize */
+ wr_u32(w, 0); /* ilocalsym */
+ wr_u32(w, nlocals); /* nlocalsym */
+ wr_u32(w, nlocals); /* iextdefsym */
+ wr_u32(w, nextdefs); /* nextdefsym */
+ wr_u32(w, nlocals + nextdefs); /* iundefsym */
+ wr_u32(w, nundefs); /* nundefsym */
+ wr_u32(w, 0);
+ wr_u32(w, 0); /* tocoff, ntoc */
+ wr_u32(w, 0);
+ wr_u32(w, 0); /* modtaboff, nmodtab */
+ wr_u32(w, 0);
+ wr_u32(w, 0); /* extrefsymoff, nextrefsyms */
+ wr_u32(w, 0);
+ wr_u32(w, 0); /* indirectsymoff, nindirectsyms */
+ wr_u32(w, 0);
+ wr_u32(w, 0); /* extreloff, nextrel */
+ wr_u32(w, 0);
+ wr_u32(w, 0); /* locreloff, nlocrel */
+
+ /* section bytes */
+ for (u32 i = 0; i < nsecs; ++i) {
+ MSec* m = &secs[i];
+ if (m->is_zerofill || !m->size) continue;
+ cfree_writer_seek(w, m->fileoff);
+ if (m->obj_bytes) {
+ u32 sz = m->obj_bytes->total;
+ u8* tmp = (u8*)h->alloc(h, sz ? sz : 1, 1);
+ if (sz) buf_flatten(m->obj_bytes, tmp);
+ cfree_writer_write(w, tmp, sz);
+ h->free(h, tmp, sz ? sz : 1);
+ }
+ }
+
+ /* reloc tables */
+ for (u32 i = 0; i < nsecs; ++i) {
+ MSec* m = &secs[i];
+ if (!m->nreloc) continue;
+ cfree_writer_seek(w, m->reloff);
+ cfree_writer_write(w, m->relocs, (size_t)m->nreloc * MACHO_RELOC_SIZE);
+ }
+
+ /* symtab */
+ cfree_writer_seek(w, symoff);
+ for (u32 i = 0; i < nmsyms; ++i) {
+ const MSym* ms = &msyms[i];
+ u8 entry[MACHO_NLIST64_SIZE];
+ wr_u32_le(entry + 0, ms->strx);
+ entry[4] = ms->n_type;
+ entry[5] = ms->n_sect;
+ wr_u16_le(entry + 6, ms->n_desc);
+ wr_u64_le(entry + 8, ms->n_value);
+ cfree_writer_write(w, entry, MACHO_NLIST64_SIZE);
+ }
+
+ /* strtab */
+ {
+ u8* flat = (u8*)arena_alloc(c->scratch, strtab_size ? strtab_size : 1, 1);
+ if (strtab_size) buf_flatten(&strtab, flat);
+ cfree_writer_seek(w, stroff);
+ cfree_writer_write(w, flat, strtab_size);
+ }
+ buf_fini(&strtab);
+}
diff --git a/src/obj/macho/link.c b/src/obj/macho/link.c
@@ -0,0 +1,2613 @@
+/* link_emit_macho — write a dyld-loadable arm64 MH_EXECUTE.
+ *
+ * Mach-O peer of link_emit_elf. Produces a position-independent
+ * MH_EXECUTE that links against libSystem.B.dylib (or any other
+ * dylib/.tbd input) via LC_LOAD_DYLIB + LC_DYLD_CHAINED_FIXUPS. The
+ * binary is ad-hoc codesigned at the tail so the kernel will exec it
+ * on macOS 11+.
+ *
+ * Layout (Apple's stock arm64 layout):
+ *
+ * __PAGEZERO vmaddr 0, vmsize 0x100000000, no file bytes
+ * __TEXT (R-X)
+ * mach_header_64
+ * load commands
+ * [SF_EXEC sections — .text]
+ * [SF_ALLOC R-only sections — .rodata, init/fini_array, etc.]
+ * __stubs (12B per import-func)
+ * __DATA_CONST (RW initially, dyld marks R-only after fixups)
+ * __got (8B per import — both data and func imports)
+ * __DATA (R-W)
+ * [SF_WRITE sections — .data, .bss]
+ * __LINKEDIT (R)
+ * dyld_chained_fixups blob
+ * dyld_exports_trie blob
+ * function starts (empty)
+ * data in code (empty)
+ * symtab
+ * indirect symbol table (one entry per __stubs and __got slot)
+ * strtab
+ * code signature
+ *
+ * Imports are routed:
+ * CALL26/JUMP26 against an imported function -> __stubs entry
+ * GOT_LOAD_PAGE21/PAGEOFF12 against any import -> __got slot
+ * ABS64 against an imported symbol -> chained-bind at site
+ * ABS64 against a defined internal symbol -> chained-rebase at site
+ *
+ * arm64-only. x86_64-macos arrives with x64 codegen. */
+
+#include "link/link.h"
+
+#include <string.h>
+
+#include "core/bytes.h"
+#include "core/heap.h"
+#include "core/pool.h"
+#include "core/sha256.h"
+#include "core/slice.h"
+#include "core/util.h"
+#include "core/vec.h"
+#include "link/link_arch.h"
+#include "link/link_internal.h"
+#include "obj/format.h"
+#include "obj/macho/macho.h"
+
+/* Zero-valued SrcLoc, passed to compiler_panic by the passes below, which
+ * have no source position to report. */
+static SrcLoc no_loc(void) {
+  return (SrcLoc){0, 0, 0};
+}
+
+/* ---- constants ---- */
+/* vmsize of the __PAGEZERO segment; also the vmaddr at which __TEXT
+ * starts (see plan_layout). */
+#define MZ_PAGEZERO 0x100000000ULL
+/* Segment granule (0x4000 bytes): plan_layout rounds the vaddr and file
+ * offset at the start of __DATA_CONST and __DATA up to this. */
+#define MZ_PAGE 0x4000ULL
+/* __got slot size — one 8-byte slot per MachImp entry. */
+#define MZ_GOT_SIZE 8u
+/* __DATA,__thread_ptrs slot size — one pointer per unique TLV referenced
+ * via TLVP_LOAD_PAGE21/PAGEOFF12. Each slot holds the address of the
+ * matching TLV descriptor in __DATA,__thread_vars. */
+#define MZ_TLVP_SIZE 8u
+
+/* Chained-fixups selectors. NOTE(review): presumably the pointer-format
+ * and imports-format values from Apple's fixup-chains.h — confirm against
+ * the chained-fixups emitter, which is outside this view. */
+#define DYLD_CHAINED_PTR_64 2u
+#define DYLD_CHAINED_IMPORT 1u
+
+/* Segment protection bits; OR-ed together for MSeg.maxprot / initprot. */
+#define VM_PROT_READ 0x1u
+#define VM_PROT_WRITE 0x2u
+#define VM_PROT_EXECUTE 0x4u
+
+/* Code-signature constants. NOTE(review): presumably consumed by the
+ * ad-hoc code-signature builder later in this file — not visible here. */
+#define CS_MAGIC_EMBEDDED_SIGNATURE 0xfade0cc0u
+#define CS_MAGIC_CODEDIRECTORY 0xfade0c02u
+#define CSSLOT_CODEDIRECTORY 0u
+#define CS_HASHTYPE_SHA256 2u
+#define CS_SHA256_LEN SHA256_DIGEST_LEN
+#define CS_PAGE_SIZE_LOG2 12u
+#define CS_EXECSEG_MAIN_BINARY 1u
+
+/* extra LC ids */
+#define LC_DYLD_INFO_ONLY (0x22u | 0x80000000u)
+#define LC_FUNCTION_STARTS_C 0x26u
+#define LC_DATA_IN_CODE_C 0x29u
+#define LC_CODE_SIGNATURE_C 0x1du
+
+/* ---- byte buffer ---- */
+
+/* Growable byte buffer backed by a Heap. Built up with the mbuf_* helpers
+ * below and released with mbuf_fini. */
+typedef struct MByte {
+ Heap* heap; /* allocator used to grow and free `data` */
+ u8* data; /* storage; NULL until the first growth */
+ u32 len; /* bytes currently in use */
+ u32 cap; /* capacity of `data`; passed as the size to heap->free */
+} MByte;
+
+/* Reset `b` to an empty buffer that will allocate from `h`. Nothing is
+ * allocated until the first write. */
+static void mbuf_init(MByte* b, Heap* h) {
+  *b = (MByte){.heap = h, .data = NULL, .len = 0, .cap = 0};
+}
+/* Return the storage to the owning heap (if any was allocated) and leave
+ * the buffer empty. */
+static void mbuf_fini(MByte* b) {
+  if (b->data != NULL) {
+    b->heap->free(b->heap, b->data, b->cap);
+    b->data = NULL;
+  }
+  b->len = 0;
+  b->cap = 0;
+}
+/* Make sure the buffer can hold at least `need` bytes, growing through
+ * VEC_GROW when it cannot.
+ * NOTE(review): the VEC_GROW result is discarded here, while other call
+ * sites in this file treat a non-zero result as OOM and panic. Callers of
+ * this helper write into b->data unconditionally — confirm that growth
+ * cannot fail silently on this path. */
+static void mbuf_reserve(MByte* b, u32 need) {
+  if (need > b->cap) {
+    (void)VEC_GROW(b->heap, b->data, b->cap, need);
+  }
+}
+/* Zero-pad the buffer so its length becomes a multiple of `a`.
+ * Returns the (possibly unchanged) buffer length after padding. */
+static u32 mbuf_align(MByte* b, u32 a) {
+  const u32 aligned = (u32)ALIGN_UP((u64)b->len, (u64)a);
+  if (aligned <= b->len) return b->len;
+
+  const u32 gap = aligned - b->len;
+  mbuf_reserve(b, aligned);
+  /* b->data may have moved during the reserve; index it only afterwards. */
+  memset(b->data + b->len, 0, gap);
+  b->len = aligned;
+  return aligned;
+}
+/* Append `n` bytes from `src`. Returns the offset at which the bytes were
+ * placed (the buffer length before the append). `src` is not read when
+ * n == 0. */
+static u32 mbuf_append(MByte* b, const void* src, u32 n) {
+  const u32 start = b->len;
+  mbuf_reserve(b, start + n);
+  if (n != 0) {
+    memcpy(b->data + start, src, n);
+    b->len = start + n;
+  }
+  return start;
+}
+/* Append `v` as 4 little-endian bytes; returns the offset written at. */
+static u32 mbuf_u32(MByte* b, u32 v) {
+  u8 le[4];
+  wr_u32_le(le, v);
+  return mbuf_append(b, le, (u32)sizeof le);
+}
+/* Append `v` as 2 little-endian bytes; returns the offset written at. */
+static u32 mbuf_u16(MByte* b, u16 v) {
+  u8 le[2];
+  wr_u16_le(le, v);
+  return mbuf_append(b, le, (u32)sizeof le);
+}
+/* Append `v` as 8 little-endian bytes; returns the offset written at. */
+static u32 mbuf_u64(MByte* b, u64 v) {
+  u8 le[8];
+  wr_u64_le(le, v);
+  return mbuf_append(b, le, (u32)sizeof le);
+}
+/* Append the single byte `v`; returns the offset written at. */
+static u32 mbuf_u8(MByte* b, u8 v) {
+  const u8 one = v;
+  return mbuf_append(b, &one, 1u);
+}
+/* Append the `n` bytes at `s` followed by a NUL terminator. Returns the
+ * offset of the first byte. `s` is not read when n == 0, so an empty
+ * string still appends exactly one NUL. */
+static u32 mbuf_str(MByte* b, const char* s, u32 n) {
+  const u32 start = b->len;
+  const u32 end = start + n + 1u;
+  mbuf_reserve(b, end);
+  /* Take the destination pointer only after the reserve. */
+  u8* dst = b->data + start;
+  if (n != 0) memcpy(dst, s, n);
+  dst[n] = 0;
+  b->len = end;
+  return start;
+}
+
+/* ---- imports + dylibs ---- */
+
+/* One __got slot owner: either a symbol imported from a dylib
+ * (internal=0) or an in-image symbol reached through a GOT-load reloc
+ * (internal=1). Built by collect_imports; real imports come first in the
+ * array, internal entries are appended after them. */
+typedef struct MachImp {
+ LinkSymId sym; /* LinkSymId of the symbol (canonical id for internals) */
+ Sym name; /* interned symbol name, copied from the LinkSymbol */
+ u32 dylib_ord; /* 1-based ordinal into LC_LOAD_DYLIB list */
+ u32 stub_idx; /* 1-based index into __stubs (0 if data import or
+ internal entry) */
+ u32 got_idx; /* 1-based index into __got */
+ u32 imports_strx; /* offset into chained-fixups symbol pool */
+ u8 is_func; /* 1 if the symbol kind is SK_FUNC/SK_IFUNC, or if a branch
+ reloc targets it (back-classified in collect_imports) */
+ u8 weak; /* 1 if the symbol's binding is SB_WEAK */
+ /* internal=1 means this entry is an in-image symbol that's referenced
+ * via GOT_LOAD_PAGE21 / LD64_GOT_LO12_NC (clang emits these for any
+ * extern global so a single static-link can later become PIC). The
+ * GOT slot stores the symbol's image-relative vaddr and gets a
+ * chained-fixup rebase entry (or no entry at all for a weak-undef
+ * resolving to NULL). No dylib_ord / stub_idx / chained-fixup bind. */
+ u8 internal;
+ u8 pad[1];
+ u64 internal_vaddr; /* image-relative target vaddr; meaningful only when
+ internal=1 */
+} MachImp;
+
+/* One entry of the dylib table built by collect_imports. An entry's
+ * position in MCtx.dylibs plus one is the ordinal stored in
+ * MachImp.dylib_ord. */
+typedef struct MachDylib {
+ Sym install; /* interned install name: the providing DSO input's soname,
+ or "/usr/lib/libSystem.B.dylib" when none is known */
+} MachDylib;
+
+/* One slot in the synthetic __DATA,__thread_ptrs section per unique TLV
+ * descriptor referenced via TLVP_LOAD_PAGE21/PAGEOFF12. Modeled after
+ * MachImp's internal-GOT entries: the slot holds the descriptor address
+ * (REBASE for internal-to-image descriptors, BIND for dylib-imported
+ * ones). The descriptor itself is laid out in __DATA,__thread_vars by
+ * either the input objects (internal) or the providing dylib (imported).
+ * Entries are created by collect_tlv, one per canonical LinkSymId. */
+typedef struct MachTlv {
+ LinkSymId sym; /* canonical descriptor LinkSymId */
+ u32 tlv_idx; /* 1-based slot index in __thread_ptrs */
+ u8 imported; /* 1 == descriptor lives in a dylib (BIND), 0 == internal
+ (REBASE) */
+ u8 pad[3];
+ u32 import_idx; /* 1-based MachImp index when imported (for chained-bind
+ ordinal); stays 0 when collect_tlv finds no MachImp
+ for the symbol */
+} MachTlv;
+
+/* ---- planned section ---- */
+
+/* One planned input to the output image: a LinkSection or a synthetic
+ * buffer (__stubs, __got, __thread_ptrs), together with the Mach-O
+ * segment/section it lands in. Several MSecs may share a
+ * (segname, sectname) pair; they are coalesced into one OutSec. */
+typedef struct MSec {
+ /* Source: either a LinkSection (link_sec_id != 0) or a synthetic
+ * pre-built byte buffer (data + size). */
+ LinkSectionId link_sec_id;
+ const u8* synth_data;
+ u32 synth_size;
+ /* Mach-O placement */
+ const char* segname;
+ const char* sectname;
+ /* Inline storage for segname/sectname when split from a Mach-O
+ * `__SEG,__sect`-form LinkSection name. Names from string literals
+ * (synth sections, derived-from-flags defaults) point at .rodata
+ * and don't use these. 16 bytes matches the on-disk field width.
+ * Because segname/sectname may point into these buffers, a by-value
+ * copy of an MSec must be followed by msec_repair_name_ptrs. */
+ char segname_buf[16];
+ char sectname_buf[16];
+ u64 vaddr;
+ u64 file_offset;
+ u64 size; /* LinkSection size, or the synthetic buffer size */
+ u32 align; /* LinkSection alignment; 1 when the input gives 0 */
+ u32 flags; /* S_TYPE | S_ATTR_* */
+ u32 reserved1; /* indirect-symtab base for __stubs / __got */
+ u32 reserved2; /* per-stub size for __stubs, otherwise 0 */
+ u8 segidx; /* 1=__TEXT, 2=__DATA_CONST, 3=__DATA */
+ u8 is_zerofill; /* 1 when the source section is SSEM_NOBITS */
+ u8 pad[6];
+} MSec;
+
+/* Re-anchor segname/sectname after an MSec has been copied by value.
+ *
+ * pick_macho_names may point segname/sectname at the MSec's own inline
+ * buffers. A struct copy leaves those pointers aimed at the *source*
+ * object's buffers, so the copy must be pointed back at its own storage.
+ *
+ * A name is re-anchored only when its current text equals the inline
+ * buffer's text. plan_layout overrides segname with a literal ("__TEXT",
+ * "__DATA_CONST", "__DATA") when the input's comma-form segment disagrees
+ * with the segment the section is placed in. In that case the buffer
+ * still holds the input's original segment name, and re-pointing at it
+ * unconditionally would silently undo the override once the sort moves
+ * the entry.
+ *
+ * Precondition: the pointer being repaired still refers to readable
+ * storage (the source of the copy has not been overwritten yet). This
+ * holds for the insertion sort in plan_layout, which repairs immediately
+ * after each copy. */
+static void msec_repair_name_ptrs(MSec* m) {
+  if (m->segname_buf[0] && m->segname &&
+      strcmp(m->segname, m->segname_buf) == 0)
+    m->segname = m->segname_buf;
+  if (m->sectname_buf[0] && m->sectname &&
+      strcmp(m->sectname, m->sectname_buf) == 0)
+    m->sectname = m->sectname_buf;
+}
+
+/* One output segment. MCtx.segs holds five of these in fixed order:
+ * PAGEZERO, TEXT, DATA_CONST, DATA, LINKEDIT. */
+typedef struct MSeg {
+ const char* name; /* segment name literal, e.g. "__TEXT" */
+ u32 maxprot; /* VM_PROT_* mask */
+ u32 initprot; /* VM_PROT_* mask */
+ u64 vmaddr;
+ u64 vmsize;
+ u64 fileoff;
+ u64 filesize;
+ u32 nsects; /* MSec count in segment — internal layout */
+ u32 first_sec; /* first index into MSec[] */
+ u32 nouts; /* OutSec count in segment — what hits the file */
+ u32 first_out; /* first index into OutSec[] */
+} MSeg;
+
+/* On-disk section view: one record per (segname, sectname) within a
+ * segment. Mach-O requires this — emitting one section_64 per input
+ * MSec yields sibling __TEXT,__text records that violate the spec.
+ * Built from MSec[] after vaddr placement; reloc-apply still uses
+ * MSec[] for byte-buffer addressing.
+ * The fields mirror the same-named MSec fields. */
+typedef struct OutSec {
+ const char* segname;
+ const char* sectname;
+ u64 vaddr;
+ u64 file_offset;
+ u64 size;
+ u32 align;
+ u32 flags; /* S_TYPE | S_ATTR_*, as in MSec.flags */
+ u32 reserved1;
+ u32 reserved2;
+ u8 segidx; /* index into MCtx.segs, as in MSec.segidx */
+ u8 is_zerofill;
+} OutSec;
+
+/* ---- main context ---- */
+
+/* Shared state threaded through the passes in this file (collect_imports,
+ * collect_tlv, plan_layout, ...). The arrays hanging off it are allocated
+ * from `h`. */
+typedef struct MCtx {
+ LinkImage* img; /* linked image: symbols, relocs and sections to emit */
+ Compiler* c; /* diagnostics (compiler_panic) and the global string pool */
+ Heap* h; /* allocator for every owned array below */
+ Writer* w;
+ Linker* linker; /* may be NULL; when set, supplies the DSO inputs */
+ const LinkArchDesc* link_arch; /* reloc classifiers (branch / GOT / TLVP) */
+ const ObjMachoArchOps* macho; /* arch-specific Mach-O details (stub_size) */
+
+ /* imports */
+ MachImp* imports;
+ u32 nimports;
+ u32 nimports_real; /* count of imports with internal=0 (== prefix length;
+ * collect_imports appends internal=1 entries last) */
+ u32 nimport_funcs; /* number of imports that received a __stubs entry */
+ MachDylib* dylibs;
+ u32 ndylibs;
+ /* sym->import index, 1-based, 0 = not an import. Sized to LinkSymId space
+ * + 1. */
+ u32* sym_to_imp;
+ u32 sym_to_imp_size;
+
+ /* sections + segments */
+ MSec* secs;
+ u32 nsecs;
+ OutSec* outs;
+ u32 nouts;
+ MSeg segs[5]; /* PAGEZERO, TEXT, DATA_CONST, DATA, LINKEDIT */
+ u32 nsegs;
+
+ /* Synthetic byte buffers, owned. */
+ u8* stubs_bytes;
+ u32 stubs_size;
+ u8* got_bytes;
+ u32 got_size;
+ /* TLV pointer slots — one entry in __DATA,__thread_ptrs per unique
+ * descriptor referenced via TLVP_LOAD_PAGE21/PAGEOFF12. sym_to_tlv
+ * maps LinkSymId → 1-based slot index (parallel to sym_to_imp). Slot
+ * bytes are populated at apply_relocs time once shift_sections has
+ * pinned descriptor vaddrs. */
+ MachTlv* tlv_slots;
+ u32 ntlv;
+ u32* sym_to_tlv;
+ u32 sym_to_tlv_size;
+ u8* tlv_ptrs_bytes;
+ u32 tlv_ptrs_size;
+ u64 tlv_ptrs_vaddr;
+ /* Vaddr of the first thread-local-storage section
+ * (__thread_data / __thread_bss). Each TLV descriptor's word 2
+ * stores the symbol's offset within this image rather than an
+ * absolute address — see apply_relocs's S_THREAD_LOCAL_VARIABLES
+ * ABS64 special case. */
+ u64 tls_image_vaddr;
+ u8 has_tls_image;
+
+ /* Final layout (computed during plan) */
+ u64 text_vaddr;
+ u64 text_filesz;
+ u64 stubs_vaddr;
+ u64 got_vaddr;
+ u64 data_const_vaddr;
+ u64 data_vaddr;
+ u64 data_const_filesz;
+ u64 data_filesz;
+ u64 data_memsz;
+ u64 linkedit_vaddr;
+ u64 linkedit_fileoff;
+ u32 entry_offset; /* offset of entry within __TEXT segment */
+
+ u64 headers_size; /* header + loadcmds */
+
+ /* LINKEDIT contents */
+ MByte chained_fixups;
+ MByte exports_trie;
+ MByte symtab; /* binary nlist_64 array */
+ MByte strtab;
+ MByte indirect; /* u32 array */
+ MByte fn_starts;
+ MByte data_in_code;
+ MByte codesig;
+
+ /* Offsets / sizes of the LINKEDIT pieces above.
+ * NOTE(review): whether these are file offsets or LINKEDIT-relative
+ * is decided by code outside this view — confirm before relying on it. */
+ u32 chained_fixups_off;
+ u32 exports_trie_off;
+ u32 fn_starts_off;
+ u32 data_in_code_off;
+ u32 symtab_off;
+ u32 indirect_off;
+ u32 strtab_off;
+ u32 codesig_off;
+ u32 codesig_size;
+ u32 nsyms;
+
+ u8 uuid[16];
+} MCtx;
+
+/* ---- helpers for finding LinkSymbol vaddr ---- */
+
+/* Map a 1-based LinkSymId to its LinkSymbol. Returns NULL for
+ * LINK_SYM_NONE and for ids beyond the image's symbol count. */
+static LinkSymbol* sym_at(LinkImage* img, LinkSymId id) {
+  if (id == LINK_SYM_NONE) return NULL;
+  if (id > LinkSyms_count(&img->syms)) return NULL;
+  return LinkSyms_at(&img->syms, id - 1);
+}
+
+/* ---- pass: collect imports ---- */
+
+/* Look up `install` in the dylib table. Returns its 1-based ordinal, or
+ * 0 when the install name has not been recorded yet. */
+static u32 dylib_ordinal_of(MCtx* x, Sym install) {
+  u32 slot = 0;
+  while (slot < x->ndylibs) {
+    if (x->dylibs[slot].install == install) return slot + 1u;
+    ++slot;
+  }
+  return 0;
+}
+
+/* Build the import table (x->imports), the dylib table (x->dylibs) and
+ * the LinkSymId -> 1-based import index map (x->sym_to_imp).
+ *
+ * Steps, in order:
+ *   1. one MachImp per canonical imported, named symbol;
+ *   2. mark any import targeted by a branch reloc as a function;
+ *   3. assign each import a dylib ordinal, then add every remaining DSO
+ *      input's install name;
+ *   4. append internal (in-image) entries for GOT-load relocs whose
+ *      target is not an import;
+ *   5. hand out got_idx for every entry and stub_idx for real function
+ *      imports.
+ *
+ * Panics through compiler_panic on allocation failure. */
+static void collect_imports(MCtx* x) {
+  LinkImage* img = x->img;
+  Heap* h = x->h;
+
+  x->sym_to_imp_size = LinkSyms_count(&img->syms) + 1u;
+  x->sym_to_imp =
+      (u32*)h->alloc(h, sizeof(u32) * x->sym_to_imp_size, _Alignof(u32));
+  if (!x->sym_to_imp)
+    compiler_panic(x->c, no_loc(), "link_macho: oom on sym_to_imp");
+  memset(x->sym_to_imp, 0, sizeof(u32) * x->sym_to_imp_size);
+
+  u32 cap = 0, cap_d = 0;
+  for (u32 i = 0; i < LinkSyms_count(&img->syms); ++i) {
+    LinkSymbol* s = LinkSyms_at(&img->syms, i);
+    if (!s->imported) continue;
+    if (s->name == 0) continue;
+    /* Only the canonical symbol for a name gets an entry. */
+    LinkSymId canon = symhash_get(&img->globals, s->name);
+    if (canon != LINK_SYM_NONE && canon != s->id) continue;
+    if (VEC_GROW(h, x->imports, cap, x->nimports + 1u))
+      compiler_panic(x->c, no_loc(), "link_macho: oom on imports");
+    MachImp* mi = &x->imports[x->nimports++];
+    memset(mi, 0, sizeof(*mi));
+    mi->sym = s->id;
+    mi->name = s->name;
+    mi->is_func = (s->kind == SK_FUNC || s->kind == SK_IFUNC) ? 1 : 0;
+    mi->weak = (s->bind == SB_WEAK) ? 1 : 0;
+    x->sym_to_imp[s->id] = x->nimports;
+  }
+
+  /* Back-classify: any CALL26/JUMP26 reloc target -> function. */
+  for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) {
+    LinkRelocApply* r = LinkRelocs_at(&img->relocs, i);
+    if (!x->link_arch->is_branch_reloc ||
+        !x->link_arch->is_branch_reloc(r->kind))
+      continue;
+    if (r->target == LINK_SYM_NONE || r->target >= x->sym_to_imp_size) continue;
+    u32 idx = x->sym_to_imp[r->target];
+    if (!idx) {
+      /* Resolve through canonical. */
+      LinkSymbol* tgt = LinkSyms_at(&img->syms, r->target - 1);
+      if (tgt->name == 0) continue;
+      LinkSymId canon = symhash_get(&img->globals, tgt->name);
+      if (canon == LINK_SYM_NONE || canon >= x->sym_to_imp_size) continue;
+      idx = x->sym_to_imp[canon];
+      if (!idx) continue;
+      /* Stash so future lookups skip this loop. */
+      x->sym_to_imp[r->target] = idx;
+    }
+    x->imports[idx - 1].is_func = 1;
+  }
+
+  /* Build dylib ordinal table. Pull soname from the providing DSO. */
+  for (u32 i = 0; i < x->nimports; ++i) {
+    MachImp* mi = &x->imports[i];
+    LinkSymbol* s = sym_at(img, mi->sym);
+    LinkInputId dso_id = s ? s->dso_input_id : LINK_INPUT_NONE;
+    Sym install = 0;
+    if (dso_id != LINK_INPUT_NONE && x->linker &&
+        dso_id - 1u < LinkInputs_count(&x->linker->inputs)) {
+      LinkInput* in = LinkInputs_at(&x->linker->inputs, dso_id - 1u);
+      if (in->kind == LINK_INPUT_DSO_BYTES) install = in->soname;
+    }
+    /* No known provider: fall back to libSystem. */
+    if (install == 0)
+      install = pool_intern_slice(x->c->global,
+                                  SLICE_LIT("/usr/lib/libSystem.B.dylib"));
+    u32 ord = dylib_ordinal_of(x, install);
+    if (!ord) {
+      if (VEC_GROW(h, x->dylibs, cap_d, x->ndylibs + 1u))
+        compiler_panic(x->c, no_loc(), "link_macho: oom on dylibs");
+      x->dylibs[x->ndylibs].install = install;
+      ++x->ndylibs;
+      ord = x->ndylibs;
+    }
+    mi->dylib_ord = ord;
+  }
+
+  /* Always include every DSO input's install-name. */
+  if (x->linker) {
+    for (u32 ii = 0; ii < LinkInputs_count(&x->linker->inputs); ++ii) {
+      LinkInput* in = LinkInputs_at(&x->linker->inputs, ii);
+      if (in->kind != LINK_INPUT_DSO_BYTES) continue;
+      if (in->soname == 0) continue;
+      if (dylib_ordinal_of(x, in->soname)) continue;
+      if (VEC_GROW(h, x->dylibs, cap_d, x->ndylibs + 1u))
+        compiler_panic(x->c, no_loc(), "link_macho: oom on dylibs");
+      x->dylibs[x->ndylibs].install = in->soname;
+      ++x->ndylibs;
+    }
+  }
+
+  /* All entries so far are real imports; remember the partition point
+   * so import/symtab table emit loops can skip the appended internals. */
+  x->nimports_real = x->nimports;
+
+  /* Internal GOT pass. clang on Mach-O routes every extern-global
+   * reference through the GOT (GOT_LOAD_PAGE21 / LD64_GOT_LO12_NC), so
+   * even a common symbol or weak-undef that ends up resolved within the
+   * image still needs a __got slot. For each such reloc whose target
+   * isn't an existing import, materialize a MachImp with internal=1.
+   * The slot's contents are filled at write time and a chained-fixup
+   * REBASE entry (or none, for weak undef → NULL) keeps it valid
+   * post-ASLR. */
+  for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) {
+    LinkRelocApply* r = LinkRelocs_at(&img->relocs, i);
+    if (!x->link_arch->is_got_load_reloc ||
+        !x->link_arch->is_got_load_reloc(r->kind))
+      continue;
+    if (r->target == LINK_SYM_NONE || r->target >= x->sym_to_imp_size) continue;
+    if (x->sym_to_imp[r->target]) continue;
+    LinkSymbol* t = sym_at(img, r->target);
+    if (!t) continue;
+    /* Resolve through canonical so we share a single slot per symbol. */
+    LinkSymId canon = r->target;
+    if (t->name != 0) {
+      LinkSymId hit = symhash_get(&img->globals, t->name);
+      if (hit != LINK_SYM_NONE) {
+        /* Same guard as the back-classify loop above: never index
+         * sym_to_imp with an id outside the map. Such an id has no
+         * LinkSymbol either (sym_at would return NULL), so skipping the
+         * reloc matches what the NULL check below would have done. */
+        if (hit >= x->sym_to_imp_size) continue;
+        canon = hit;
+        if (x->sym_to_imp[canon]) {
+          x->sym_to_imp[r->target] = x->sym_to_imp[canon];
+          continue;
+        }
+        t = sym_at(img, canon);
+        if (!t) continue;
+      }
+    }
+    if (VEC_GROW(h, x->imports, cap, x->nimports + 1u))
+      compiler_panic(x->c, no_loc(), "link_macho: oom on internal got");
+    MachImp* mi = &x->imports[x->nimports++];
+    memset(mi, 0, sizeof(*mi));
+    mi->sym = canon;
+    mi->name = t->name;
+    mi->is_func = (t->kind == SK_FUNC || t->kind == SK_IFUNC) ? 1 : 0;
+    mi->weak = (t->bind == SB_WEAK) ? 1 : 0;
+    mi->internal = 1;
+    /* internal_vaddr is read fresh from the LinkSymbol when the slot
+     * gets initialized — collect_imports runs before shift_sections
+     * rebases section vaddrs to Mach-O layout, so capturing here would
+     * be stale by the time __got bytes are written. */
+    mi->internal_vaddr = 0;
+    x->sym_to_imp[canon] = x->nimports;
+    if (canon != r->target) x->sym_to_imp[r->target] = x->nimports;
+  }
+
+  /* Assign stub_idx + got_idx. Internal entries get a slot but no stub:
+   * the call site (CALL26) on internal funcs goes direct, not via stub. */
+  u32 stub_run = 0;
+  for (u32 i = 0; i < x->nimports; ++i) {
+    MachImp* mi = &x->imports[i];
+    mi->got_idx = i + 1u;
+    if (mi->is_func && !mi->internal) mi->stub_idx = ++stub_run;
+  }
+  x->nimport_funcs = stub_run;
+}
+
+/* ---- pass: collect TLV pointer slots ----
+ *
+ * Walks every reloc the arch classifies as TLVP and gives each distinct
+ * descriptor one MachTlv entry, i.e. one slot in the synthetic
+ * __DATA,__thread_ptrs section. Targets are resolved to their canonical
+ * LinkSymId first, so N references to the same descriptor share a slot.
+ * x->sym_to_tlv records the 1-based slot index for both the canonical id
+ * and any alias id seen. Slot contents are not written here.
+ *
+ * For descriptors that live in a dylib the matching MachImp index is
+ * looked up in sym_to_imp (filled by collect_imports), because the
+ * chained-bind ordinal comes from there. */
+static void collect_tlv(MCtx* x) {
+  LinkImage* img = x->img;
+  Heap* h = x->h;
+
+  x->sym_to_tlv_size = LinkSyms_count(&img->syms) + 1u;
+  x->sym_to_tlv =
+      (u32*)h->alloc(h, sizeof(u32) * x->sym_to_tlv_size, _Alignof(u32));
+  if (x->sym_to_tlv == NULL)
+    compiler_panic(x->c, no_loc(), "link_macho: oom on sym_to_tlv");
+  memset(x->sym_to_tlv, 0, sizeof(u32) * x->sym_to_tlv_size);
+
+  u32 slot_cap = 0;
+  for (u32 ri = 0; ri < LinkRelocs_count(&img->relocs); ++ri) {
+    LinkRelocApply* rel = LinkRelocs_at(&img->relocs, ri);
+    if (!x->link_arch->is_tlvp_reloc) continue;
+    if (!x->link_arch->is_tlvp_reloc(rel->kind)) continue;
+
+    const LinkSymId ref = rel->target;
+    if (ref == LINK_SYM_NONE || ref >= x->sym_to_tlv_size) continue;
+
+    /* Fold per-input duplicate undefs onto the canonical symbol. */
+    LinkSymId canon = ref;
+    LinkSymbol* desc = sym_at(img, ref);
+    if (!desc) continue;
+    if (desc->name != 0) {
+      LinkSymId hit = symhash_get(&img->globals, desc->name);
+      if (hit != LINK_SYM_NONE) {
+        canon = hit;
+        desc = sym_at(img, canon);
+        if (!desc) continue;
+      }
+    }
+
+    u32 slot = x->sym_to_tlv[canon];
+    if (slot == 0) {
+      /* First sighting of this descriptor: create its slot. */
+      if (VEC_GROW(h, x->tlv_slots, slot_cap, x->ntlv + 1u))
+        compiler_panic(x->c, no_loc(), "link_macho: oom on tlv_slots");
+      slot = ++x->ntlv;
+      MachTlv* ts = &x->tlv_slots[slot - 1u];
+      memset(ts, 0, sizeof(*ts));
+      ts->sym = canon;
+      ts->tlv_idx = slot;
+      ts->imported = desc->imported ? 1u : 0u;
+      if (ts->imported) {
+        /* Find the MachImp that carries the bind ordinal; retry through
+         * the name hash when the canonical id has no direct mapping. */
+        u32 imp = (canon < x->sym_to_imp_size) ? x->sym_to_imp[canon] : 0u;
+        if (!imp && desc->name != 0) {
+          LinkSymId again = symhash_get(&img->globals, desc->name);
+          if (again != LINK_SYM_NONE && again < x->sym_to_imp_size)
+            imp = x->sym_to_imp[again];
+        }
+        ts->import_idx = imp;
+      }
+      x->sym_to_tlv[canon] = slot;
+    }
+    /* Remember the alias so later lookups by the raw reloc target hit. */
+    if (canon != ref) x->sym_to_tlv[ref] = slot;
+  }
+}
+
+/* ---- pass: plan Mach-O sections ----
+ *
+ * Walks LinkImage sections. Each non-zero-size LinkSection becomes one
+ * MSec. Synthetic __stubs and __got are appended at the right segment
+ * boundaries. Vaddr and file_offset are assigned in a single forward
+ * pass starting at __TEXT base; __PAGEZERO and __LINKEDIT are special. */
+
+/* Zero the whole MSeg, then record its name and protection masks. All
+ * address, size and count fields start at 0. */
+static void seg_init(MSeg* s, const char* name, u32 maxp, u32 initp) {
+  memset(s, 0, sizeof *s);
+  s->initprot = initp;
+  s->maxprot = maxp;
+  s->name = name;
+}
+
+/* 1 when the section carries SF_WRITE, else 0. */
+static int sec_is_writable(const LinkSection* ls) {
+  return (ls->flags & SF_WRITE) ? 1 : 0;
+}
+/* 1 when the section carries SF_EXEC, else 0. */
+static int sec_is_exec(const LinkSection* ls) {
+  return (ls->flags & SF_EXEC) ? 1 : 0;
+}
+/* 1 when the section's semantic is SSEM_NOBITS (no file bytes), else 0. */
+static int sec_is_zerofill(const LinkSection* ls) {
+  return (ls->sem == SSEM_NOBITS) ? 1 : 0;
+}
+
+/* Linear scan of the image's relocs: returns 1 as soon as an R_ABS64
+ * reloc belonging to section `id` is found, 0 if there is none. */
+static int section_has_abs64_reloc(const LinkImage* img, LinkSectionId id) {
+  u32 k = 0;
+  while (k < LinkRelocs_count(&img->relocs)) {
+    const LinkRelocApply* rel = LinkRelocs_at(&img->relocs, k);
+    if (rel->link_section_id == id && rel->kind == R_ABS64) return 1;
+    ++k;
+  }
+  return 0;
+}
+
+/* Decide whether a section goes into __DATA_CONST: it must be non-empty,
+ * neither executable, writable nor zerofill, and carry at least one
+ * R_ABS64 reloc. A NULL `ls` yields 0. */
+static int sec_needs_data_const(const LinkImage* img, const LinkSection* ls) {
+  if (!ls) return 0;
+  if (!ls->size) return 0;
+  if (sec_is_exec(ls)) return 0;
+  if (sec_is_writable(ls)) return 0;
+  if (sec_is_zerofill(ls)) return 0;
+  return section_has_abs64_reloc(img, ls->id);
+}
+
+/* Choose the Mach-O (segname, sectname) pair for a LinkSection and store
+ * it in `m`.
+ *
+ * A name of the form "__SEG,__sect" (round-tripped from a Mach-O input)
+ * is split at the first comma; each half is copied into the MSec's own
+ * 16-byte buffer, truncated to 15 characters plus NUL. Storage is
+ * per-MSec on purpose: an earlier version used a shared static buffer,
+ * which aliased every section to whichever name was set last.
+ *
+ * Any other name falls back to a literal pair derived from the flags:
+ * executable -> __TEXT,__text; writable -> __DATA,__bss or __DATA,__data;
+ * otherwise __TEXT,__const. */
+static void pick_macho_names(const LinkSection* ls, Compiler* c, MSec* m) {
+  Slice full = pool_slice(c->global, ls->name);
+  const char* text = full.s;
+  size_t text_len = full.len;
+
+  const char* comma = text ? memchr(text, ',', text_len) : NULL;
+  if (comma) {
+    size_t seg_len = (size_t)(comma - text);
+    size_t sect_len = text_len - seg_len - 1;
+    if (seg_len > 15) seg_len = 15;
+    if (sect_len > 15) sect_len = 15;
+
+    memcpy(m->segname_buf, text, seg_len);
+    m->segname_buf[seg_len] = 0;
+    memcpy(m->sectname_buf, comma + 1, sect_len);
+    m->sectname_buf[sect_len] = 0;
+
+    m->segname = m->segname_buf;
+    m->sectname = m->sectname_buf;
+    return;
+  }
+
+  /* No comma-form name: derive the pair from the section flags. */
+  const char* seg;
+  const char* sect;
+  if (sec_is_exec(ls)) {
+    seg = "__TEXT";
+    sect = "__text";
+  } else if (sec_is_writable(ls)) {
+    seg = "__DATA";
+    sect = sec_is_zerofill(ls) ? "__bss" : "__data";
+  } else {
+    seg = "__TEXT";
+    sect = "__const";
+  }
+  m->segname = seg;
+  m->sectname = sect;
+}
+
+static void plan_layout(MCtx* x) {
+ LinkImage* img = x->img;
+ Heap* h = x->h;
+
+ /* PAGEZERO */
+ seg_init(&x->segs[0], "__PAGEZERO", 0, 0);
+ x->segs[0].vmaddr = 0;
+ x->segs[0].vmsize = MZ_PAGEZERO;
+ x->segs[0].fileoff = 0;
+ x->segs[0].filesize = 0;
+ x->segs[0].nsects = 0;
+ x->segs[0].first_sec = 0;
+
+ /* Segments 1..4 */
+ seg_init(&x->segs[1], "__TEXT", VM_PROT_READ | VM_PROT_EXECUTE,
+ VM_PROT_READ | VM_PROT_EXECUTE);
+ seg_init(&x->segs[2], "__DATA_CONST", VM_PROT_READ | VM_PROT_WRITE,
+ VM_PROT_READ | VM_PROT_WRITE);
+ seg_init(&x->segs[3], "__DATA", VM_PROT_READ | VM_PROT_WRITE,
+ VM_PROT_READ | VM_PROT_WRITE);
+ seg_init(&x->segs[4], "__LINKEDIT", VM_PROT_READ, VM_PROT_READ);
+ x->nsegs = 5;
+
+ /* Pre-allocate MSec capacity: every LinkSection + 2 synth (__stubs,
+ * __got). (LinkSections from the dynamic-link layer — .dynsym / .plt
+ * etc. — were synthesized by layout_dyn for ELF; we won't have them
+ * since pie wasn't set on this Linker. Still, oversize by a few.) */
+ u32 cap = LinkRelocs_count(&img->relocs) + img->nsections + 4u;
+ x->secs = (MSec*)h->alloc(h, sizeof(MSec) * cap, _Alignof(MSec));
+ if (!x->secs) compiler_panic(x->c, no_loc(), "link_macho: oom on MSec");
+ memset(x->secs, 0, sizeof(MSec) * cap);
+ x->nsecs = 0;
+
+ /* Pass 1: __TEXT segment. Header + loadcmds reserve front. */
+ /* We need the exact header_size to set first sec's file_offset. We'll
+ * compute it later, but reserve a placeholder; for now use 0 and patch
+ * in pass 4 (offsets get bumped). */
+
+ u64 text_vaddr = MZ_PAGEZERO;
+ /* We'll compute headers_size after plan; stash starting vaddr only. */
+ x->segs[1].vmaddr = text_vaddr;
+ x->segs[1].fileoff = 0;
+ x->text_vaddr = text_vaddr;
+
+ /* Collect: (a) exec sections, (b) read-only allocatable sections. */
+ /* (cursor advances per-segment in pass 2; nothing to track here) */
+
+ /* We don't know the header size yet; walk sections first to enumerate
+ * MSec entries, then back-fill file_offset/vaddr after we know the
+ * load-command count. */
+
+ u32 first_text_sec = x->nsecs;
+
+ for (u32 i = 0; i < img->nsections; ++i) {
+ LinkSection* ls = &img->sections[i];
+ if (!ls->size) continue;
+ if (sec_is_writable(ls)) continue;
+ if (sec_is_zerofill(ls)) continue; /* placed in __DATA */
+ if (sec_needs_data_const(img, ls)) continue;
+ MSec* m = &x->secs[x->nsecs++];
+ memset(m, 0, sizeof(*m));
+ m->link_sec_id = ls->id;
+ pick_macho_names(ls, x->c, m);
+ /* Force into __TEXT. */
+ if (!slice_eq_cstr(slice_from_cstr(m->segname), "__TEXT"))
+ m->segname = "__TEXT";
+ m->align = ls->align ? ls->align : 1u;
+ m->size = ls->size;
+ m->segidx = 1;
+ m->flags = sec_is_exec(ls) ? (0x80000000u /*S_ATTR_PURE_INSTRUCTIONS*/ |
+ 0x00000400u /*S_ATTR_SOME_INSTRUCTIONS*/)
+ : 0u;
+ }
+
+ /* __stubs synthetic */
+ if (x->nimport_funcs) {
+ x->stubs_size = x->nimport_funcs * x->macho->stub_size;
+ x->stubs_bytes = (u8*)h->alloc(h, x->stubs_size, 4);
+ if (!x->stubs_bytes)
+ compiler_panic(x->c, no_loc(), "link_macho: oom on stubs");
+ memset(x->stubs_bytes, 0, x->stubs_size);
+ MSec* m = &x->secs[x->nsecs++];
+ memset(m, 0, sizeof(*m));
+ m->synth_data = x->stubs_bytes;
+ m->synth_size = x->stubs_size;
+ m->segname = "__TEXT";
+ m->sectname = "__stubs";
+ m->align = 4u;
+ m->size = x->stubs_size;
+ m->segidx = 1;
+ m->flags = 0x80000000u | 0x00000400u | 0x00000008u /*S_SYMBOL_STUBS*/;
+ m->reserved1 = 0; /* fill in later: indirect-symtab base */
+ m->reserved2 = x->macho->stub_size;
+ }
+ x->segs[1].nsects = x->nsecs - first_text_sec;
+ x->segs[1].first_sec = first_text_sec;
+
+ /* __DATA_CONST: __got synth */
+ u32 first_dc = x->nsecs;
+ if (x->nimports) {
+ x->got_size = x->nimports * MZ_GOT_SIZE;
+ x->got_bytes = (u8*)h->alloc(h, x->got_size, 8);
+ if (!x->got_bytes) compiler_panic(x->c, no_loc(), "link_macho: oom on got");
+ memset(x->got_bytes, 0, x->got_size);
+ MSec* m = &x->secs[x->nsecs++];
+ memset(m, 0, sizeof(*m));
+ m->synth_data = x->got_bytes;
+ m->synth_size = x->got_size;
+ m->segname = "__DATA_CONST";
+ m->sectname = "__got";
+ m->align = 8u;
+ m->size = x->got_size;
+ m->segidx = 2;
+ m->flags = 0x00000006u /*S_NON_LAZY_SYMBOL_POINTERS*/;
+ m->reserved1 = 0; /* indirect-symtab base */
+ }
+ for (u32 i = 0; i < img->nsections; ++i) {
+ LinkSection* ls = &img->sections[i];
+ if (!sec_needs_data_const(img, ls)) continue;
+ MSec* m = &x->secs[x->nsecs++];
+ memset(m, 0, sizeof(*m));
+ m->link_sec_id = ls->id;
+ pick_macho_names(ls, x->c, m);
+ m->segname = "__DATA_CONST";
+ m->align = ls->align ? ls->align : 1u;
+ m->size = ls->size;
+ m->segidx = 2;
+ m->flags = 0;
+ }
+ x->segs[2].nsects = x->nsecs - first_dc;
+ x->segs[2].first_sec = first_dc;
+
+ /* __DATA segment: writable sections + zerofill. */
+ u32 first_d = x->nsecs;
+ for (u32 i = 0; i < img->nsections; ++i) {
+ LinkSection* ls = &img->sections[i];
+ if (!ls->size && !sec_is_zerofill(ls)) continue;
+ if (!sec_is_writable(ls)) continue;
+ MSec* m = &x->secs[x->nsecs++];
+ memset(m, 0, sizeof(*m));
+ m->link_sec_id = ls->id;
+ pick_macho_names(ls, x->c, m);
+ if (!slice_eq_cstr(slice_from_cstr(m->segname), "__DATA"))
+ m->segname = "__DATA";
+ m->align = ls->align ? ls->align : 1u;
+ m->size = ls->size;
+ m->segidx = 3;
+ m->is_zerofill = sec_is_zerofill(ls) ? 1 : 0;
+ m->flags = m->is_zerofill ? 0x00000001u /*S_ZEROFILL*/ : 0;
+ /* dyld dispatches on the section type byte (low 8 bits of flags).
+ * __mod_init_func / __mod_term_func sections must carry the
+ * S_MOD_INIT_FUNC_POINTERS / S_MOD_TERM_FUNC_POINTERS type or dyld
+ * skips them entirely — leaving constructors unrun at startup. */
+ if (slice_eq_cstr(slice_from_cstr(m->sectname), "__mod_init_func"))
+ m->flags = 0x00000009u /*S_MOD_INIT_FUNC_POINTERS*/;
+ else if (slice_eq_cstr(slice_from_cstr(m->sectname), "__mod_term_func"))
+ m->flags = 0x0000000au /*S_MOD_TERM_FUNC_POINTERS*/;
+ else if (ls->flags & SF_TLS) {
+ /* TLV sections: dyld dispatches by section type, not name. Map
+ * __thread_vars → S_THREAD_LOCAL_VARIABLES (descriptor records),
+ * __thread_data → S_THREAD_LOCAL_REGULAR (initial data),
+ * __thread_bss → S_THREAD_LOCAL_ZEROFILL (zero-init data). Done
+ * by sectname so per-TU inputs without a Mach-O ext_type still
+ * get the right section type. */
+ if (slice_eq_cstr(slice_from_cstr(m->sectname), "__thread_vars")) {
+ m->flags = S_THREAD_LOCAL_VARIABLES;
+ /* Each descriptor is three pointers (24B) whose first word is
+ * dyld's _tlv_bootstrap thunk pointer. Clang/llvm emit
+ * __thread_vars with on-disk alignment 1 (relying on layout to
+ * land it on 8); force 8-alignment here so the descriptor
+ * pointers fall on 8-byte boundaries — dyld's chained-fixup
+ * processing assumes that. */
+ if (m->align < 8u) m->align = 8u;
+ } else if (m->is_zerofill)
+ m->flags = S_THREAD_LOCAL_ZEROFILL;
+ else
+ m->flags = S_THREAD_LOCAL_REGULAR;
+ }
+ }
+ /* __thread_ptrs synthetic (TLV pointer slots). Emitted into __DATA
+ * after the user's TLV input sections so descriptors and their
+ * pointers share the same segment. Each slot's runtime initial
+ * value (= TLV descriptor address) is patched during apply_relocs. */
+ if (x->ntlv) {
+ x->tlv_ptrs_size = x->ntlv * MZ_TLVP_SIZE;
+ x->tlv_ptrs_bytes = (u8*)h->alloc(h, x->tlv_ptrs_size, 8);
+ if (!x->tlv_ptrs_bytes)
+ compiler_panic(x->c, no_loc(), "link_macho: oom on tlv_ptrs");
+ memset(x->tlv_ptrs_bytes, 0, x->tlv_ptrs_size);
+ MSec* m = &x->secs[x->nsecs++];
+ memset(m, 0, sizeof(*m));
+ m->synth_data = x->tlv_ptrs_bytes;
+ m->synth_size = x->tlv_ptrs_size;
+ m->segname = "__DATA";
+ m->sectname = "__thread_ptrs";
+ m->align = 8u;
+ m->size = x->tlv_ptrs_size;
+ m->segidx = 3;
+ m->flags = S_THREAD_LOCAL_VARIABLE_POINTERS;
+ }
+ x->segs[3].nsects = x->nsecs - first_d;
+ x->segs[3].first_sec = first_d;
+
+ /* Group MSecs by (segname, sectname) within each segment so vaddr
+ * placement keeps same-named runs contiguous. Otherwise Phase B's
+ * adjacency-based coalescing splits a single Mach-O section into
+ * multiple OutSecs (e.g. `.text` from an in-memory ObjBuilder and
+ * `__TEXT,__text` from a Mach-O .o input both map to `__TEXT,__text`
+ * but arrive in separate link_layout groups, interleaved with other
+ * sections from each input). Stable insertion sort preserves input
+ * order within a name, which matters for synth __stubs/__thread_ptrs
+ * order relative to peers. */
+ for (u32 i = 0; i < x->nsegs; ++i) {
+ MSeg* sg = &x->segs[i];
+ if (sg->nsects < 2) continue;
+ u32 base = sg->first_sec;
+ u32 n = sg->nsects;
+ for (u32 a = 1; a < n; ++a) {
+ MSec key = x->secs[base + a];
+ msec_repair_name_ptrs(&key);
+ u32 j = a;
+ while (j > 0) {
+ MSec* prev = &x->secs[base + j - 1];
+ /* Ordering compare for stable sort: slices don't order, keep strcmp. */
+ int cmp = strcmp(prev->segname, key.segname); /* ordering */
+ if (cmp == 0) cmp = strcmp(prev->sectname, key.sectname); /* ordering */
+ if (cmp <= 0) break;
+ x->secs[base + j] = x->secs[base + j - 1];
+ msec_repair_name_ptrs(&x->secs[base + j]);
+ --j;
+ }
+ x->secs[base + j] = key;
+ msec_repair_name_ptrs(&x->secs[base + j]);
+ }
+ }
+
+ /* Phase A: count OutSecs per segment (distinct sectnames) so we can
+ * size the load commands before placing vaddrs. Phase B builds the
+ * actual OutSec[] after placement, when vaddrs are final. */
+ for (u32 i = 0; i < x->nsegs; ++i) {
+ MSeg* sg = &x->segs[i];
+ u32 cnt = 0;
+ for (u32 a = sg->first_sec; a < sg->first_sec + sg->nsects; ++a) {
+ int seen = 0;
+ for (u32 b = sg->first_sec; b < a; ++b) {
+ if (slice_eq_cstr(slice_from_cstr(x->secs[a].sectname),
+ x->secs[b].sectname) &&
+ slice_eq_cstr(slice_from_cstr(x->secs[a].segname),
+ x->secs[b].segname)) {
+ seen = 1;
+ break;
+ }
+ }
+ if (!seen) ++cnt;
+ }
+ sg->nouts = cnt;
+ sg->first_out = 0; /* assigned in Phase B */
+ }
+
+ /* Compute load-command count + sizeofcmds, then back-fill section
+ * offsets. Layout pass 2. */
+ u32 nseg_real = 0;
+ for (u32 i = 0; i < x->nsegs; ++i) {
+ /* Skip __DATA_CONST or __DATA if no sections (edge case). */
+ if (i == 0) {
+ ++nseg_real;
+ continue;
+ } /* PAGEZERO */
+ if (i == 4) {
+ ++nseg_real;
+ continue;
+ } /* LINKEDIT always */
+ if (x->segs[i].nsects > 0) ++nseg_real;
+ }
+ /* Each LC_SEGMENT_64 carries 72 + 80*nouts bytes (one section_64
+ * record per coalesced (segname,sectname), not per MSec). */
+ u32 sizeofcmds = 0;
+ for (u32 i = 0; i < x->nsegs; ++i) {
+ if (i == 0 || i == 4) {
+ sizeofcmds += MACHO_SEGCMD64_SIZE; /* no sections */
+ continue;
+ }
+ if (x->segs[i].nsects == 0) continue;
+ sizeofcmds += MACHO_SEGCMD64_SIZE + x->segs[i].nouts * MACHO_SECT64_SIZE;
+ }
+ (void)nseg_real;
+ /* LC_DYLD_CHAINED_FIXUPS / LC_DYLD_EXPORTS_TRIE */
+ sizeofcmds += 16u + 16u;
+ /* LC_SYMTAB / LC_DYSYMTAB */
+ sizeofcmds += MACHO_SYMTAB_CMD_SIZE + MACHO_DYSYMTAB_CMD_SIZE;
+ /* LC_LOAD_DYLINKER */
+ {
+ u32 ld_size = 12u + (u32)(sizeof("/usr/lib/dyld") - 1u) + 1u;
+ sizeofcmds += (u32)ALIGN_UP((u64)ld_size, 8u);
+ }
+ /* LC_UUID + LC_BUILD_VERSION + LC_MAIN */
+ sizeofcmds += 24u + 24u + 24u;
+ /* LC_LOAD_DYLIB per dylib */
+ for (u32 i = 0; i < x->ndylibs; ++i) {
+ size_t nl = pool_slice(x->c->global, x->dylibs[i].install).len;
+ u32 sz = 24u + (u32)nl + 1u;
+ sizeofcmds += (u32)ALIGN_UP((u64)sz, 8u);
+ }
+ /* LC_FUNCTION_STARTS / LC_DATA_IN_CODE / LC_CODE_SIGNATURE */
+ sizeofcmds += 16u + 16u + 16u;
+
+ x->headers_size = MACHO_HDR64_SIZE + sizeofcmds;
+
+ /* Now place sections in __TEXT, __DATA_CONST, __DATA. */
+ u64 vaddr = MZ_PAGEZERO + x->headers_size;
+ u64 fileoff = x->headers_size;
+ /* Pad __TEXT sections to natural alignment. */
+ for (u32 i = 0; i < x->nsegs; ++i) {
+ if (i == 0 || i == 4) continue;
+ MSeg* sg = &x->segs[i];
+ if (i > 1) {
+ /* page-align the start of __DATA_CONST and __DATA */
+ vaddr = ALIGN_UP(vaddr, MZ_PAGE);
+ fileoff = ALIGN_UP(fileoff, MZ_PAGE);
+ }
+ sg->vmaddr = (i == 1) ? MZ_PAGEZERO : vaddr;
+ sg->fileoff = (i == 1) ? 0 : fileoff;
+ /* __TEXT carries the headers_size + sections. */
+ u64 seg_start_v = sg->vmaddr;
+ u64 seg_start_f = sg->fileoff;
+ /* For __TEXT, sections begin after the header area. */
+ u64 cur_v = (i == 1) ? (seg_start_v + x->headers_size) : seg_start_v;
+ u64 cur_f = (i == 1) ? (seg_start_f + x->headers_size) : seg_start_f;
+ u64 first_zerofill_v = 0;
+ int seen_zerofill = 0;
+ /* Non-zerofill first */
+ for (u32 j = 0; j < sg->nsects; ++j) {
+ MSec* m = &x->secs[sg->first_sec + j];
+ if (m->is_zerofill) continue;
+ cur_v = ALIGN_UP(cur_v, (u64)m->align);
+ cur_f = ALIGN_UP(cur_f, (u64)m->align);
+ m->vaddr = cur_v;
+ m->file_offset = cur_f;
+ cur_v += m->size;
+ cur_f += m->size;
+ }
+ first_zerofill_v = cur_v;
+ /* zerofill last (no file bytes) */
+ for (u32 j = 0; j < sg->nsects; ++j) {
+ MSec* m = &x->secs[sg->first_sec + j];
+ if (!m->is_zerofill) continue;
+ cur_v = ALIGN_UP(cur_v, (u64)m->align);
+ m->vaddr = cur_v;
+ m->file_offset = 0;
+ cur_v += m->size;
+ seen_zerofill = 1;
+ }
+ sg->filesize = (i == 1)
+ ? (cur_f - seg_start_f)
+ : (first_zerofill_v ? (first_zerofill_v - seg_start_v)
+ : (cur_v - seg_start_v));
+ sg->vmsize = ALIGN_UP(cur_v - seg_start_v, MZ_PAGE);
+ if (sg->vmsize == 0 && sg->nsects > 0) sg->vmsize = MZ_PAGE;
+ if (i == 1) {
+ x->stubs_vaddr = 0;
+ for (u32 j = 0; j < sg->nsects; ++j) {
+ MSec* m = &x->secs[sg->first_sec + j];
+ if (slice_eq_cstr(slice_from_cstr(m->sectname), "__stubs"))
+ x->stubs_vaddr = m->vaddr;
+ }
+ x->text_filesz = sg->filesize;
+ }
+ if (i == 2) {
+ for (u32 j = 0; j < sg->nsects; ++j) {
+ MSec* m = &x->secs[sg->first_sec + j];
+ if (slice_eq_cstr(slice_from_cstr(m->sectname), "__got"))
+ x->got_vaddr = m->vaddr;
+ }
+ x->data_const_vaddr = sg->vmaddr;
+ x->data_const_filesz = sg->filesize;
+ }
+ if (i == 3) {
+ for (u32 j = 0; j < sg->nsects; ++j) {
+ MSec* m = &x->secs[sg->first_sec + j];
+ if (slice_eq_cstr(slice_from_cstr(m->sectname), "__thread_ptrs"))
+ x->tlv_ptrs_vaddr = m->vaddr;
+ /* TLS storage image base: min vaddr across __thread_data and
+ * __thread_bss sections. __thread_vars is excluded — it holds
+ * the descriptors, not the data that maps into the per-thread
+ * block. */
+ if ((slice_eq_cstr(slice_from_cstr(m->sectname), "__thread_data") ||
+ slice_eq_cstr(slice_from_cstr(m->sectname), "__thread_bss")) &&
+ (!x->has_tls_image || m->vaddr < x->tls_image_vaddr)) {
+ x->tls_image_vaddr = m->vaddr;
+ x->has_tls_image = 1;
+ }
+ }
+ x->data_vaddr = sg->vmaddr;
+ x->data_filesz = sg->filesize;
+ x->data_memsz = sg->vmsize;
+ }
+ vaddr = sg->vmaddr + sg->vmsize;
+ /* Mach-O segments are mapped in page units. If a segment's memory
+ * image extends past its initialized file bytes (for example
+ * __DATA,__bss), the following segment's fileoff must not reuse those
+ * pages or the kernel can map later file contents into the zero-fill
+ * tail. */
+ fileoff = sg->fileoff + ((sg->vmsize > ALIGN_UP(sg->filesize, MZ_PAGE))
+ ? sg->vmsize
+ : sg->filesize);
+ (void)seen_zerofill;
+ }
+ /* LINKEDIT placeholder; size is filled after blob assembly. */
+ vaddr = ALIGN_UP(vaddr, MZ_PAGE);
+ fileoff = ALIGN_UP(fileoff, MZ_PAGE);
+ x->segs[4].vmaddr = vaddr;
+ x->segs[4].fileoff = fileoff;
+ x->linkedit_vaddr = vaddr;
+ x->linkedit_fileoff = fileoff;
+
+ /* Encode __stubs bytes now that vaddrs are settled. Internal-GOT
+ * entries have stub_idx=0 (direct CALL26, no stub) and must be
+ * skipped so the (stub_idx - 1u) arithmetic doesn't wrap. */
+ for (u32 i = 0; i < x->nimports; ++i) {
+ MachImp* mi = &x->imports[i];
+ if (!mi->is_func || !mi->stub_idx) continue;
+ u64 stub_v = x->stubs_vaddr + (mi->stub_idx - 1u) * x->macho->stub_size;
+ u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE;
+ x->macho->emit_stub(
+ x->stubs_bytes + (mi->stub_idx - 1u) * x->macho->stub_size, stub_v,
+ got_v);
+ }
+
+ /* Phase B: build OutSec[] now that all MSec vaddrs are final. Walk
+ * MSecs sorted by (segidx, vaddr) and coalesce adjacent same-name
+ * runs. Mirrors link_elf.c's OutShdr build at link_elf.c:879. */
+ {
+ u32* order =
+ (u32*)h->alloc(h, sizeof(u32) * (x->nsecs + 1u), _Alignof(u32));
+ if (!order && x->nsecs)
+ compiler_panic(x->c, no_loc(), "link_macho: oom on outsec sort");
+ for (u32 i = 0; i < x->nsecs; ++i) order[i] = i;
+ /* Insertion sort — section count is small. */
+ for (u32 i = 1; i < x->nsecs; ++i) {
+ u32 cur = order[i];
+ MSec* a = &x->secs[cur];
+ u32 j = i;
+ while (j > 0) {
+ MSec* b = &x->secs[order[j - 1]];
+ if ((b->segidx < a->segidx) ||
+ (b->segidx == a->segidx && b->vaddr <= a->vaddr))
+ break;
+ order[j] = order[j - 1];
+ --j;
+ }
+ order[j] = cur;
+ }
+ u32 cap = x->nsecs + 1u;
+ x->outs = (OutSec*)h->alloc(h, sizeof(OutSec) * cap, _Alignof(OutSec));
+ if (!x->outs) compiler_panic(x->c, no_loc(), "link_macho: oom on OutSec");
+ memset(x->outs, 0, sizeof(OutSec) * cap);
+ x->nouts = 0;
+ for (u32 i = 0; i < x->nsecs; ++i) {
+ MSec* m = &x->secs[order[i]];
+ OutSec* tail = x->nouts ? &x->outs[x->nouts - 1] : NULL;
+ int merge = tail && tail->segidx == m->segidx &&
+ slice_eq_cstr(slice_from_cstr(tail->sectname), m->sectname) &&
+ slice_eq_cstr(slice_from_cstr(tail->segname), m->segname);
+ if (merge) {
+ if (tail->flags != m->flags || tail->is_zerofill != m->is_zerofill)
+ compiler_panic(
+ x->c, no_loc(),
+ "link_macho: coalesce mismatch on %.*s,%.*s (flags/zerofill)",
+ SLICE_ARG(slice_from_cstr(m->segname)),
+ SLICE_ARG(slice_from_cstr(m->sectname)));
+ u64 end = m->vaddr + m->size;
+ u64 prev_end = tail->vaddr + tail->size;
+ if (end > prev_end) tail->size = end - tail->vaddr;
+ if (m->align > tail->align) tail->align = m->align;
+ } else {
+ OutSec* o = &x->outs[x->nouts++];
+ o->segname = m->segname;
+ o->sectname = m->sectname;
+ o->vaddr = m->vaddr;
+ o->file_offset = m->file_offset;
+ o->size = m->size;
+ o->align = m->align;
+ o->flags = m->flags;
+ o->reserved1 = m->reserved1;
+ o->reserved2 = m->reserved2;
+ o->segidx = m->segidx;
+ o->is_zerofill = m->is_zerofill;
+ }
+ }
+ h->free(h, order, sizeof(u32) * (x->nsecs + 1u));
+ /* Recompute per-segment OutSec span; Phase A's count was for
+ * sizeofcmds sizing — recompute it here as the source of truth and
+ * assert agreement. */
+ for (u32 i = 0; i < x->nsegs; ++i) {
+ x->segs[i].first_out = 0;
+ }
+ u32 prev_nouts[5];
+ for (u32 i = 0; i < x->nsegs; ++i) prev_nouts[i] = x->segs[i].nouts;
+ for (u32 i = 0; i < x->nsegs; ++i) x->segs[i].nouts = 0;
+ for (u32 i = 0; i < x->nouts; ++i) {
+ u8 sx = x->outs[i].segidx;
+ if (x->segs[sx].nouts == 0) x->segs[sx].first_out = i;
+ ++x->segs[sx].nouts;
+ }
+ for (u32 i = 0; i < x->nsegs; ++i) {
+ if (prev_nouts[i] != x->segs[i].nouts)
+ compiler_panic(x->c, no_loc(),
+ "link_macho: OutSec count drift seg %u (%u vs %u)",
+ (u32)i, prev_nouts[i], x->segs[i].nouts);
+ }
+ }
+}
+
+/* ---- pass: shift LinkImage into final vaddrs/file_offsets ----
+ *
+ * img->sections still carries the coordinates link_layout assigned.
+ * For every MSec that is backed by a LinkSection, copy the MSec's final
+ * vaddr/file_offset onto that LinkSection and move everything keyed to
+ * it (reloc write positions, defined non-absolute symbols) by the same
+ * delta, so that reloc-apply sees final addresses. */
+
+static void shift_sections(MCtx* x) {
+  LinkImage* img = x->img;
+  for (u32 k = 0; k < x->nsecs; ++k) {
+    const MSec* placed = &x->secs[k];
+    /* link_sec_id is 1-based; 0 means no LinkSection backs this MSec. */
+    if (placed->link_sec_id == 0) continue;
+    LinkSection* sec = &img->sections[placed->link_sec_id - 1u];
+
+    const u64 from_v = sec->vaddr;
+    const u64 from_f = sec->file_offset;
+    const u64 to_v = placed->vaddr;
+    const u64 to_f = placed->file_offset;
+    if (from_v == to_v && from_f == to_f) continue; /* already in place */
+
+    sec->vaddr = to_v;
+    sec->file_offset = to_f;
+
+    /* Relocations written into this section keep their offset from the
+     * section start. */
+    for (u32 ri = 0; ri < LinkRelocs_count(&img->relocs); ++ri) {
+      LinkRelocApply* rel = LinkRelocs_at(&img->relocs, ri);
+      if (rel->link_section_id == sec->id) {
+        rel->write_vaddr = to_v + (rel->write_vaddr - from_v);
+        rel->write_file_offset = to_f + (rel->write_file_offset - from_f);
+      }
+    }
+
+    /* Symbols are matched by section_id, not by vaddr range: several
+     * input sections may share the same pre-shift vaddr because each
+     * link_layout bucket starts at offset 0. */
+    for (u32 si = 0; si < LinkSyms_count(&img->syms); ++si) {
+      LinkSymbol* sym = LinkSyms_at(&img->syms, si);
+      if (!sym->defined || sym->kind == SK_ABS) continue;
+      if (sym->section_id == sec->id) sym->vaddr = to_v + (sym->vaddr - from_v);
+    }
+  }
+}
+
+/* ---- pass: apply relocations + collect chained-fixup sites ----
+ *
+ * Reloc dispatch:
+ * target=imported func + CALL26/JUMP26 -> S = stub vaddr
+ * target=import + GOT_LOAD_PAGE21/PAGEOFF12 -> S = got slot vaddr
+ * target=import + ABS64 -> write 0; collect bind site
+ * target=internal + ABS64 -> write target VA; collect rebase site
+ * everything else -> standard apply
+ *
+ * Patch sites for chained fixups are 8-byte slots. ABS32 sites get no
+ * chained fixup: standard arm64 has no chained-fixup format for 32-bit
+ * pointers (that would need DYLD_CHAINED_PTR_32). Internal R_ABS32
+ * relocations are still applied statically; the value is not
+ * slide-adjusted, which is technically wrong, but it suffices for
+ * compile-time-known offsets.
+ */
+
+typedef struct FixSite { /* One 8-byte pointer slot that needs a chained
+   * fixup. Field order matters: apply_relocs builds these with
+   * positional brace initialisers. */
+  u8 segidx; /* index into x->segs of the segment holding the slot:
+              * 2 = __DATA_CONST, 3 = __DATA; relocation sites copy
+              * their MSec's segidx */
+  u8 is_bind; /* 0 = rebase, 1 = bind */
+  u8 pad[2]; /* explicit padding, initialised as {0} */
+  u32 import_idx; /* 1-based import index for binds, 0 for rebases */
+  u64 vaddr; /* absolute VA of the slot */
+  u64 rebase_target; /* unslid target VA; only used for rebases */
+} FixSite;
+
+typedef struct FixList { /* growable array of FixSite records */
+  Heap* heap; /* allocator used to grow and free `a` */
+  FixSite* a; /* element storage; NULL after fix_init / fix_fini */
+  u32 n; /* number of valid entries in `a` */
+  u32 cap; /* allocated capacity of `a`, in elements */
+} FixList;
+
+/* Reset `fl` to an empty list that allocates from `h`. */
+static void fix_init(FixList* fl, Heap* h) {
+  *fl = (FixList){.heap = h, .a = NULL, .n = 0, .cap = 0};
+}
+/* Release the list's storage (if any) and leave it empty. The heap
+ * pointer is kept, so the list can be pushed to again. */
+static void fix_fini(FixList* fl) {
+  FixSite* buf = fl->a;
+  if (buf != NULL) fl->heap->free(fl->heap, buf, sizeof(FixSite) * fl->cap);
+  fl->a = NULL;
+  fl->cap = 0;
+  fl->n = 0;
+}
+/* Append a copy of *s to the list, growing the storage as needed.
+ * NOTE(review): when VEC_GROW reports a non-zero result (presumably an
+ * allocation failure) the site is dropped without any diagnostic. */
+static void fix_push(FixList* fl, const FixSite* s) {
+  if (VEC_GROW(fl->heap, fl->a, fl->cap, fl->n + 1u)) return;
+  FixSite* dst = &fl->a[fl->n];
+  *dst = *s;
+  fl->n += 1u;
+}
+
+/* Return the first MSec whose [vaddr, vaddr + size) range contains `v`,
+ * or NULL when no section covers that address. */
+static MSec* msec_for_vaddr(MCtx* x, u64 v) {
+  MSec* hit = NULL;
+  for (u32 k = 0; k < x->nsecs && hit == NULL; ++k) {
+    MSec* cand = &x->secs[k];
+    if (v < cand->vaddr) continue;
+    if (v < cand->vaddr + cand->size) hit = cand;
+  }
+  return hit;
+}
+
+/* Return the byte buffer that backs MSec `m`, or NULL when this function
+ * cannot name one.
+ *
+ *  - Synthetic sections (m->synth_data set): only the __stubs and __got
+ *    buffers are recognised; any other synthetic buffer yields NULL.
+ *  - Sections backed by a LinkSection: the buffer of the LinkSegment the
+ *    section was laid out in. This is the start of the whole segment
+ *    buffer, so callers add the section's input_offset themselves.
+ *  - Sections with neither a synthetic buffer nor a LinkSection: NULL. */
+static u8* bytes_for_section(MCtx* x, MSec* m, LinkImage* img) {
+  if (m->synth_data) {
+    if (m->synth_data == x->stubs_bytes) return x->stubs_bytes;
+    if (m->synth_data == x->got_bytes) return x->got_bytes;
+    return NULL;
+  }
+  /* link_sec_id is 1-based and 0 means "no LinkSection"; without this
+   * guard the index below would wrap to a huge out-of-bounds value. */
+  if (!m->link_sec_id) return NULL;
+  LinkSection* ls = &img->sections[m->link_sec_id - 1u];
+  u32 segid = ls->segment_id;
+  if (segid == LINK_SEG_NONE) return NULL;
+  return img->segment_bytes[segid - 1u];
+}
+
+/* Resolve the host pointer at which relocation `r` must be patched.
+ *
+ * The relocation names a LinkSection; the MSec with the same
+ * link_sec_id supplies the final vaddr, and bytes_for_section() supplies
+ * the buffer. The buffer is indexed by the LinkSection's input_offset
+ * plus the relocation's offset from the MSec's vaddr.
+ *
+ * On success *out_msec (when out_msec is non-NULL) receives the owning
+ * MSec. Returns NULL, leaving *out_msec untouched, when the relocation
+ * has no section, no MSec matches, or the section has no buffer. */
+static u8* patch_ptr(MCtx* x, LinkImage* img, const LinkRelocApply* r,
+                     MSec** out_msec) {
+  if (r->link_section_id == LINK_SEC_NONE) return NULL;
+  LinkSection* sec = &img->sections[r->link_section_id - 1u];
+
+  /* First MSec that is backed by this LinkSection. */
+  MSec* owner = NULL;
+  for (u32 k = 0; k < x->nsecs && owner == NULL; ++k) {
+    if (x->secs[k].link_sec_id == sec->id) owner = &x->secs[k];
+  }
+  if (owner == NULL) return NULL;
+
+  u8* buf = bytes_for_section(x, owner, img);
+  if (buf == NULL) return NULL;
+
+  /* write_vaddr and owner->vaddr are both final addresses here, so their
+   * difference is the offset inside the section. */
+  u32 delta = (u32)(r->write_vaddr - owner->vaddr);
+  u32 buf_off = (u32)(sec->input_offset + delta);
+  if (out_msec) *out_msec = owner;
+  return buf + buf_off;
+}
+
+/* Resolve symbol `id` for relocation purposes.
+ *
+ * Outputs:
+ *   *out_imp_idx  1-based index into x->imports when the symbol (or the
+ *                 canonical global of the same name) has an import or
+ *                 internal-GOT entry, otherwise 0.
+ *   *out_S        the symbol's vaddr for non-imported symbols; 0 for
+ *                 imports and for unknown symbols.
+ *
+ * Returns 1 when the symbol is imported, 0 otherwise (including when
+ * `id` is LINK_SYM_NONE or cannot be looked up). */
+static int sym_S(MCtx* x, LinkImage* img, LinkSymId id, u64* out_S,
+                 int* out_imp_idx) {
+  *out_S = 0;
+  *out_imp_idx = 0;
+  if (id == LINK_SYM_NONE) return 0;
+
+  LinkSymbol* sym = sym_at(img, id);
+  if (sym == NULL) return 0;
+
+  /* Direct lookup first; fall back to the canonical global that shares
+   * this symbol's name. */
+  u32 slot = (id < x->sym_to_imp_size) ? x->sym_to_imp[id] : 0u;
+  if (slot == 0 && sym->name != 0) {
+    LinkSymId canon = symhash_get(&img->globals, sym->name);
+    if (canon != LINK_SYM_NONE && canon < x->sym_to_imp_size)
+      slot = x->sym_to_imp[canon];
+  }
+  *out_imp_idx = (int)slot;
+
+  if (sym->imported) return 1;
+
+  /* Internal symbol: expose its address even when it also owns a GOT
+   * slot, so non-GOT relocations can still be applied directly. */
+  *out_S = sym->vaddr;
+  return 0;
+}
+
+static void apply_relocs(MCtx* x, FixList* fl) { /* Patch every relocation
+   * in x->img->relocs into its section buffer, then record one FixSite
+   * in `fl` for each pointer slot that needs a chained fixup: ABS64
+   * relocation sites, every __got slot and every __thread_ptrs slot.
+   * Inconsistent input is reported through compiler_panic. */
+  LinkImage* img = x->img;
+  for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) {
+    LinkRelocApply* r = LinkRelocs_at(&img->relocs, i);
+    if (r->target == LINK_SYM_NONE) continue;
+    MSec* msec = NULL;
+    u8* P_bytes = patch_ptr(x, img, r, &msec);
+    if (!P_bytes) continue; /* no patchable buffer: relocation is skipped */
+    u64 P = r->write_vaddr; /* address of the location being patched */
+
+    u64 S;
+    int imp_idx;
+    int is_imp = sym_S(x, img, r->target, &S, &imp_idx); /* 1 = imported */
+
+    /* TLVP relocs route through a __thread_ptrs slot regardless of
+     * whether the descriptor target is in-image or imported. Resolved
+     * before the import / internal split because an imported TLV
+     * descriptor doesn't use the __got slot (its address lives in
+     * __thread_ptrs with its own chained bind). The slot index comes
+     * from x->sym_to_tlv and is 1-based; 0 means no slot was assigned. */
+    if (x->link_arch->is_tlvp_reloc && x->link_arch->is_tlvp_reloc(r->kind)) {
+      u32 tlv_idx =
+          (r->target < x->sym_to_tlv_size) ? x->sym_to_tlv[r->target] : 0u;
+      if (!tlv_idx)
+        compiler_panic(x->c, no_loc(),
+                       "link_macho: TLVP reloc has no __thread_ptrs slot");
+      u64 slot_v = x->tlv_ptrs_vaddr + (tlv_idx - 1u) * MZ_TLVP_SIZE;
+      link_reloc_apply(x->c, r->kind, P_bytes, slot_v, r->addend, P);
+      continue;
+    }
+
+    if (is_imp) {
+      MachImp* mi = (imp_idx > 0) ? &x->imports[imp_idx - 1] : NULL;
+      if (x->link_arch->is_branch_reloc &&
+          x->link_arch->is_branch_reloc(r->kind)) {
+        if (!mi || !mi->stub_idx)
+          compiler_panic(x->c, no_loc(),
+                         "link_macho: import has no stub for branch");
+        u64 stub_v = x->stubs_vaddr + (mi->stub_idx - 1u) * x->macho->stub_size;
+        link_reloc_apply(x->c, r->kind, P_bytes, stub_v, r->addend, P);
+        continue;
+      }
+      if (x->link_arch->is_got_load_reloc &&
+          x->link_arch->is_got_load_reloc(r->kind)) {
+        if (!mi)
+          compiler_panic(x->c, no_loc(),
+                         "link_macho: GOT reloc for unknown import");
+        u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE;
+        link_reloc_apply(x->c, r->kind, P_bytes, got_v, r->addend, P);
+        continue;
+      }
+      if (x->link_arch->is_direct_page_reloc &&
+          x->link_arch->is_direct_page_reloc(r->kind)) {
+        /* Direct page/lo12 against an import: route through __got, i.e.
+         * the relocation is applied against the import's __got slot
+         * address instead of the (unknown) symbol address. */
+        if (!mi)
+          compiler_panic(x->c, no_loc(),
+                         "link_macho: PAGE/LO12 against unknown import");
+        u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE;
+        link_reloc_apply(x->c, r->kind, P_bytes, got_v, r->addend, P);
+        continue;
+      }
+      if (r->kind == R_ABS64) {
+        /* Direct 8-byte absolute against an import: bind the slot. The
+         * slot is zeroed here and a bind FixSite carrying the 1-based
+         * import index is queued for build_chained_fixups. */
+        wr_u64_le(P_bytes, 0);
+        FixSite fs = {(u8)msec->segidx, 1, {0}, (u32)imp_idx, P, 0};
+        fix_push(fl, &fs);
+        continue;
+      }
+      compiler_panic(x->c, no_loc(),
+                     "link_macho: unhandled reloc kind %u against imported "
+                     "symbol",
+                     (u32)r->kind);
+    }
+
+    /* Internal relocs: from here on the target is not imported and S
+     * holds its vaddr as returned by sym_S. */
+    if (r->kind == R_ABS64) {
+      /* Special case: ABS64 reloc inside a TLV descriptor record
+       * (__thread_vars section) targeting in-image TLS storage. This
+       * is the descriptor's word-2 "offset" field — dyld interprets it
+       * as the per-thread offset of the storage within the TLS image,
+       * NOT as an absolute address. Apple's ld writes the literal
+       * offset and emits no chained-fixup entry; replicate that so the
+       * chain skips over this slot (chained_fixups already does the
+       * right thing: no fixsite -> no chain link). */
+      if (msec && (msec->flags & SECTION_TYPE) == S_THREAD_LOCAL_VARIABLES &&
+          x->has_tls_image) {
+        u64 offset = (S + (u64)r->addend) - x->tls_image_vaddr;
+        wr_u64_le(P_bytes, offset);
+        continue;
+      }
+      /* Rebase site: store the unslid target address and queue a rebase
+       * FixSite with the same value as its rebase_target. */
+      wr_u64_le(P_bytes, S + (u64)r->addend);
+      FixSite fs = {(u8)msec->segidx, 0, {0}, 0, P, S + (u64)r->addend};
+      fix_push(fl, &fs);
+      continue;
+    }
+    /* Internal symbol routed through __got (clang emits GOT_LOAD_PAGE21
+     * for any extern global, even if the def is in-image). imp_idx
+     * was populated by collect_imports' internal-GOT pass; redirect
+     * the page/lo12 reloc to the GOT slot's vaddr. */
+    if (imp_idx > 0 && x->link_arch->is_got_load_reloc &&
+        x->link_arch->is_got_load_reloc(r->kind)) {
+      MachImp* mi = &x->imports[imp_idx - 1];
+      u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE;
+      link_reloc_apply(x->c, r->kind, P_bytes, got_v, r->addend, P);
+      continue;
+    }
+    /* Generic apply: every remaining kind is resolved directly against
+     * the symbol address S. */
+    link_reloc_apply(x->c, r->kind, P_bytes, S, r->addend, P);
+  }
+
+  /* Per-slot chained fixup. Real imports → bind (dyld resolves at
+   * load). Internal GOT entries → rebase pointing at the symbol's
+   * image-relative vaddr; a target vaddr of 0 (weak undef → NULL) gets
+   * no fixup, just a literal zero slot — chained fixups treat 0 as a
+   * gap and won't disturb it. All of these sites are recorded with
+   * segidx 2 (__DATA_CONST). */
+  for (u32 i = 0; i < x->nimports; ++i) {
+    MachImp* mi = &x->imports[i];
+    u64 slot_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE;
+    if (mi->internal) {
+      /* Re-read the symbol's final vaddr now that shift_sections has
+       * rebased every defined symbol into the Mach-O image layout
+       * (collect_imports snapshotted too early). */
+      LinkSymbol* s = sym_at(img, mi->sym);
+      u64 tgt_v = s ? s->vaddr : 0;
+      u8* slot = x->got_bytes + (mi->got_idx - 1u) * MZ_GOT_SIZE;
+      wr_u64_le(slot, tgt_v);
+      if (tgt_v == 0) continue; /* weak-undef → NULL */
+      FixSite fs = {2u, 0, {0}, 0, slot_v, tgt_v};
+      fix_push(fl, &fs);
+    } else {
+      /* clear slot bytes (already zero) — dyld writes via chain. The
+       * bind's import index is this entry's 1-based position in
+       * x->imports. */
+      FixSite fs = {2u, 1, {0}, i + 1u, slot_v, 0};
+      fix_push(fl, &fs);
+    }
+  }
+
+  /* Per-slot TLV pointer fixups. Mirror of the __got loop above: each
+   * __thread_ptrs slot points at the descriptor record. When the
+   * descriptor is in-image (internal) we REBASE to its final vaddr; when
+   * it lives in a dylib we BIND through the descriptor's MachImp. The
+   * slot itself lives in __DATA (segidx=3), distinct from __got's
+   * __DATA_CONST (segidx=2). */
+  for (u32 i = 0; i < x->ntlv; ++i) {
+    MachTlv* ts = &x->tlv_slots[i];
+    u64 slot_v = x->tlv_ptrs_vaddr + (ts->tlv_idx - 1u) * MZ_TLVP_SIZE;
+    u8* slot = x->tlv_ptrs_bytes + (ts->tlv_idx - 1u) * MZ_TLVP_SIZE;
+    if (ts->imported) {
+      if (!ts->import_idx)
+        compiler_panic(x->c, no_loc(),
+                       "link_macho: imported TLV without matching import slot");
+      wr_u64_le(slot, 0);
+      FixSite fs = {3u, 1, {0}, ts->import_idx, slot_v, 0};
+      fix_push(fl, &fs);
+    } else {
+      LinkSymbol* s = sym_at(img, ts->sym);
+      u64 tgt_v = s ? s->vaddr : 0;
+      wr_u64_le(slot, tgt_v);
+      if (tgt_v == 0) continue; /* weak-undef descriptor → NULL */
+      FixSite fs = {3u, 0, {0}, 0, slot_v, tgt_v};
+      fix_push(fl, &fs);
+    }
+  }
+}
+
+/* ---- chained fixups blob assembler ----
+ *
+ * For each segment that has fixups, build a dyld_chained_starts_in_segment
+ * with one chain per page (MZ_PAGE). Within a page, sort sites by
+ * offset, encode each as DYLD_CHAINED_PTR_64, and link via the `next`
+ * field (4-byte units, 0 = end of chain).
+ */
+
+typedef struct PageChain { /* Summary of one page's fixup chain.
+   * NOTE(review): nothing in the code visible around this definition
+   * refers to PageChain — confirm it is still needed. */
+  u32 first_offset_in_page; /* relative to page start */
+  u32 nsites; /* number of fixup sites on the page */
+  u32 first_site_idx; /* into a per-segment site array */
+} PageChain;
+
+/* Three-way comparison of two FixSite records by slot address.
+ * Returns -1, 0 or 1; the qsort-style signature takes untyped pointers. */
+static int site_cmp_by_vaddr(const void* a, const void* b) {
+  const u64 lhs = ((const FixSite*)a)->vaddr;
+  const u64 rhs = ((const FixSite*)b)->vaddr;
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+/* Sort a[0..n) by ascending vaddr. A small stable insertion sort is used
+ * so that qsort does not have to be pulled in. */
+static void sort_sites(FixSite* a, u32 n) {
+  for (u32 i = 1; i < n; ++i) {
+    const FixSite moving = a[i];
+    u32 hole = i;
+    /* Slide larger elements one step right until `moving` fits. */
+    for (; hole > 0; --hole) {
+      if (site_cmp_by_vaddr(&a[hole - 1], &moving) <= 0) break;
+      a[hole] = a[hole - 1];
+    }
+    a[hole] = moving;
+  }
+}
+
+/* Encode one DYLD_CHAINED_PTR_64 word and store it little-endian at
+ * `slot`.
+ *
+ *   bind  : ordinal:24, addend:8, reserved:19, next:12, bind:1 = 1
+ *   rebase: target:36,  high8:8,  reserved:7,  next:12, bind:1 = 0
+ *
+ * For a bind, ord_or_target_lo is the import ordinal and
+ * high_or_target_hi is ignored. For a rebase, the two arguments are the
+ * low and high 32 bits of the target address, of which only the low 36
+ * bits are kept. addend and high8 are always written as 0. next4 is the
+ * distance to the next fixup in 4-byte units (0 ends the chain); only
+ * its low 12 bits are used. */
+static void emit_pointer(u8* slot, int is_bind, u32 ord_or_target_lo,
+                         u32 high_or_target_hi, u32 next4) {
+  const u64 next_bits = ((u64)next4 & 0xfffull) << 51;
+  u64 word;
+  if (is_bind) {
+    const u64 ordinal = (u64)ord_or_target_lo & 0xffffffull; /* 24 bits */
+    word = ordinal | next_bits | ((u64)1 << 63);
+  } else {
+    const u64 full = ((u64)high_or_target_hi << 32) | (u64)ord_or_target_lo;
+    const u64 target = full & (((u64)1 << 36) - 1u); /* 36 bits */
+    word = target | next_bits;
+  }
+  wr_u64_le(slot, word);
+}
+
+/* Assemble the chained-fixups blob into x->chained_fixups and thread the
+ * per-page pointer chains through the section byte buffers.
+ *
+ * `fl` holds every fixup site collected by apply_relocs; it is sorted in
+ * place by vaddr. For each segment that owns at least one site a
+ * dyld_chained_starts_in_segment record is emitted, and each site's
+ * 8-byte slot is overwritten with a DYLD_CHAINED_PTR_64 word linking it
+ * to the next site on the same page. The imports table and symbol pool
+ * for the x->nimports_real real imports follow.
+ *
+ * Panics when a site lies outside its segment's pages, when a site has
+ * no backing buffer, when a bind refers to an import outside the real
+ * import table, or when an import has no name or dylib ordinal. */
+static void build_chained_fixups(MCtx* x, FixList* fl) {
+  Heap* h = x->h;
+  MByte* out = &x->chained_fixups;
+  mbuf_init(out, h);
+
+  /* Header, seven u32 fields:
+   *   uint32 fixups_version (=0)
+   *   uint32 starts_offset
+   *   uint32 imports_offset
+   *   uint32 symbols_offset
+   *   uint32 imports_count
+   *   uint32 imports_format (=1)
+   *   uint32 symbols_format (=0)
+   * The three offsets are back-patched once their targets are known. */
+  mbuf_u32(out, 0); /* fixups_version */
+  u32 starts_offset_pos = mbuf_u32(out, 0);
+  u32 imports_offset_pos = mbuf_u32(out, 0);
+  u32 symbols_offset_pos = mbuf_u32(out, 0);
+  mbuf_u32(out, x->nimports_real);
+  mbuf_u32(out, DYLD_CHAINED_IMPORT);
+  mbuf_u32(out, 0); /* symbols uncompressed */
+  /* NOTE(review): this comment used to say dyld expects 8-byte alignment
+   * of the starts table, but the alignment requested here is 4 —
+   * confirm which one is intended. */
+  mbuf_align(out, 4);
+
+  /* dyld_chained_starts_in_image:
+   *   uint32 seg_count
+   *   uint32 seg_info_offset[seg_count]
+   *
+   * seg_count must equal mach-O segment count (5).
+   * seg_info_offset[i] = 0 means no fixups in that segment.
+   */
+  u32 starts_off = out->len;
+  wr_u32_le(out->data + starts_offset_pos, starts_off);
+  mbuf_u32(out, x->nsegs);
+  /* Reserve seg_info_offset[]. */
+  u32 seg_info_offsets_pos = out->len;
+  for (u32 i = 0; i < x->nsegs; ++i) mbuf_u32(out, 0);
+
+  /* Sort fixsites by vaddr globally. */
+  sort_sites(fl->a, fl->n);
+
+  /* Per segment, emit dyld_chained_starts_in_segment when fixups present. */
+  for (u32 si = 0; si < x->nsegs; ++si) {
+    /* Count the sites in this segment. After the global sort they are
+     * treated as one contiguous run starting at `first`. */
+    u32 first = (u32)-1, count = 0;
+    for (u32 k = 0; k < fl->n; ++k) {
+      if (fl->a[k].segidx == si) {
+        if (first == (u32)-1) first = k;
+        ++count;
+      }
+    }
+    if (!count) continue;
+    /* Align this struct to 4 bytes. */
+    mbuf_align(out, 4);
+    u32 sis_off = out->len;
+    /* Patch seg_info_offset[si] to (sis_off - starts_off). */
+    wr_u32_le(out->data + seg_info_offsets_pos + si * 4u, sis_off - starts_off);
+
+    /* Compute page count for this segment. */
+    u64 seg_va = x->segs[si].vmaddr;
+    u64 seg_size = x->segs[si].vmsize ? x->segs[si].vmsize : MZ_PAGE;
+    u32 page_count = (u32)((seg_size + MZ_PAGE - 1u) / MZ_PAGE);
+
+    /* dyld_chained_starts_in_segment:
+     *   uint32 size
+     *   uint16 page_size
+     *   uint16 pointer_format
+     *   uint64 segment_offset
+     *   uint32 max_valid_pointer  (0 for 64-bit)
+     *   uint16 page_count
+     *   uint16 page_start[page_count]   (0xFFFF = no fixups in page)
+     *
+     * NOTE(review): Apple's fixup-chains.h describes segment_offset as
+     * the segment's offset in memory from the image base; the value
+     * written here is the segment's file offset. Confirm that the
+     * layout pass keeps the two equal. */
+    u32 sis_size_pos = mbuf_u32(out, 0); /* fill below */
+    mbuf_u16(out, (u16)MZ_PAGE);
+    mbuf_u16(out, (u16)DYLD_CHAINED_PTR_64);
+    mbuf_u64(out, (u64)x->segs[si].fileoff); /* segment file offset */
+    mbuf_u32(out, 0);
+    mbuf_u16(out, (u16)page_count);
+    u32 page_starts_pos = out->len;
+    for (u32 p = 0; p < page_count; ++p) mbuf_u16(out, 0xFFFFu);
+    /* `size` covers the whole struct, from its first byte through the
+     * end of the page_start array. */
+    u32 sis_size = out->len - sis_off;
+    wr_u32_le(out->data + sis_size_pos, sis_size);
+
+    /* Now: walk sites in this segment, group by page, write
+     * page_start[i] = offset_in_page of first site, and chain via
+     * next-field in the actual segment's bytes. */
+    u32 cur = first;
+    while (cur < first + count) {
+      u64 site_v = fl->a[cur].vaddr;
+      /* page_start[] has exactly page_count entries; a site outside the
+       * segment's pages would make the write below land outside that
+       * array, so refuse it explicitly. */
+      if (site_v < seg_va || (site_v - seg_va) / MZ_PAGE >= page_count)
+        compiler_panic(x->c, no_loc(),
+                       "link_macho: chained-fixup site 0x%llx lies outside "
+                       "segment %u",
+                       (unsigned long long)site_v, (unsigned)si);
+      u32 page_idx = (u32)((site_v - seg_va) / MZ_PAGE);
+      u32 offset_in_page = (u32)((site_v - seg_va) % MZ_PAGE);
+      wr_u16_le(out->data + page_starts_pos + page_idx * 2u,
+                (u16)offset_in_page);
+      /* Walk this page's chain: extend next_in_page to the last site
+       * that still falls on the same page. */
+      u32 next_in_page = cur;
+      while (next_in_page + 1 < first + count) {
+        u64 nv = fl->a[next_in_page + 1].vaddr;
+        if (nv >= seg_va + (u64)(page_idx + 1) * MZ_PAGE) break;
+        ++next_in_page;
+      }
+      /* Encode chain pointers. */
+      for (u32 k = cur; k <= next_in_page; ++k) {
+        FixSite* s = &fl->a[k];
+        /* Distance to the next site on this page in 4-byte units;
+         * 0 terminates the chain. */
+        u32 next4 = 0;
+        if (k < next_in_page) {
+          u64 dist = fl->a[k + 1].vaddr - s->vaddr;
+          next4 = (u32)(dist / 4u);
+        }
+        /* Find segment bytes. Synthetic pointer sections have private
+         * buffers; file-backed sections can live in any segment, including
+         * pointer-bearing read-only constants in __TEXT. */
+        u8* slot = NULL;
+        if (s->segidx == 2 && x->got_bytes && s->vaddr >= x->got_vaddr &&
+            s->vaddr < x->got_vaddr + x->got_size) {
+          /* __DATA_CONST: __got slot. */
+          slot = x->got_bytes + (s->vaddr - x->got_vaddr);
+        } else if (x->tlv_ptrs_bytes && s->vaddr >= x->tlv_ptrs_vaddr &&
+                   s->vaddr < x->tlv_ptrs_vaddr + x->tlv_ptrs_size) {
+          slot = x->tlv_ptrs_bytes + (s->vaddr - x->tlv_ptrs_vaddr);
+        } else {
+          MSec* m = msec_for_vaddr(x, s->vaddr);
+          if (m && m->link_sec_id) {
+            u8* base = bytes_for_section(x, m, x->img);
+            if (base) {
+              LinkSection* ls = &x->img->sections[m->link_sec_id - 1u];
+              u32 in_off = (u32)(ls->input_offset + (s->vaddr - m->vaddr));
+              slot = base + in_off;
+            }
+          }
+        }
+        if (!slot)
+          compiler_panic(x->c, no_loc(),
+                         "link_macho: chained-fixup slot for vaddr 0x%llx not "
+                         "in any segment buffer",
+                         (unsigned long long)s->vaddr);
+        if (s->is_bind) {
+          /* ordinal is import index (1-based) - 1; chained-import format
+           * uses 0-based. */
+          if (s->import_idx == 0 || s->import_idx > x->nimports_real) {
+            compiler_panic(
+                x->c, no_loc(),
+                "link_macho: chained bind for vaddr 0x%llx uses import index "
+                "%u outside real import table size %u",
+                (unsigned long long)s->vaddr, (unsigned)s->import_idx,
+                (unsigned)x->nimports_real);
+          }
+          u32 ord = s->import_idx - 1u;
+          emit_pointer(slot, 1, ord, 0, next4);
+        } else {
+          /* rebase target = unslid vmaddr */
+          u32 lo = (u32)(s->rebase_target & 0xffffffffu);
+          u32 hi = (u32)(s->rebase_target >> 32);
+          emit_pointer(slot, 0, lo, hi, next4);
+        }
+      }
+      cur = next_in_page + 1u;
+    }
+  }
+
+  /* Imports table: one dyld_chained_import (4B) per real import.
+   * Layout: lib_ordinal:8, weak:1, name_offset:23. Internal-GOT
+   * entries are not bound by dyld so they're omitted here. */
+  mbuf_align(out, 4);
+  u32 imports_off = out->len;
+  wr_u32_le(out->data + imports_offset_pos, imports_off);
+  /* The symbol pool starts right after the imports array; name offsets
+   * are measured from there. */
+  u32 symbols_off = imports_off + x->nimports_real * 4u;
+  /* Reserve imports area. */
+  for (u32 i = 0; i < x->nimports_real; ++i) mbuf_u32(out, 0);
+  /* Emit symbols (each NUL-terminated). Set name_offset on each import. */
+  wr_u32_le(out->data + symbols_offset_pos, out->len);
+  /* Leading NUL for offset 0. */
+  mbuf_u8(out, 0);
+  for (u32 i = 0; i < x->nimports_real; ++i) {
+    MachImp* mi = &x->imports[i];
+    Slice nm_s = pool_slice(x->c->global, mi->name);
+    const char* nm = nm_s.s;
+    size_t nl = nm_s.len;
+    if (!nm || !nl || mi->dylib_ord == 0 || mi->dylib_ord > x->ndylibs) {
+      compiler_panic(x->c, no_loc(),
+                     "link_macho: invalid chained import %u "
+                     "(name=%u dylib_ord=%u ndylibs=%u)",
+                     (unsigned)i, (unsigned)mi->name, (unsigned)mi->dylib_ord,
+                     (unsigned)x->ndylibs);
+    }
+    u32 off = out->len - symbols_off;
+    mbuf_str(out, nm, (u32)nl);
+    /* Patch the import slot. */
+    u32 packed = ((u32)mi->dylib_ord & 0xffu) |
+                 ((u32)(mi->weak ? 1u : 0u) << 8) | ((off & 0x7fffffu) << 9);
+    wr_u32_le(out->data + imports_off + i * 4u, packed);
+  }
+}
+
+/* ---- exports trie ----
+ *
+ * Minimal trie: a root node with a single child edge labelled with the
+ * entry symbol's name (normally "_main"), leading to a leaf that records
+ * the symbol's offset from the __TEXT base. This is enough for dyld; a
+ * full exports trie would carry more entries, but we don't need them. */
+
+/* Append `v` to `out` as an unsigned LEB128 value: seven payload bits
+ * per byte, least-significant group first, with the top bit set on
+ * every byte except the last. Zero is encoded as a single 0x00 byte. */
+static void uleb128(MByte* out, u64 v) {
+  for (;;) {
+    u8 septet = (u8)(v & 0x7fu);
+    v >>= 7;
+    if (v == 0) {
+      mbuf_u8(out, septet);
+      return;
+    }
+    mbuf_u8(out, (u8)(septet | 0x80u));
+  }
+}
+
+/* Build the LC_DYLD_EXPORTS_TRIE payload into x->exports_trie.
+ *
+ * Node format:
+ *   node = (terminal_size: uleb128) (export_data)? (children_count: u8)
+ *          (children: [(label NUL) (offset uleb128)]*)
+ *
+ * The trie exports exactly one symbol, the image entry point:
+ *
+ *   root: terminal_size=0, children_count=1, <entry name>\0, <leaf offset>
+ *   leaf: terminal_size, uleb(flags=0), uleb(vaddr - __TEXT.vmaddr),
+ *         children_count=0
+ *
+ * The child offset is the absolute position of the leaf within the trie.
+ * It is stored in a fixed 5-byte field (the widest uleb128 a u32 can
+ * need) so the leaf position is known before the offset is encoded.
+ *
+ * If the entry symbol is missing, undefined or nameless, a lone empty
+ * root node (terminal_size=0, children_count=0) is emitted instead. On
+ * the normal path the buffer is padded to a multiple of 8 bytes. */
+static void build_exports_trie(MCtx* x) {
+  enum { CHILD_OFF_BYTES = 5 }; /* max uleb128 width of a u32 */
+
+  MByte* out = &x->exports_trie;
+  mbuf_init(out, x->h);
+
+  LinkImage* img = x->img;
+  LinkSymbol* esym = sym_at(img, img->entry_sym);
+  const char* nm = NULL;
+  size_t nl = 0;
+  if (esym && esym->defined) {
+    Slice nm_s = pool_slice(x->c->global, esym->name);
+    nm = nm_s.s;
+    nl = nm_s.len;
+  }
+  if (!nm || nl == 0) {
+    /* No usable entry — emit a single empty terminal trie. */
+    mbuf_u8(out, 0); /* terminal_size 0 */
+    mbuf_u8(out, 0); /* children 0 */
+    return;
+  }
+
+  /* Leaf payload = uleb(flags) + uleb(offset). flags = 0 (regular
+   * export) always takes one byte; count the bytes of the offset. */
+  u32 flags = 0;
+  u64 entry_off = esym->vaddr - x->text_vaddr;
+  u32 leaf_payload_len = 1; /* the flags byte */
+  {
+    u64 v = entry_off;
+    do {
+      ++leaf_payload_len;
+      v >>= 7;
+    } while (v);
+  }
+
+  /* Root node. The child offset is reserved here and back-patched once
+   * the leaf position is known. */
+  mbuf_u8(out, 0); /* root terminal_size */
+  mbuf_u8(out, 1); /* children_count */
+  mbuf_str(out, nm, (u32)nl);
+  u32 child_off_pos = out->len;
+  for (u32 i = 0; i < CHILD_OFF_BYTES; ++i) mbuf_u8(out, 0);
+
+  /* Leaf node: terminal_size byte, payload, then no children. */
+  u32 leaf_pos = out->len;
+  mbuf_u8(out, (u8)leaf_payload_len);
+  uleb128(out, flags);
+  uleb128(out, entry_off);
+  mbuf_u8(out, 0); /* children_count */
+
+  /* Back-patch the child offset as a padded uleb128 that fills the whole
+   * reserved field: every byte but the last carries the continuation
+   * bit, so a reader consumes exactly CHILD_OFF_BYTES bytes and no
+   * filler bytes are left between the root and the leaf. */
+  u32 v = leaf_pos;
+  for (u32 i = 0; i < CHILD_OFF_BYTES; ++i) {
+    u8 b = (u8)(v & 0x7fu);
+    v >>= 7;
+    if (i + 1u < CHILD_OFF_BYTES) b |= 0x80u;
+    out->data[child_off_pos + i] = b;
+  }
+
+  /* Pad trie to 8 bytes. */
+  mbuf_align(out, 8);
+}
+
+/* ---- symtab + strtab + indirect symtab ---- */
+
+typedef struct NlistRec { /* one symbol-table record; the fields are in the
+ * same order as the 16-byte entries that build_symtab serializes by hand.
+ * NOTE(review): not referenced by the code visible in this section. */
+ u32 strx; /* offset of the symbol name in the string table */
+ u8 type; /* type bits, e.g. N_SECT | N_EXT or N_UNDF | N_EXT */
+ u8 sect; /* 1-based section index (Mach-O) */
+ u16 desc; /* descriptor bits, e.g. N_WEAK_DEF / N_WEAK_REF */
+ u64 value; /* symbol address */
+} NlistRec;
+
+/* Build the LC_SYMTAB nlist array (x->symtab), its string table
+ * (x->strtab) and the indirect symbol table (x->indirect).
+ *
+ * dyld requires the symtab order locals, defined externals, undefined
+ * (matched by the LC_DYSYMTAB ranges). No locals are emitted, so the
+ * table is:
+ *
+ *   [defined GLOBAL/WEAK, non-absolute, named symbols]
+ *   [one N_UNDF|N_EXT record per real import]
+ *
+ * The indirect table holds one entry per __stubs slot followed by one
+ * per __got slot; the first index of each run is stored in the owning
+ * section's reserved1. Sets x->nsyms to the number of records written. */
+static void build_symtab(MCtx* x) {
+  Heap* h = x->h;
+  LinkImage* img = x->img;
+  mbuf_init(&x->symtab, h);
+  mbuf_init(&x->strtab, h);
+  mbuf_init(&x->indirect, h);
+
+  /* strtab leading NUL, so string index 0 is the empty name. */
+  mbuf_u8(&x->strtab, 0);
+
+  u32 n_extdef = 0;
+  u32 n_undef = 0;
+
+  /* ---- defined externals ---- */
+  for (u32 i = 0; i < LinkSyms_count(&img->syms); ++i) {
+    LinkSymbol* s = LinkSyms_at(&img->syms, i);
+    if (!s->defined) continue;
+    if (s->bind != SB_GLOBAL && s->bind != SB_WEAK) continue;
+    if (s->name == 0) continue;
+    if (s->kind == SK_ABS) continue; /* skip abs externs */
+
+    /* n_sect is the 1-based index into the flat section_64 table the
+     * file actually contains (post-coalesce), matching what we emit in
+     * emit_load_command_segment.
+     *
+     * Prefer the section that contains the address. Only when none does
+     * fall back to the first section that *ends* at the address (an
+     * end-of-section marker, or a symbol in an empty section).
+     * Otherwise a symbol at the first byte of a section that directly
+     * follows another would be attributed to the preceding section. */
+    u32 sect_in = 0;
+    u32 sect_end = 0;
+    for (u32 k = 0; k < x->nouts; ++k) {
+      OutSec* o = &x->outs[k];
+      if (s->vaddr >= o->vaddr && s->vaddr < o->vaddr + o->size) {
+        sect_in = k + 1u;
+        break;
+      }
+      if (!sect_end && s->vaddr == o->vaddr + o->size) sect_end = k + 1u;
+    }
+    u8 n_sect = (u8)(sect_in ? sect_in : sect_end);
+
+    /* A nameless symbol refers to the empty string at index 0 rather
+     * than to whatever string happens to be appended next. */
+    Slice nm_s = pool_slice(x->c->global, s->name);
+    u32 strx = 0;
+    if (nm_s.s && nm_s.len) {
+      strx = x->strtab.len;
+      mbuf_str(&x->strtab, nm_s.s, (u32)nm_s.len);
+    }
+
+    /* nlist_64: strx(4) type(1) sect(1) desc(2) value(8). */
+    u8 t[16];
+    wr_u32_le(t + 0, strx);
+    t[4] = N_SECT | N_EXT;
+    t[5] = n_sect;
+    /* Weak definitions are flagged in n_desc, not in n_type. */
+    wr_u16_le(t + 6, s->bind == SB_WEAK ? N_WEAK_DEF : 0);
+    wr_u64_le(t + 8, s->vaddr);
+    mbuf_append(&x->symtab, t, 16);
+    ++n_extdef;
+  }
+
+  /* ---- undefined imports ----
+   * Real imports only. Internal-GOT entries don't get N_UNDF nlist
+   * records since they're defined in the image. Import k (k <
+   * nimports_real) lands at symtab index imp_first_symtab_idx + k. */
+  u32 imp_first_symtab_idx = n_extdef;
+  for (u32 i = 0; i < x->nimports_real; ++i) {
+    MachImp* mi = &x->imports[i];
+    Slice nm_s = pool_slice(x->c->global, mi->name);
+    u32 strx = 0;
+    if (nm_s.s && nm_s.len) {
+      strx = x->strtab.len;
+      mbuf_str(&x->strtab, nm_s.s, (u32)nm_s.len);
+    }
+
+    u8 t[16];
+    wr_u32_le(t + 0, strx);
+    t[4] = N_UNDF | N_EXT;
+    t[5] = 0;
+    /* The high byte of n_desc carries the dylib ordinal. */
+    u16 desc = (u16)(((u16)mi->dylib_ord & 0xff) << 8);
+    if (mi->weak) desc |= N_WEAK_REF;
+    wr_u16_le(t + 6, desc);
+    wr_u64_le(t + 8, 0);
+    mbuf_append(&x->symtab, t, 16);
+    ++n_undef;
+  }
+
+  /* ---- indirect symtab ----
+   * One entry per __stubs slot, then one per __got slot. Slots that
+   * belong to internal imports use INDIRECT_SYMBOL_LOCAL (0x80000000)
+   * since they have no nlist entry.
+   *
+   * __stubs and __got are each singleton OutSecs (synth sections never
+   * coalesce with user input), so a sectname match identifies them
+   * unambiguously. reserved1 of each is patched to the index of its
+   * first indirect entry. */
+  u32 indirect_next = 0;
+  for (u32 i = 0; i < x->nouts; ++i) {
+    OutSec* o = &x->outs[i];
+    if (!(slice_eq_cstr(slice_from_cstr(o->sectname), "__stubs") && o->size))
+      continue;
+    o->reserved1 = indirect_next;
+    for (u32 k = 0; k < x->nimports; ++k) {
+      MachImp* mi = &x->imports[k];
+      if (!mi->stub_idx) continue;
+      /* NOTE(review): internal imports are not expected to own a stub;
+       * if one ever does, mark it local like the __got run below
+       * instead of emitting an index past the end of the symtab. */
+      u32 sym_idx = mi->internal ? 0x80000000u /* INDIRECT_SYMBOL_LOCAL */
+                                 : (imp_first_symtab_idx + k);
+      mbuf_u32(&x->indirect, sym_idx);
+      ++indirect_next;
+    }
+  }
+  for (u32 i = 0; i < x->nouts; ++i) {
+    OutSec* o = &x->outs[i];
+    if (!(slice_eq_cstr(slice_from_cstr(o->sectname), "__got") && o->size))
+      continue;
+    o->reserved1 = indirect_next;
+    for (u32 k = 0; k < x->nimports; ++k) {
+      MachImp* mi = &x->imports[k];
+      u32 sym_idx = mi->internal ? 0x80000000u /* INDIRECT_SYMBOL_LOCAL */
+                                 : (imp_first_symtab_idx + k);
+      mbuf_u32(&x->indirect, sym_idx);
+      ++indirect_next;
+    }
+  }
+
+  /* No locals are emitted. */
+  x->nsyms = n_extdef + n_undef;
+}
+
+/* ---- LINKEDIT layout assembly ----
+ *
+ * Place blobs in the order Apple prefers:
+ * chained_fixups, exports_trie, fn_starts, data_in_code,
+ * symtab, indirect, strtab, codesig
+ */
+
+/* Assign file offsets to every __LINKEDIT blob, starting at
+ * x->linkedit_fileoff, and set the provisional size of the LINKEDIT
+ * segment (segs[4]). The code signature offset is fixed here but its
+ * bytes are not counted yet; the caller grows the segment once the
+ * signature size is known. Also initializes the (empty) fn_starts,
+ * data_in_code and codesig buffers. */
+static void layout_linkedit(MCtx* x) {
+  mbuf_init(&x->fn_starts, x->h);
+  mbuf_init(&x->data_in_code, x->h);
+  mbuf_init(&x->codesig, x->h);
+
+  u64 pos = ALIGN_UP(x->linkedit_fileoff, 8u);
+
+  /* chained fixups, 8-aligned */
+  x->chained_fixups_off = (u32)pos;
+  pos = ALIGN_UP(pos + x->chained_fixups.len, 8u);
+
+  /* exports trie, 8-aligned */
+  x->exports_trie_off = (u32)pos;
+  pos = ALIGN_UP(pos + x->exports_trie.len, 8u);
+
+  /* Function starts and data-in-code are empty and take no bytes, so
+   * they share the (already 8-aligned) offset at which symtab begins. */
+  x->fn_starts_off = (u32)pos;
+  x->data_in_code_off = (u32)pos;
+
+  /* symtab, 8-aligned */
+  x->symtab_off = (u32)pos;
+  pos = ALIGN_UP(pos + x->symtab.len, 4u);
+
+  /* indirect symtab, 4-aligned */
+  x->indirect_off = (u32)pos;
+  pos = ALIGN_UP(pos + x->indirect.len, 8u);
+
+  /* strtab, 8-aligned */
+  x->strtab_off = (u32)pos;
+  pos = ALIGN_UP(pos + x->strtab.len, 16u);
+
+  /* code signature starts on a 16-byte boundary */
+  x->codesig_off = (u32)pos;
+
+  /* Provisional segment size: everything up to, but not including, the
+   * code signature. Never leave vmsize at zero. */
+  MSeg* le = &x->segs[4];
+  u64 le_size = pos - x->linkedit_fileoff;
+  le->filesize = le_size;
+  le->vmsize = ALIGN_UP(le_size, MZ_PAGE);
+  if (!le->vmsize) le->vmsize = MZ_PAGE;
+}
+
+/* ---- ad-hoc code signature (CodeDirectory + SuperBlob) ----
+ *
+ * Produces a minimal embedded SuperBlob with a single CodeDirectory.
+ * The CD holds one sha256 hash per (1 << CS_PAGE_SIZE_LOG2) = 4096-byte
+ * page of the file (excluding the codesig itself). The kernel verifies the
+ * CD's hash chain on exec.
+ *
+ * Output format (in big-endian for SuperBlob/CodeDirectory headers):
+ * [SuperBlob]
+ * u32 magic (0xfade0cc0)
+ * u32 length
+ * u32 count (=1)
+ * [Slot]
+ * u32 type (=0 CSSLOT_CODEDIRECTORY)
+ * u32 offset (=20) -- relative to start of SuperBlob
+ * [CodeDirectory]
+ * u32 magic (0xfade0c02)
+ * u32 length (bytes including all hashes)
+ * u32 version (>=0x20400 for execSeg fields)
+ * u32 flags (=0x2: ad-hoc signatures must set kSecCodeSignatureAdhoc)
+ * u32 hashOffset (offset of first slot hash)
+ * u32 identOffset (offset of identifier string)
+ * u32 nSpecialSlots (=0)
+ * u32 nCodeSlots
+ * u32 codeLimit (file bytes covered)
+ * u8 hashSize (=32)
+ * u8 hashType (=2 sha256)
+ * u8 platform (=0)
+ * u8 pageSize (=12 for 4096)
+ * u32 spare2 (=0)
+ * u32 scatterOffset (=0)
+ * u32 teamOffset (=0)
+ * u32 spare3 (=0)
+ * u64 codeLimit64 (=0)
+ * u64 execSegBase (=__TEXT.fileoff)
+ * u64 execSegLimit (=__TEXT.filesize)
+ * u64 execSegFlags (=1 main binary)
+ * [identifier bytes "a.out\0"]
+ * [codeslot hashes nCodeSlots * 32 B]
+ *
+ * Hashes computed AFTER everything else is final — including the codesig
+ * blob's own offset in the file (the hash range stops just before
+ * codeLimit). */
+
+/* Store `v` at p[0..7] in big-endian order (most significant byte at
+ * p[0]). */
+static void wr_u64_be(u8* p, u64 v) {
+  u8* dst = p + 8;
+  while (dst != p) {
+    *--dst = (u8)v; /* fill from the least significant end backwards */
+    v >>= 8;
+  }
+}
+
+/* Build x->codesig as an embedded-signature SuperBlob holding a single
+ * CodeDirectory whose page hashes are still all zero. The blob has its
+ * final size, so the file layout is final once this returns;
+ * compute_codesig fills the hashes in later.
+ *
+ *   code_limit  number of file bytes the signature covers
+ *   ident       NUL-terminated identifier stored in the CodeDirectory
+ *
+ * Sets x->codesig_size to the total blob size. */
+static void build_codesig_skeleton(MCtx* x, u32 code_limit, const char* ident) {
+  enum {
+    SB_HDR = 12, /* SuperBlob: magic, length, count */
+    SB_SLOT = 8, /* one index entry: type, offset */
+    CD_HDR = 88  /* CodeDirectory header through execSegFlags */
+  };
+  const u32 page = 1u << CS_PAGE_SIZE_LOG2; /* 4096 */
+  const u32 nslots = (code_limit + page - 1u) / page;
+  const u32 ident_len = (u32)slice_from_cstr(ident).len + 1u; /* with NUL */
+  const u32 hash_off = (u32)CD_HDR + ident_len;
+  const u32 cd_size = hash_off + nslots * CS_SHA256_LEN;
+  const u32 cd_off = (u32)SB_HDR + (u32)SB_SLOT;
+  const u32 sb_size = cd_off + cd_size;
+
+  /* Allocate the whole blob zero-filled; every field not written below
+   * (nSpecialSlots, platform, spare2, scatterOffset, teamOffset, spare3,
+   * codeLimit64 and the hash slots) therefore stays 0. */
+  MByte* out = &x->codesig;
+  mbuf_init(out, x->h);
+  mbuf_reserve(out, sb_size);
+  memset(out->data, 0, sb_size);
+  out->len = sb_size;
+
+  /* SuperBlob header and its single index slot. */
+  u8* sb = out->data;
+  wr_u32_be(sb + 0, CS_MAGIC_EMBEDDED_SIGNATURE);
+  wr_u32_be(sb + 4, sb_size);
+  wr_u32_be(sb + 8, 1); /* count */
+  wr_u32_be(sb + 12, CSSLOT_CODEDIRECTORY);
+  wr_u32_be(sb + 16, cd_off); /* 20, relative to start of SuperBlob */
+
+  /* CodeDirectory header (big-endian). */
+  u8* cd = sb + cd_off;
+  wr_u32_be(cd + 0, CS_MAGIC_CODEDIRECTORY);
+  wr_u32_be(cd + 4, cd_size);
+  wr_u32_be(cd + 8, 0x20400u);         /* version with execSeg */
+  wr_u32_be(cd + 12, 0x2u);            /* flags = adhoc */
+  wr_u32_be(cd + 16, hash_off);        /* hashOffset */
+  wr_u32_be(cd + 20, (u32)CD_HDR);     /* identOffset */
+  wr_u32_be(cd + 28, nslots);          /* nCodeSlots */
+  wr_u32_be(cd + 32, code_limit);      /* codeLimit */
+  cd[36] = (u8)CS_SHA256_LEN;          /* hashSize */
+  cd[37] = (u8)CS_HASHTYPE_SHA256;     /* hashType */
+  cd[39] = (u8)CS_PAGE_SIZE_LOG2;      /* pageSize */
+  wr_u64_be(cd + 64, x->segs[1].fileoff);  /* execSegBase */
+  wr_u64_be(cd + 72, x->segs[1].filesize); /* execSegLimit */
+  wr_u64_be(cd + 80, CS_EXECSEG_MAIN_BINARY);
+
+  /* Identifier string, including its terminating NUL. */
+  memcpy(cd + CD_HDR, ident, ident_len);
+
+  x->codesig_size = sb_size;
+}
+
+/* Fill the code-slot hashes of the CodeDirectory built by
+ * build_codesig_skeleton.
+ *
+ *   full_file         the file bytes to hash
+ *   file_len_excl_cs  number of bytes covered (the signature excluded)
+ *   ident             same identifier passed to build_codesig_skeleton;
+ *                     only its length is used, to locate the hash array
+ *
+ * One SHA-256 is written per 4096-byte page. The final page is hashed
+ * over only the bytes that exist, with no zero padding. */
+static void compute_codesig(MCtx* x, const u8* full_file, u32 file_len_excl_cs,
+                            const char* ident) {
+  const u32 page = 1u << CS_PAGE_SIZE_LOG2;
+  const u32 npages = (file_len_excl_cs + page - 1u) / page;
+  const u32 ident_len = (u32)slice_from_cstr(ident).len + 1u;
+
+  /* SuperBlob header (12) + slot (8) + CodeDirectory header (88) +
+   * identifier precede the hash array. */
+  u8* slot = x->codesig.data + 12 + 8 + 88u + ident_len;
+
+  u32 off = 0;
+  for (u32 i = 0; i < npages; ++i) {
+    u32 take = page;
+    if (off + page > file_len_excl_cs) take = file_len_excl_cs - off;
+
+    Sha256 s;
+    sha256_init(&s);
+    sha256_update(&s, full_file + off, take);
+    sha256_final(&s, slot);
+
+    off += page;
+    slot += CS_SHA256_LEN;
+  }
+}
+
+/* ---- final emission ---- */
+
+/* Append one LC_SEGMENT_64 command for x->segs[segidx] to `lc`: the
+ * segment_command_64 header followed by one section_64 record for each
+ * of the segment's OutSecs (outs[first_out .. first_out + nouts)). */
+static void emit_load_command_segment(MByte* lc, MCtx* x, u32 segidx) {
+  MSeg* sg = &x->segs[segidx];
+
+  /* Segment name as a 16-byte zero-padded field. It is written once in
+   * the segment header and again in every section record, where it must
+   * match. */
+  u8 segname16[16];
+  memset(segname16, 0, 16);
+  size_t seg_len = slice_from_cstr(sg->name).len;
+  if (seg_len > 16) seg_len = 16;
+  memcpy(segname16, sg->name, seg_len);
+
+  /* segment_command_64 */
+  mbuf_u32(lc, LC_SEGMENT_64);
+  mbuf_u32(lc, MACHO_SEGCMD64_SIZE + sg->nouts * MACHO_SECT64_SIZE);
+  mbuf_append(lc, segname16, 16);
+  mbuf_u64(lc, sg->vmaddr);
+  mbuf_u64(lc, sg->vmsize);
+  mbuf_u64(lc, sg->fileoff);
+  mbuf_u64(lc, sg->filesize);
+  mbuf_u32(lc, sg->maxprot);
+  mbuf_u32(lc, sg->initprot);
+  mbuf_u32(lc, sg->nouts); /* nsects */
+  mbuf_u32(lc, 0);         /* flags */
+
+  /* section_64 records */
+  OutSec* o = &x->outs[sg->first_out];
+  for (u32 left = sg->nouts; left > 0; --left, ++o) {
+    u8 sectname16[16];
+    memset(sectname16, 0, 16);
+    if (o->sectname) {
+      size_t sect_len = slice_from_cstr(o->sectname).len;
+      if (sect_len > 16) sect_len = 16;
+      if (sect_len) memcpy(sectname16, o->sectname, sect_len);
+    }
+    mbuf_append(lc, sectname16, 16);
+    mbuf_append(lc, segname16, 16);
+    mbuf_u64(lc, o->vaddr);
+    mbuf_u64(lc, o->size);
+    mbuf_u32(lc, (u32)o->file_offset);
+
+    /* align is a power of 2 (0 is treated as 1); the file stores log2. */
+    u32 want = o->align ? o->align : 1u;
+    u32 log2_align = 0;
+    while ((1u << log2_align) < want) ++log2_align;
+    mbuf_u32(lc, log2_align);
+
+    mbuf_u32(lc, 0); /* reloff */
+    mbuf_u32(lc, 0); /* nreloc */
+    mbuf_u32(lc, o->flags);
+    mbuf_u32(lc, o->reserved1);
+    mbuf_u32(lc, o->reserved2);
+    mbuf_u32(lc, 0); /* reserved3 */
+  }
+}
+
+void link_emit_macho(LinkImage* img, Writer* w);
+
+/* Emit `img` as a Mach-O MH_EXECUTE file through `w`.
+ *
+ * Steps, in order:
+ *   1. look up the link/Mach-O descriptors for the target and require a
+ *      resolved, defined entry symbol at or above the __TEXT base;
+ *   2. collect imports and TLV slots, plan the layout, shift sections;
+ *   3. apply relocations (collecting fix sites) and build the LINKEDIT
+ *      blobs: chained fixups, exports trie, symtab/strtab/indirect;
+ *   4. lay out LINKEDIT and size the ad-hoc code signature;
+ *   5. build the load commands and check their size against the value
+ *      predicted by plan_layout;
+ *   6. assemble the whole file in memory, hash it into the code
+ *      signature, append the signature and write everything at offset 0.
+ *
+ * Any inconsistency is reported through compiler_panic. All scratch
+ * buffers owned by the local MCtx are released before returning. */
+void link_emit_macho(LinkImage* img, Writer* w) {
+  MCtx x;
+  memset(&x, 0, sizeof(x));
+  x.img = img;
+  x.c = img->c;
+  x.h = img->heap;
+  x.w = w;
+  x.linker = img->linker;
+  x.link_arch = link_arch_desc_for(img->c);
+  {
+    const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_MACHO);
+    x.macho =
+        fmt && fmt->macho_arch ? fmt->macho_arch(img->c->target.arch) : NULL;
+  }
+
+  if (!x.link_arch || !x.macho || !x.macho->cputype || !x.macho->emit_stub ||
+      !x.macho->stub_size)
+    compiler_panic(x.c, no_loc(),
+                   "link_emit_macho: no Mach-O descriptor for target");
+  if (img->entry_sym == LINK_SYM_NONE)
+    compiler_panic(x.c, no_loc(), "link_emit_macho: no resolved entry");
+
+  collect_imports(&x);
+  collect_tlv(&x);
+  plan_layout(&x);
+  shift_sections(&x);
+
+  /* entry offset within __TEXT segment. */
+  LinkSymbol* esym = sym_at(img, img->entry_sym);
+  if (!esym || !esym->defined)
+    compiler_panic(x.c, no_loc(), "link_emit_macho: entry symbol undefined");
+  if (esym->vaddr < x.text_vaddr)
+    compiler_panic(x.c, no_loc(),
+                   "link_emit_macho: entry symbol below __TEXT base");
+  x.entry_offset = (u32)(esym->vaddr - x.text_vaddr);
+
+  /* image-id UUID: the first 16 bytes of the computed image id. */
+  u8 image_id[LINK_IMAGE_ID_BYTES];
+  link_image_id_compute(img, image_id);
+  memcpy(x.uuid, image_id, 16);
+
+  /* Reloc apply collects fixsites. */
+  FixList fl;
+  fix_init(&fl, x.h);
+  apply_relocs(&x, &fl);
+
+  /* Build LINKEDIT contents. */
+  build_chained_fixups(&x, &fl);
+  build_exports_trie(&x);
+  build_symtab(&x);
+  layout_linkedit(&x);
+
+  /* Compute code-sig skeleton sized to file bytes excluding sig. */
+  u32 code_limit = x.codesig_off;
+  build_codesig_skeleton(&x, code_limit, "a.out");
+  /* Now extend linkedit segment to include codesig. */
+  u64 le_size = (u64)x.codesig_off + (u64)x.codesig_size - x.linkedit_fileoff;
+  x.segs[4].filesize = le_size;
+  x.segs[4].vmsize = ALIGN_UP(le_size, MZ_PAGE);
+
+  /* Build load commands buffer. */
+  MByte lc;
+  mbuf_init(&lc, x.h);
+
+  /* LC_SEGMENT_64 for each segment with sections (and PAGEZERO/LINKEDIT). */
+  emit_load_command_segment(&lc, &x, 0); /* PAGEZERO */
+  emit_load_command_segment(&lc, &x, 1); /* TEXT */
+  if (x.segs[2].nsects > 0)
+    emit_load_command_segment(&lc, &x, 2); /* DATA_CONST */
+  if (x.segs[3].nsects > 0) emit_load_command_segment(&lc, &x, 3); /* DATA */
+  emit_load_command_segment(&lc, &x, 4); /* LINKEDIT */
+
+  /* LC_DYLD_CHAINED_FIXUPS (linkedit_data_command: 16B) */
+  mbuf_u32(&lc, LC_DYLD_CHAINED_FIXUPS);
+  mbuf_u32(&lc, 16);
+  mbuf_u32(&lc, x.chained_fixups_off);
+  mbuf_u32(&lc, x.chained_fixups.len);
+
+  /* LC_DYLD_EXPORTS_TRIE */
+  mbuf_u32(&lc, LC_DYLD_EXPORTS_TRIE);
+  mbuf_u32(&lc, 16);
+  mbuf_u32(&lc, x.exports_trie_off);
+  mbuf_u32(&lc, x.exports_trie.len);
+
+  /* LC_SYMTAB */
+  mbuf_u32(&lc, LC_SYMTAB);
+  mbuf_u32(&lc, MACHO_SYMTAB_CMD_SIZE);
+  mbuf_u32(&lc, x.symtab_off);
+  mbuf_u32(&lc, x.nsyms);
+  mbuf_u32(&lc, x.strtab_off);
+  mbuf_u32(&lc, x.strtab.len);
+
+  /* LC_DYSYMTAB */
+  /* nlocal=0, nextdef=#defined-globals, nundef=#imports. build_symtab
+   * only records the total, so defined globals are total - imports. */
+  u32 nlocal = 0;
+  u32 nundef = x.nimports_real;
+  u32 nextdef = (x.nsyms > nundef) ? x.nsyms - nundef - nlocal : 0;
+  mbuf_u32(&lc, LC_DYSYMTAB);
+  mbuf_u32(&lc, MACHO_DYSYMTAB_CMD_SIZE);
+  mbuf_u32(&lc, 0);                /* ilocalsym */
+  mbuf_u32(&lc, nlocal);           /* nlocalsym */
+  mbuf_u32(&lc, nlocal);           /* iextdefsym */
+  mbuf_u32(&lc, nextdef);          /* nextdefsym */
+  mbuf_u32(&lc, nlocal + nextdef); /* iundefsym */
+  mbuf_u32(&lc, nundef);           /* nundefsym */
+  mbuf_u32(&lc, 0);
+  mbuf_u32(&lc, 0); /* tocoff, ntoc */
+  mbuf_u32(&lc, 0);
+  mbuf_u32(&lc, 0); /* modtaboff, nmodtab */
+  mbuf_u32(&lc, 0);
+  mbuf_u32(&lc, 0); /* extrefsymoff, nextrefsyms */
+  mbuf_u32(&lc, x.indirect_off);
+  mbuf_u32(&lc, x.indirect.len / 4u);
+  mbuf_u32(&lc, 0);
+  mbuf_u32(&lc, 0); /* extreloff, nextrel */
+  mbuf_u32(&lc, 0);
+  mbuf_u32(&lc, 0); /* locreloff, nlocrel */
+
+  /* LC_LOAD_DYLINKER: 12-byte header, NUL-terminated path, zero padding
+   * up to the 8-aligned cmd_size (same pattern as LC_LOAD_DYLIB below). */
+  {
+    const char* dyld = "/usr/lib/dyld";
+    u32 dyld_len = (u32)slice_from_cstr(dyld).len;
+    u32 cmd_size = (u32)ALIGN_UP((u64)(12u + dyld_len + 1u), 8u);
+    u32 cmd_start = lc.len;
+    mbuf_u32(&lc, LC_LOAD_DYLINKER);
+    mbuf_u32(&lc, cmd_size);
+    mbuf_u32(&lc, 12u); /* name offset within cmd */
+    mbuf_str(&lc, dyld, dyld_len);
+    while (lc.len - cmd_start < cmd_size) mbuf_u8(&lc, 0);
+  }
+
+  /* LC_UUID */
+  mbuf_u32(&lc, LC_UUID);
+  mbuf_u32(&lc, 24);
+  mbuf_append(&lc, x.uuid, 16);
+
+  /* LC_BUILD_VERSION */
+  mbuf_u32(&lc, LC_BUILD_VERSION);
+  mbuf_u32(&lc, 24);
+  mbuf_u32(&lc, 1);               /* PLATFORM_MACOS */
+  mbuf_u32(&lc, (12u << 16) | 0); /* minos 12.0.0 */
+  mbuf_u32(&lc, (12u << 16) | 0); /* sdk 12.0.0 */
+  mbuf_u32(&lc, 0);               /* ntools */
+
+  /* LC_MAIN — entryoff is offset within __TEXT segment from its file
+   * start (0). */
+  mbuf_u32(&lc, LC_MAIN);
+  mbuf_u32(&lc, 24);
+  mbuf_u64(&lc, (u64)x.entry_offset); /* entryoff = vaddr - __TEXT.vmaddr */
+  mbuf_u64(&lc, 0);                   /* stacksize */
+
+  /* LC_LOAD_DYLIB per dylib. */
+  for (u32 i = 0; i < x.ndylibs; ++i) {
+    Slice nm_s = pool_slice(x.c->global, x.dylibs[i].install);
+    const char* nm = nm_s.s;
+    size_t nl = nm_s.len;
+    u32 cmd_size = (u32)ALIGN_UP((u64)(24u + (u32)nl + 1u), 8u);
+    u32 cmd_start = lc.len;
+    mbuf_u32(&lc, LC_LOAD_DYLIB);
+    mbuf_u32(&lc, cmd_size);
+    mbuf_u32(&lc, 24u);        /* name offset */
+    mbuf_u32(&lc, 0);          /* timestamp */
+    mbuf_u32(&lc, (1u << 16)); /* current_version 1.0 */
+    mbuf_u32(&lc, (1u << 16)); /* compat_version 1.0 */
+    mbuf_str(&lc, nm ? nm : "", (u32)nl);
+    while (lc.len - cmd_start < cmd_size) mbuf_u8(&lc, 0);
+  }
+
+  /* LC_FUNCTION_STARTS / LC_DATA_IN_CODE — empty. */
+  mbuf_u32(&lc, LC_FUNCTION_STARTS_C);
+  mbuf_u32(&lc, 16);
+  mbuf_u32(&lc, x.fn_starts_off);
+  mbuf_u32(&lc, 0);
+
+  mbuf_u32(&lc, LC_DATA_IN_CODE_C);
+  mbuf_u32(&lc, 16);
+  mbuf_u32(&lc, x.data_in_code_off);
+  mbuf_u32(&lc, 0);
+
+  /* LC_CODE_SIGNATURE */
+  mbuf_u32(&lc, LC_CODE_SIGNATURE_C);
+  mbuf_u32(&lc, 16);
+  mbuf_u32(&lc, x.codesig_off);
+  mbuf_u32(&lc, x.codesig_size);
+
+  /* Sanity: lc.len + MACHO_HDR64_SIZE must equal headers_size we
+   * predicted in plan_layout. If not, we mis-sized — panic. */
+  if ((u64)lc.len + MACHO_HDR64_SIZE != x.headers_size) {
+    compiler_panic(x.c, no_loc(),
+                   "link_macho: load-cmd size mismatch: predicted %llu got %u",
+                   (unsigned long long)(x.headers_size - MACHO_HDR64_SIZE),
+                   lc.len);
+  }
+
+  /* ---- now stream the file ---- */
+  /* Assemble the whole file in a flat buffer first (so it can be hashed
+   * for the code signature) and write it out in one go at the end. */
+  MByte file;
+  mbuf_init(&file, x.h);
+
+  /* mach_header_64 */
+  u32 ncmds = 0;
+  /* Count: PAGEZERO + TEXT + maybe DATA_CONST + maybe DATA + LINKEDIT
+   * + chained + exports_trie + symtab + dysymtab + dyld + uuid +
+   * build_version + main + nDylibs + fn_starts + data_in_code +
+   * codesig. */
+  ncmds += 2; /* PAGEZERO + TEXT */
+  if (x.segs[2].nsects > 0) ncmds++;
+  if (x.segs[3].nsects > 0) ncmds++;
+  ncmds++; /* LINKEDIT */
+  ncmds += 11 + x.ndylibs;
+  /* (chained, exports_trie, symtab, dysymtab, dyld, uuid, build_version,
+   * main, fn_starts, data_in_code, codesig) = 11 */
+
+  mbuf_u32(&file, MH_MAGIC_64);
+  mbuf_u32(&file, x.macho->cputype);
+  mbuf_u32(&file, x.macho->cpusubtype);
+  mbuf_u32(&file, MH_EXECUTE);
+  mbuf_u32(&file, ncmds);
+  mbuf_u32(&file, lc.len);
+  {
+    u32 mh_flags = MH_DYLDLINK | MH_TWOLEVEL | MH_NOUNDEFS | MH_PIE;
+    /* dyld scans __thread_vars and allocates a pthread_key for each
+     * descriptor only when this flag is set; without it the descriptor's
+     * thunk pointer is silently patched to _tlv_bootstrap_error. Apple's
+     * ld sets it whenever the image contains S_THREAD_LOCAL_* sections. */
+    if (x.ntlv) mh_flags |= MH_HAS_TLV_DESCRIPTORS;
+    mbuf_u32(&file, mh_flags);
+  }
+  mbuf_u32(&file, 0); /* reserved */
+  mbuf_append(&file, lc.data, lc.len);
+
+  /* __TEXT's first section begins at headers_size; header + load
+   * commands are exactly headers_size bytes (checked above), so no pad
+   * is needed here. From now on each MSec's file_offset says where its
+   * bytes go. */
+
+  /* Emit segment payload bytes per MSec. */
+  for (u32 i = 0; i < x.nsecs; ++i) {
+    MSec* m = &x.secs[i];
+    if (m->is_zerofill || m->size == 0) continue;
+    /* Pad up to m->file_offset. */
+    while (file.len < m->file_offset) mbuf_u8(&file, 0);
+    if (m->synth_data) {
+      mbuf_append(&file, m->synth_data, m->synth_size);
+    } else {
+      LinkSection* ls = &img->sections[m->link_sec_id - 1u];
+      u32 segid = ls->segment_id;
+      u8* base =
+          (segid != LINK_SEG_NONE) ? img->segment_bytes[segid - 1u] : NULL;
+      if (base && ls->size) {
+        mbuf_append(&file, base + ls->input_offset, (u32)ls->size);
+      } else if (ls->size) {
+        /* No backing bytes for this section: emit zeros. */
+        for (u64 k = 0; k < ls->size; ++k) mbuf_u8(&file, 0);
+      }
+    }
+  }
+
+  /* Pad to LINKEDIT start. */
+  while (file.len < x.linkedit_fileoff) mbuf_u8(&file, 0);
+
+  /* LINKEDIT contents in declared order. */
+  while (file.len < x.chained_fixups_off) mbuf_u8(&file, 0);
+  mbuf_append(&file, x.chained_fixups.data, x.chained_fixups.len);
+  while (file.len < x.exports_trie_off) mbuf_u8(&file, 0);
+  mbuf_append(&file, x.exports_trie.data, x.exports_trie.len);
+  while (file.len < x.fn_starts_off) mbuf_u8(&file, 0);
+  /* fn_starts is empty */
+  while (file.len < x.data_in_code_off) mbuf_u8(&file, 0);
+  /* data_in_code is empty */
+  while (file.len < x.symtab_off) mbuf_u8(&file, 0);
+  mbuf_append(&file, x.symtab.data, x.symtab.len);
+  while (file.len < x.indirect_off) mbuf_u8(&file, 0);
+  mbuf_append(&file, x.indirect.data, x.indirect.len);
+  while (file.len < x.strtab_off) mbuf_u8(&file, 0);
+  mbuf_append(&file, x.strtab.data, x.strtab.len);
+  while (file.len < x.codesig_off) mbuf_u8(&file, 0);
+
+  /* Compute codesig hashes over file bytes [0, codesig_off). */
+  /* The codesig blob currently has zero hashes; hash now. */
+  compute_codesig(&x, file.data, x.codesig_off, "a.out");
+  /* Append codesig. */
+  mbuf_append(&file, x.codesig.data, x.codesig.len);
+
+  /* Stream out. */
+  cfree_writer_seek(w, 0);
+  cfree_writer_write(w, file.data, file.len);
+
+  /* Cleanup. */
+  fix_fini(&fl);
+  mbuf_fini(&lc);
+  mbuf_fini(&file);
+  mbuf_fini(&x.chained_fixups);
+  mbuf_fini(&x.exports_trie);
+  mbuf_fini(&x.symtab);
+  mbuf_fini(&x.strtab);
+  mbuf_fini(&x.indirect);
+  mbuf_fini(&x.fn_starts);
+  mbuf_fini(&x.data_in_code);
+  mbuf_fini(&x.codesig);
+  if (x.imports) x.h->free(x.h, x.imports, 0); /* VEC_GROW: cap unknown */
+  if (x.dylibs) x.h->free(x.h, x.dylibs, 0);
+  if (x.sym_to_imp)
+    x.h->free(x.h, x.sym_to_imp, sizeof(u32) * x.sym_to_imp_size);
+  if (x.secs) x.h->free(x.h, x.secs, 0);
+  if (x.stubs_bytes) x.h->free(x.h, x.stubs_bytes, x.stubs_size);
+  if (x.got_bytes) x.h->free(x.h, x.got_bytes, x.got_size);
+  if (x.tlv_ptrs_bytes) x.h->free(x.h, x.tlv_ptrs_bytes, x.tlv_ptrs_size);
+  if (x.tlv_slots) x.h->free(x.h, x.tlv_slots, 0);
+  if (x.sym_to_tlv)
+    x.h->free(x.h, x.sym_to_tlv, sizeof(u32) * x.sym_to_tlv_size);
+}
diff --git a/src/obj/macho.h b/src/obj/macho/macho.h
diff --git a/src/obj/macho/read.c b/src/obj/macho/read.c
@@ -0,0 +1,651 @@
+/* Mach-O MH_OBJECT reader. Parses a 64-bit little-endian relocatable
+ * object back into a fresh ObjBuilder. The post-finalize ObjBuilder
+ * shape is the canonical superset of the writer's input:
+ * read_macho of an emit_macho output produces an ObjBuilder
+ * shape-equivalent to the writer's input, modulo the synthesized
+ * "__SEG,__sect"-form section names.
+ *
+ * Scope: read_macho handles MH_OBJECT only and decodes relocations
+ * with the ARM64_RELOC_* numbering; read_macho_dso below reads the
+ * exports of an MH_DYLIB / MH_BUNDLE. Unsupported inputs compiler_panic. */
+
+#include <string.h>
+
+#include "core/arena.h"
+#include "core/bytes.h"
+#include "core/heap.h"
+#include "core/pool.h"
+#include "core/slice.h"
+#include "core/util.h"
+#include "obj/format.h"
+#include "obj/macho/macho.h"
+
+/* All-zero SrcLoc handed to compiler_panic by the reader's diagnostics. */
+static SrcLoc no_loc(void) {
+  return (SrcLoc){0, 0, 0};
+}
+
+/* ---- mach-section scratch struct ---- */
+
+typedef struct MSecRec { /* one section header captured by read_macho */
+ char segname[16]; /* raw 16-byte field copied from header offset 16 */
+ char sectname[16]; /* raw 16-byte field copied from header offset 0 */
+ u32 seg_len; /* fixed16_len(segname) */
+ u32 sect_len; /* fixed16_len(sectname) */
+ u64 addr; /* header offset 32; subtracted from n_value of N_SECT syms */
+ u64 size; /* header offset 40; bytes copied, or reserved when NOBITS */
+ u32 fileoff; /* header offset 48; file offset of the section bytes */
+ u32 align_log2; /* header offset 52; alignment is 1u << (value & 31) */
+ u32 reloff; /* header offset 56; file offset of the relocation table */
+ u32 nreloc; /* header offset 60; number of relocation entries */
+ u32 flags; /* header offset 64; section type and S_ATTR_* bits */
+ u32 reserved2; /* header offset 72; forwarded to obj_section_ex */
+ ObjSecId obj_sec; /* assigned in pass 2, once the ObjSection exists */
+} MSecRec;
+
+/* Length of a 16-byte NUL-padded name field; 16 when no NUL is present. */
+static u32 fixed16_len(const char* s) {
+  const char* nul = memchr(s, 0, 16);
+  return nul ? (u32)(nul - s) : 16u;
+}
+
+/* Map a Mach-O section to a cfree SecKind; flag bits beat the names. */
+static u16 sec_kind_from_seg_sect(const char* segname, u32 seg_len,
+                                  const char* sectname, u32 sect_len,
+                                  u32 flags) {
+  const u32 st = flags & SECTION_TYPE;
+  if (st == S_ZEROFILL || st == S_THREAD_LOCAL_ZEROFILL) return SEC_BSS;
+  if (flags & S_ATTR_PURE_INSTRUCTIONS) return SEC_TEXT;
+  if (seg_len == 7 && !memcmp(segname, "__DWARF", 7)) return SEC_DEBUG;
+  if (seg_len == 6 && !memcmp(segname, "__TEXT", 6)) {
+    int is_code = sect_len == 6 && !memcmp(sectname, "__text", 6);
+    return is_code ? SEC_TEXT : SEC_RODATA; /* __const, __cstring, ... */
+  }
+  if (seg_len == 6 && !memcmp(segname, "__DATA", 6)) {
+    int is_bss = sect_len == 5 && !memcmp(sectname, "__bss", 5);
+    return is_bss ? SEC_BSS : SEC_DATA;
+  }
+  return SEC_OTHER;
+}
+
+/* Derive the cfree SF_* section flags from the raw Mach-O section flags
+ * and the SecKind already chosen for the section. */
+static u16 sec_flags_from(u32 mflags, u16 sec_kind) {
+  const u32 sec_type = mflags & SECTION_TYPE;
+  u16 out = 0;
+  if (sec_kind == SEC_TEXT || (mflags & S_ATTR_PURE_INSTRUCTIONS))
+    out = SF_ALLOC | SF_EXEC;
+  else if (sec_kind == SEC_RODATA)
+    out = SF_ALLOC;
+  else if (sec_kind == SEC_DATA || sec_kind == SEC_BSS)
+    out = SF_ALLOC | SF_WRITE;
+  if (sec_type == S_THREAD_LOCAL_REGULAR ||
+      sec_type == S_THREAD_LOCAL_ZEROFILL ||
+      sec_type == S_THREAD_LOCAL_VARIABLES)
+    out |= SF_TLS;
+  if (sec_type == S_CSTRING_LITERALS)
+    out |= SF_MERGE | SF_STRINGS;
+  return out;
+}
+
+/* Pick the SecSem from the Mach-O section type (BSS is always NOBITS). */
+static u16 sec_sem_from(u32 mflags, u16 sec_kind) {
+  const u32 sec_type = mflags & SECTION_TYPE;
+  const int zero_filled =
+      sec_type == S_ZEROFILL || sec_type == S_THREAD_LOCAL_ZEROFILL;
+  if (zero_filled || sec_kind == SEC_BSS) return SSEM_NOBITS;
+  if (sec_type == S_MOD_INIT_FUNC_POINTERS) return SSEM_INIT_ARRAY;
+  return sec_type == S_MOD_TERM_FUNC_POINTERS ? SSEM_FINI_ARRAY
+                                              : SSEM_PROGBITS;
+}
+
+/* Read the (1 << r_length)-byte little-endian value stored at a
+ * relocation site, zero-extended to 64 bits. */
+static u64 rd_reloc_inplace(const u8* pv, u32 r_length) {
+  if (r_length == 3) return rd_u64_le(pv);
+  if (r_length == 2) return (u64)rd_u32_le(pv);
+  if (r_length == 1) return (u64)rd_u16_le(pv);
+  return (u64)pv[0];
+}
+
+/* Parse a 64-bit little-endian Mach-O MH_OBJECT image (`data`, `len`)
+ * into a fresh ObjBuilder and finalize it. `name` is currently unused.
+ * Sections, symbols and relocations are rebuilt in four passes;
+ * relocation types are decoded with the ARM64_RELOC_* numbering.
+ * Malformed or unsupported input (bad magic, cputype or filetype,
+ * tables or relocation sites outside the file) hits compiler_panic. */
+ObjBuilder* read_macho(Compiler* c, const char* name, const u8* data,
+                       size_t len) {
+  (void)name;
+  if (len < MACHO_HDR64_SIZE)
+    compiler_panic(c, no_loc(), "read_macho: input shorter than header");
+
+  u32 magic = rd_u32_le(data + 0);
+  if (magic != MH_MAGIC_64)
+    compiler_panic(c, no_loc(), "read_macho: bad magic 0x%x", magic);
+
+  u32 cputype = rd_u32_le(data + 4);
+  const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_MACHO);
+  const ObjMachoArchOps* macho =
+      fmt && fmt->macho_cputype ? fmt->macho_cputype(cputype) : NULL;
+  u32 filetype = rd_u32_le(data + 12);
+  u32 ncmds = rd_u32_le(data + 16);
+  u32 sizeofcmds = rd_u32_le(data + 20);
+
+  if (!macho || !macho->reloc_from)
+    compiler_panic(c, no_loc(), "read_macho: unsupported cputype 0x%x",
+                   cputype);
+  if (filetype != MH_OBJECT)
+    compiler_panic(c, no_loc(),
+                   "read_macho: only MH_OBJECT supported, got filetype %u",
+                   filetype);
+
+  if ((u64)MACHO_HDR64_SIZE + sizeofcmds > len)
+    compiler_panic(c, no_loc(), "read_macho: load commands exceed file");
+
+  /* ---- pass 1: walk load commands, collect sections, symtab cmd. */
+  MSecRec* msecs = NULL;
+  u32 nmsecs = 0;
+  u32 symoff = 0, nsyms = 0, stroff = 0, strsize = 0;
+
+  u64 pos = MACHO_HDR64_SIZE;
+  u64 end = pos + sizeofcmds;
+  for (u32 ci = 0; ci < ncmds && pos + 8 <= end; ++ci) {
+    u32 cmd = rd_u32_le(data + pos);
+    u32 cmdsize = rd_u32_le(data + pos + 4);
+    if (cmdsize < 8 || pos + cmdsize > end)
+      compiler_panic(c, no_loc(), "read_macho: malformed load command");
+
+    if (cmd == LC_SEGMENT_64) {
+      if (cmdsize < MACHO_SEGCMD64_SIZE) /* nsects sits in the fixed part */
+        compiler_panic(c, no_loc(), "read_macho: segment cmd truncated");
+      u32 nsects = rd_u32_le(data + pos + 64);
+      if (MACHO_SEGCMD64_SIZE + (u64)nsects * MACHO_SECT64_SIZE > cmdsize)
+        compiler_panic(c, no_loc(), "read_macho: segment cmd truncated");
+      MSecRec* extra = arena_array(c->scratch, MSecRec, nmsecs + nsects);
+      if (msecs && nmsecs) memcpy(extra, msecs, sizeof(MSecRec) * nmsecs);
+      msecs = extra;
+      const u8* sp = data + pos + MACHO_SEGCMD64_SIZE;
+      for (u32 si = 0; si < nsects; ++si, sp += MACHO_SECT64_SIZE) {
+        MSecRec* m = &msecs[nmsecs++];
+        memset(m, 0, sizeof *m);
+        memcpy(m->sectname, sp + 0, 16);
+        memcpy(m->segname, sp + 16, 16);
+        m->seg_len = fixed16_len(m->segname);
+        m->sect_len = fixed16_len(m->sectname);
+        m->addr = rd_u64_le(sp + 32);
+        m->size = rd_u64_le(sp + 40);
+        m->fileoff = rd_u32_le(sp + 48);
+        m->align_log2 = rd_u32_le(sp + 52);
+        m->reloff = rd_u32_le(sp + 56);
+        m->nreloc = rd_u32_le(sp + 60);
+        m->flags = rd_u32_le(sp + 64);
+        m->reserved2 = rd_u32_le(sp + 72);
+      }
+    } else if (cmd == LC_SYMTAB) {
+      if (cmdsize < 24) /* sizeof(struct symtab_command) */
+        compiler_panic(c, no_loc(), "read_macho: symtab cmd truncated");
+      symoff = rd_u32_le(data + pos + 8);
+      nsyms = rd_u32_le(data + pos + 12);
+      stroff = rd_u32_le(data + pos + 16);
+      strsize = rd_u32_le(data + pos + 20);
+    }
+    pos += cmdsize;
+  }
+
+  if (stroff + (u64)strsize > len)
+    compiler_panic(c, no_loc(), "read_macho: string table out of range");
+  if (symoff + (u64)nsyms * MACHO_NLIST64_SIZE > len)
+    compiler_panic(c, no_loc(), "read_macho: symbol table out of range");
+  const u8* strtab = data + stroff;
+
+  ObjBuilder* ob = obj_new(c);
+  if (!ob) compiler_panic(c, no_loc(), "read_macho: obj_new failed");
+
+  /* ---- pass 2: create ObjSecs and copy bytes. */
+  for (u32 i = 0; i < nmsecs; ++i) {
+    MSecRec* m = &msecs[i];
+    /* Build "__SEG,__sect"-form name; matches what emit_macho would
+     * round-trip back out. */
+    char nmbuf[34];
+    u32 nlen = 0;
+    memcpy(nmbuf + nlen, m->segname, m->seg_len);
+    nlen += m->seg_len;
+    nmbuf[nlen++] = ',';
+    memcpy(nmbuf + nlen, m->sectname, m->sect_len);
+    nlen += m->sect_len;
+    Sym sn = pool_intern_slice(c->global, (Slice){.s = nmbuf, .len = nlen});
+
+    u16 kind = sec_kind_from_seg_sect(m->segname, m->seg_len, m->sectname,
+                                      m->sect_len, m->flags);
+    u16 flags = sec_flags_from(m->flags, kind);
+    u16 sem = sec_sem_from(m->flags, kind);
+    u32 align = 1u << (m->align_log2 & 31);
+
+    ObjSecId id = obj_section_ex(ob, sn, (SecKind)kind, (SecSem)sem, flags,
+                                 align, m->reserved2, 0, 0);
+    if (id == OBJ_SEC_NONE)
+      compiler_panic(c, no_loc(), "read_macho: obj_section_ex failed");
+
+    /* Preserve the raw mach section.flags so emit_macho can write back
+     * the same S_TYPE / S_ATTR_* bits. */
+    obj_section_set_ext(ob, id, OBJ_EXT_MACHO, m->flags, 0);
+
+    if (sem == SSEM_NOBITS) {
+      obj_reserve_bss(ob, id, (u32)m->size, align);
+    } else if (m->size) {
+      if (m->size > len || m->fileoff > len - m->size) /* wrap-safe */
+        compiler_panic(c, no_loc(), "read_macho: section bytes out of range");
+      obj_write(ob, id, data + m->fileoff, (size_t)m->size);
+    }
+    m->obj_sec = id;
+  }
+
+  /* ---- pass 3: parse symbol table. A single walk over the nlist
+   *      entries creates one ObjSym per entry (undefs, commons,
+   *      absolutes and section-defined symbols alike) and records
+   *      mach_idx -> ObjSymId so reloc resolution works. */
+  ObjSymId* sym_macho_to_obj =
+      arena_zarray(c->scratch, ObjSymId, nsyms ? nsyms : 1);
+
+  const u8* sbase = data + symoff;
+  for (u32 i = 0; i < nsyms; ++i) {
+    const u8* p = sbase + (u64)i * MACHO_NLIST64_SIZE;
+    u32 strx = rd_u32_le(p + 0);
+    u8 n_type = p[4];
+    u8 n_sect = p[5];
+    u16 n_desc = rd_u16_le(p + 6);
+    u64 n_value = rd_u64_le(p + 8);
+
+    const char* nm = "";
+    u32 nlen = 0;
+    if (strx < strsize) {
+      nm = (const char*)(strtab + strx);
+      while (strx + nlen < strsize && nm[nlen]) ++nlen;
+    }
+    /* Mach-O names round-trip verbatim — the leading `_` Apple
+     * toolchains apply to C symbols is part of the on-disk name as
+     * far as ObjBuilder is concerned. Name-canonicalization (the
+     * `test_main` ↔ `_test_main` mapping for API callers) happens
+     * one layer up at the linker API boundary (link_c_name_intern
+     * in link.c); the on-disk shape stays byte-for-byte stable. */
+    Sym sn =
+        nlen ? pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}) : 0;
+
+    u8 type_field = (u8)(n_type & N_TYPE);
+    u8 ext = (u8)(n_type & N_EXT);
+    u8 pext = (u8)(n_type & N_PEXT);
+
+    u16 bind = ext ? SB_GLOBAL : SB_LOCAL;
+    /* Weak DEFs (defined symbols) carry N_WEAK_DEF; weak REFs (undef
+     * `__attribute__((weak))` references) carry N_WEAK_REF. Either
+     * one collapses to SB_WEAK in the cfree model. */
+    if (ext && (n_desc & (N_WEAK_DEF | N_WEAK_REF))) bind = SB_WEAK;
+    u8 vis = pext ? SV_HIDDEN : SV_DEFAULT;
+
+    u16 kind;
+    ObjSecId sec_id = OBJ_SEC_NONE;
+    u64 value = 0;
+    u64 size = 0;
+    u64 cmnalign = 0;
+
+    if (type_field == N_UNDF) {
+      if (ext && n_value != 0) {
+        /* Common: n_value is size, n_desc encodes log2(align) in
+         * GET_COMM_ALIGN bits. */
+        kind = SK_COMMON;
+        value = 0;
+        size = n_value;
+        u32 la = (u32)((n_desc >> 8) & 0xf);
+        cmnalign = 1u << la;
+      } else {
+        kind = SK_UNDEF;
+      }
+    } else if (type_field == N_ABS) {
+      kind = SK_ABS;
+      value = n_value;
+    } else if (type_field == N_SECT) {
+      if (n_sect == 0 || n_sect > nmsecs) {
+        kind = SK_NOTYPE;
+      } else {
+        sec_id = msecs[n_sect - 1].obj_sec;
+        /* Mach-O n_value for defined symbols is segment-relative addr;
+         * convert back to a section-local offset. */
+        u64 base = msecs[n_sect - 1].addr;
+        value = (n_value >= base) ? (n_value - base) : 0;
+        kind = (msecs[n_sect - 1].flags & S_ATTR_PURE_INSTRUCTIONS) ? SK_FUNC
+                                                                    : SK_OBJ;
+      }
+    } else {
+      kind = SK_NOTYPE;
+    }
+
+    ObjSymId id = obj_symbol_ex(ob, sn, (SymBind)bind, (SymVis)vis,
+                                (SymKind)kind, sec_id, value, size, cmnalign);
+    obj_sym_mark_referenced(ob, id);
+    /* n_desc carries Mach-O attribute bits beyond what bind/vis/kind
+     * model — N_NO_DEAD_STRIP, N_REF_TO_WEAK, N_ARM_THUMB_DEF, etc.
+     * Mask off the bits we already round-trip via bind (N_WEAK_DEF /
+     * N_WEAK_REF) and the alignment field for commons (which lives
+     * in cmnalign), then stash the remainder so emit_macho can OR it
+     * back in. */
+    u16 desc_pass = n_desc;
+    desc_pass &= (u16) ~(N_WEAK_DEF | N_WEAK_REF);
+    if (kind == SK_COMMON) desc_pass &= 0x00ff; /* drop align field */
+    if (desc_pass) obj_symbol_set_flags(ob, id, desc_pass);
+    sym_macho_to_obj[i] = id;
+  }
+
+  /* ---- pass 4: parse per-section relocations into ObjBuilder relocs.
+   *      Mach-O encodes addends out-of-band as a leading
+   *      ARM64_RELOC_ADDEND followed by the real reloc; the
+   *      reader collapses the pair on the way in. */
+  /* Lazily-populated section-start local symbols for non-extern
+   * (section-relative) relocations; see the r_extern==0 branch below. */
+  ObjSymId* sec_start_sym =
+      arena_zarray(c->scratch, ObjSymId, nmsecs ? nmsecs : 1);
+  for (u32 i = 0; i < nmsecs; ++i) sec_start_sym[i] = OBJ_SYM_NONE;
+  for (u32 i = 0; i < nmsecs; ++i) {
+    MSecRec* m = &msecs[i];
+    if (!m->nreloc) continue;
+    if (m->reloff + (u64)m->nreloc * MACHO_RELOC_SIZE > len)
+      compiler_panic(c, no_loc(), "read_macho: relocation table out of range");
+    const u8* rp = data + m->reloff;
+    i64 pending_addend = 0;
+    int have_pending = 0;
+    int pending_subtractor = 0;
+    u32 pending_subtractor_offset = 0;
+    u32 pending_subtractor_length = 0;
+    for (u32 j = 0; j < m->nreloc; ++j) {
+      u32 r_address = rd_u32_le(rp + j * MACHO_RELOC_SIZE);
+      u32 packed = rd_u32_le(rp + j * MACHO_RELOC_SIZE + 4);
+      u32 r_symbolnum = packed & 0x00ffffffu;
+      u32 r_pcrel = (packed >> 24) & 1u;
+      u32 r_length = (packed >> 25) & 3u;
+      u32 r_extern = (packed >> 27) & 1u;
+      u32 r_type = (packed >> 28) & 0xfu;
+
+      if (r_type == ARM64_RELOC_ADDEND) {
+        /* Sign-extend 24-bit addend. */
+        i32 ad = (i32)(r_symbolnum & 0x00ffffffu);
+        if (ad & 0x00800000) ad |= ~0x00ffffff;
+        pending_addend = (i64)ad;
+        have_pending = 1;
+        continue;
+      }
+
+      u32 kind;
+      if (r_type == ARM64_RELOC_SUBTRACTOR) {
+        kind = (r_length == 3)   ? R_RV_SUB64
+               : (r_length == 2) ? R_RV_SUB32
+               : (r_length == 1) ? R_RV_SUB16
+                                 : R_RV_SUB8;
+      } else {
+        kind = macho->reloc_from(r_type);
+      }
+      if (kind == (u32)-1)
+        compiler_panic(c, no_loc(), "read_macho: unsupported reloc type %u",
+                       r_type);
+
+      /* Refine kind by (r_pcrel, r_length) when the type field alone
+       * is ambiguous. ARM64_RELOC_UNSIGNED collapses R_ABS64/R_ABS32
+       * and PC-relative variants. */
+      if (r_type == ARM64_RELOC_UNSIGNED) {
+        if (pending_subtractor && pending_subtractor_offset == r_address &&
+            pending_subtractor_length == r_length) {
+          kind = (r_length == 3)   ? R_RV_ADD64
+                 : (r_length == 2) ? R_RV_ADD32
+                 : (r_length == 1) ? R_RV_ADD16
+                                   : R_RV_ADD8;
+          pending_subtractor = 0;
+        } else if (r_pcrel) {
+          kind = (r_length == 3) ? R_PC64 : R_PC32;
+        } else {
+          kind = (r_length == 3) ? R_ABS64 : R_ABS32;
+        }
+      } else if (r_type == ARM64_RELOC_BRANCH26) {
+        kind = R_AARCH64_CALL26;
+      } else if (r_type == ARM64_RELOC_PAGEOFF12) {
+        /* PAGEOFF12 is access-size-agnostic in Mach-O; the linker
+         * applier needs to scale the immediate by the load/store size
+         * (or apply it raw for ADD). Inspect the patched instruction
+         * at r_address to pick the right RelocKind so the applier in
+         * link_reloc.c shifts the lo12 correctly. */
+        if ((u64)m->fileoff + r_address + 4u > len)
+          compiler_panic(c, no_loc(),
+                         "read_macho: PAGEOFF12 r_address %u out of range",
+                         r_address);
+        u32 ins = rd_u32_le(data + m->fileoff + r_address);
+        /* ADD (immediate): mask 0x7f800000 leaves bit 31 (sf) free, so
+         * 0x11000000 (32-bit) and 0x91000000 (64-bit) both match the
+         * single 0x11000000 comparison. */
+        if ((ins & 0x7f800000u) == 0x11000000u) {
+          kind = R_AARCH64_ADD_ABS_LO12_NC;
+        } else if ((ins & 0x3b000000u) == 0x39000000u) {
+          /* LDR/STR (immediate unsigned offset). Bits 29:27=111, bit 26=V
+           * (0=integer, 1=SIMD/FP), bits 25:24=01. size in [31:30] plus
+           * opc bit 23 for the SIMD 128-bit case (size=00, opc=11). */
+          u32 sz = (ins >> 30) & 3u;
+          u32 v_bit = (ins >> 26) & 1u;
+          u32 opc1 = (ins >> 23) & 1u;
+          if (v_bit && sz == 0 && opc1) {
+            kind = R_AARCH64_LDST128_ABS_LO12_NC;
+          } else {
+            kind = (sz == 0)   ? R_AARCH64_LDST8_ABS_LO12_NC
+                   : (sz == 1) ? R_AARCH64_LDST16_ABS_LO12_NC
+                   : (sz == 2) ? R_AARCH64_LDST32_ABS_LO12_NC
+                               : R_AARCH64_LDST64_ABS_LO12_NC;
+          }
+        }
+        /* else: leave as the default R_AARCH64_ADD_ABS_LO12_NC. */
+      }
+
+      ObjSymId target = OBJ_SYM_NONE;
+      i64 inplace_addend_override = 0;
+      int use_inplace_addend = 0;
+      if (r_extern) {
+        if (r_symbolnum < nsyms) target = sym_macho_to_obj[r_symbolnum];
+        if (!have_pending && r_type == ARM64_RELOC_UNSIGNED) {
+          u32 rsz = 1u << r_length;
+          if ((u64)m->fileoff + r_address + rsz > len)
+            compiler_panic(c, no_loc(),
+                           "read_macho: extern unsigned reloc r_address out "
+                           "of range");
+          inplace_addend_override =
+              (i64)rd_reloc_inplace(data + m->fileoff + r_address, r_length);
+          use_inplace_addend = 1;
+        }
+      } else {
+        /* Section-relative reloc — clang emits these for compact unwind,
+         * EH frame, and DWARF debug info. r_symbolnum is the 1-based
+         * section index; the in-place value at r_address is the absolute
+         * .o virtual address of the referent. Synthesize a local
+         * symbol pointing to the target section's start (lazily, once
+         * per section) and re-express the reloc as
+         * target = sec_start_sym, addend = inplace - section.addr. */
+        if (r_symbolnum == 0 || r_symbolnum > nmsecs)
+          compiler_panic(c, no_loc(),
+                         "read_macho: section-relative reloc references "
+                         "invalid section index %u",
+                         r_symbolnum);
+        u32 sec_idx = r_symbolnum - 1u;
+        MSecRec* tm = &msecs[sec_idx];
+        if (sec_start_sym[sec_idx] == OBJ_SYM_NONE) {
+          /* Build ".Lcfree.macho_secstart.<sec_idx>" without snprintf
+           * (the freestanding build doesn't pull in stdio). */
+          static const char prefix[] = ".Lcfree.macho_secstart.";
+          char nmbuf[sizeof(prefix) + 10];
+          u32 nlen = (u32)(sizeof(prefix) - 1);
+          memcpy(nmbuf, prefix, nlen);
+          char dec[10];
+          u32 dn = 0;
+          u32 v = sec_idx;
+          do {
+            dec[dn++] = (char)('0' + (v % 10u));
+            v /= 10u;
+          } while (v);
+          for (u32 k = 0; k < dn; ++k) nmbuf[nlen + k] = dec[dn - 1 - k];
+          nlen += dn;
+          Sym sn =
+              pool_intern_slice(c->global, (Slice){.s = nmbuf, .len = nlen});
+          u16 sk = (tm->flags & S_ATTR_PURE_INSTRUCTIONS) ? SK_FUNC : SK_OBJ;
+          sec_start_sym[sec_idx] =
+              obj_symbol(ob, sn, SB_LOCAL, (SymKind)sk, tm->obj_sec, 0, 0);
+        }
+        target = sec_start_sym[sec_idx];
+        u32 rsz = 1u << r_length;
+        if ((u64)m->fileoff + r_address + rsz > len)
+          compiler_panic(c, no_loc(),
+                         "read_macho: non-extern reloc r_address out of range");
+        inplace_addend_override =
+            (i64)rd_reloc_inplace(data + m->fileoff + r_address, r_length) -
+            (i64)tm->addr;
+        use_inplace_addend = 1;
+      }
+
+      i64 addend = have_pending
+                       ? pending_addend
+                       : (use_inplace_addend ? inplace_addend_override : 0);
+      int has_explicit = have_pending || use_inplace_addend || addend != 0;
+      have_pending = 0;
+      pending_addend = 0;
+
+      obj_reloc_ex(ob, m->obj_sec, r_address, (RelocKind)kind, target, addend,
+                   has_explicit, 0);
+      if (r_type == ARM64_RELOC_SUBTRACTOR) {
+        pending_subtractor = 1;
+        pending_subtractor_offset = r_address;
+        pending_subtractor_length = r_length;
+      }
+    }
+  }
+
+  obj_finalize(ob);
+  return ob;
+}
+
+/* ---- read_macho_dso ----
+ *
+ * MH_DYLIB / MH_BUNDLE reader. Walks load commands once to find
+ * LC_ID_DYLIB (install-name) and LC_SYMTAB (symbol + string tables),
+ * then emits one defined ObjSym per externally-visible nlist entry.
+ * `*install_name_out`, when non-NULL, receives the interned install
+ * name, or 0 if the image carries none.
+ *
+ * Like read_elf_dso, the produced ObjBuilder carries no sections /
+ * relocations / groups — only symbol definitions in OBJ_SEC_NONE. The
+ * consumer's resolve_undefs sees these as defined globals and marks the
+ * matching consumer-side undef as `imported`. The dylib's own undefs
+ * (its imports of other dylibs) are filtered: they don't satisfy any
+ * undef in the consumer. */
+
+ObjBuilder* read_macho_dso(Compiler* c, const char* name, const u8* data,
+                           size_t len, Sym* install_name_out) {
+  (void)name;
+  if (install_name_out) *install_name_out = 0;
+  if (len < MACHO_HDR64_SIZE)
+    compiler_panic(c, no_loc(), "read_macho_dso: input shorter than header");
+
+  u32 magic = rd_u32_le(data + 0);
+  if (magic != MH_MAGIC_64)
+    compiler_panic(c, no_loc(), "read_macho_dso: bad magic 0x%x", magic);
+
+  u32 cputype = rd_u32_le(data + 4);
+  u32 filetype = rd_u32_le(data + 12);
+  u32 ncmds = rd_u32_le(data + 16);
+  u32 sizeofcmds = rd_u32_le(data + 20);
+
+  const ObjFormatImpl* fmt = obj_format_lookup(CFREE_OBJ_MACHO);
+  const ObjMachoArchOps* macho =
+      fmt && fmt->macho_cputype ? fmt->macho_cputype(cputype) : NULL;
+  if (!macho)
+    compiler_panic(c, no_loc(), "read_macho_dso: unsupported cputype 0x%x",
+                   cputype);
+  if (filetype != MH_DYLIB && filetype != MH_BUNDLE)
+    compiler_panic(c, no_loc(),
+                   "read_macho_dso: not MH_DYLIB/MH_BUNDLE (filetype=%u)",
+                   filetype);
+  if ((u64)MACHO_HDR64_SIZE + sizeofcmds > len)
+    compiler_panic(c, no_loc(), "read_macho_dso: load commands exceed file");
+
+  u32 symoff = 0, nsyms = 0, stroff = 0, strsize = 0;
+  Sym install_name = 0;
+
+  u64 pos = MACHO_HDR64_SIZE;
+  u64 end = pos + sizeofcmds;
+  for (u32 ci = 0; ci < ncmds && pos + 8 <= end; ++ci) {
+    u32 cmd = rd_u32_le(data + pos);
+    u32 cmdsize = rd_u32_le(data + pos + 4);
+    if (cmdsize < 8 || pos + cmdsize > end)
+      compiler_panic(c, no_loc(), "read_macho_dso: malformed load command");
+    if (cmd == LC_ID_DYLIB) {
+      /* dylib_command: cmd, cmdsize, name(lc_str: 4-byte offset within
+       * the cmd), timestamp, current_version, compat_version. */
+      if (cmdsize < 24) goto next;
+      u32 nm_off = rd_u32_le(data + pos + 8);
+      if (nm_off >= cmdsize) goto next;
+      const char* p = (const char*)(data + pos + nm_off);
+      u32 maxlen = cmdsize - nm_off;
+      u32 nlen = 0;
+      while (nlen < maxlen && p[nlen]) ++nlen;
+      if (nlen)
+        install_name =
+            pool_intern_slice(c->global, (Slice){.s = p, .len = nlen});
+    } else if (cmd == LC_SYMTAB) {
+      if (cmdsize < 24) /* sizeof(struct symtab_command) */
+        compiler_panic(c, no_loc(), "read_macho_dso: symtab cmd truncated");
+      symoff = rd_u32_le(data + pos + 8);
+      nsyms = rd_u32_le(data + pos + 12);
+      stroff = rd_u32_le(data + pos + 16);
+      strsize = rd_u32_le(data + pos + 20);
+    }
+  next:
+    pos += cmdsize;
+  }
+  if (install_name_out) *install_name_out = install_name;
+
+  if (stroff + (u64)strsize > len)
+    compiler_panic(c, no_loc(), "read_macho_dso: string table out of range");
+  if (symoff + (u64)nsyms * MACHO_NLIST64_SIZE > len)
+    compiler_panic(c, no_loc(), "read_macho_dso: symbol table out of range");
+
+  ObjBuilder* ob = obj_new(c);
+  if (!ob) compiler_panic(c, no_loc(), "read_macho_dso: obj_new failed");
+
+  const u8* strtab = data + stroff;
+  const u8* sbase = data + symoff;
+  for (u32 i = 0; i < nsyms; ++i) {
+    const u8* p = sbase + (u64)i * MACHO_NLIST64_SIZE;
+    u32 strx = rd_u32_le(p + 0);
+    u8 n_type = p[4];
+    u16 n_desc = rd_u16_le(p + 6);
+
+    u8 type_field = (u8)(n_type & N_TYPE);
+    u8 ext = (u8)(n_type & N_EXT);
+    /* Skip non-external (locals) and undef refs (the dylib's own imports). */
+    if (!ext) continue;
+    if (type_field == N_UNDF) continue;
+    /* Skip stabs. NOTE(review): N_INDR / N_PBUD are not filtered here. */
+    if (n_type & N_STAB) continue;
+
+    if (strx >= strsize) continue;
+    const char* nm = (const char*)(strtab + strx);
+    u32 nlen = 0;
+    while (strx + nlen < strsize && nm[nlen]) ++nlen;
+    if (!nlen) continue;
+    Sym sn = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen});
+
+    SymBind bind = (n_desc & (N_WEAK_DEF | N_WEAK_REF)) ? SB_WEAK : SB_GLOBAL;
+    SymKind kind = SK_NOTYPE;
+    /* Mach-O dylib nlist doesn't carry STT_FUNC / STT_OBJECT cleanly —
+     * default to NOTYPE. The consuming linker uses dso_export_is_func
+     * to peek at this for ELF; for Mach-O the `imported` decision flows
+     * through synthetic __got / __stubs regardless of kind. */
+    ObjSymId did =
+        obj_symbol_ex(ob, sn, bind, SV_DEFAULT, kind, OBJ_SEC_NONE, 0, 0, 0);
+    obj_sym_mark_referenced(ob, did);
+  }
+
+  obj_finalize(ob);
+  return ob;
+}
diff --git a/src/obj/macho/reloc_aarch64.c b/src/obj/macho/reloc_aarch64.c
@@ -0,0 +1,113 @@
+/* RelocKind <-> arm64 Mach-O reloc-type mapping. Mirror of
+ * elf_reloc_aarch64.c for Mach-O.
+ *
+ * Mach-O relocations carry three independent fields that the cfree
+ * RelocKind enum collapses into a single value: r_type (the 4-bit
+ * ARM64_RELOC_* code), r_pcrel, and r_length. The translator therefore
+ * exposes three accessors — the Mach-O writer consults all of them per
+ * Reloc, and the reader (read.c) inverts via macho_aarch64_reloc_from,
+ * which keys on r_type alone; the reader refines by r_pcrel / r_length. */
+
+#include "core/util.h"
+#include "obj/macho/macho.h"
+
+/* Forward map: cfree RelocKind -> ARM64_RELOC_* type code. Kinds with
+ * no entry here (R_NONE included) yield (u32)-1. Absolute pointers and
+ * PC-relative pointer differences all share ARM64_RELOC_UNSIGNED; the
+ * latter are encoded with r_pcrel=1 and length=3/2, which the pcrel /
+ * length companions below report. */
+u32 macho_aarch64_reloc_to(u32 kind /* RelocKind */) {
+  switch (kind) {
+    case R_ABS64:
+    case R_ABS32:
+    case R_REL64:
+    case R_REL32:
+    case R_PC64:
+    case R_PC32:
+      return ARM64_RELOC_UNSIGNED;
+    case R_AARCH64_JUMP26:
+    case R_AARCH64_CALL26:
+      return ARM64_RELOC_BRANCH26;
+    case R_AARCH64_ADR_PREL_PG_HI21:
+    case R_AARCH64_ADR_PREL_PG_HI21_NC:
+      return ARM64_RELOC_PAGE21;
+    case R_AARCH64_ADD_ABS_LO12_NC:
+    case R_AARCH64_LDST8_ABS_LO12_NC:
+    case R_AARCH64_LDST16_ABS_LO12_NC:
+    case R_AARCH64_LDST32_ABS_LO12_NC:
+    case R_AARCH64_LDST64_ABS_LO12_NC:
+    case R_AARCH64_LDST128_ABS_LO12_NC:
+      return ARM64_RELOC_PAGEOFF12;
+    case R_AARCH64_ADR_GOT_PAGE:
+      return ARM64_RELOC_GOT_LOAD_PAGE21;
+    case R_AARCH64_LD64_GOT_LO12_NC:
+      return ARM64_RELOC_GOT_LOAD_PAGEOFF12;
+    case R_AARCH64_TLVP_LOAD_PAGE21:
+      return ARM64_RELOC_TLVP_LOAD_PAGE21;
+    case R_AARCH64_TLVP_LOAD_PAGEOFF12:
+      return ARM64_RELOC_TLVP_LOAD_PAGEOFF12;
+    default:
+      return (u32)-1;
+  }
+}
+
+/* r_pcrel bit to emit for `kind`: 1 for the REL/PC data kinds, the
+ * 26-bit branches and the page-granular kinds, 0 for everything else
+ * (including the LO12 page-offset kinds). */
+u32 macho_aarch64_reloc_pcrel(u32 kind /* RelocKind */) {
+  /* Pointer-difference data relocations. */
+  const int data_rel = kind == R_REL64 || kind == R_REL32 ||
+                       kind == R_PC64 || kind == R_PC32;
+  /* 26-bit branch kinds. */
+  const int branch = kind == R_AARCH64_JUMP26 || kind == R_AARCH64_CALL26;
+  /* Page-granular kinds: plain, GOT and TLV-pointer pages. */
+  const int page = kind == R_AARCH64_ADR_PREL_PG_HI21 ||
+                   kind == R_AARCH64_ADR_PREL_PG_HI21_NC ||
+                   kind == R_AARCH64_ADR_GOT_PAGE ||
+                   kind == R_AARCH64_TLVP_LOAD_PAGE21;
+
+  return (data_rel || branch || page) ? 1u : 0u;
+}
+
+/* r_length (log2 of the patch width in bytes: 0=byte, 1=hword, 2=word,
+ * 3=quad) for a RelocKind. Only the 64-bit data kinds patch a quad;
+ * AArch64 instructions are 4 bytes, so Mach-O encodes every
+ * instruction fixup (BRANCH26, PAGE21, PAGEOFF12, ...) with length=2,
+ * and the remaining kinds default to 2 as well. */
+u32 macho_aarch64_reloc_length(u32 kind /* RelocKind */) {
+  /* 64-bit absolute / relative data pointers. */
+  if (kind == R_ABS64 || kind == R_REL64 || kind == R_PC64) {
+    return 3;
+  }
+  /* Everything else, instruction fixups included. */
+  return 2;
+}
+
+/* Reverse map: ARM64_RELOC_* type code -> RelocKind, or (u32)-1 when
+ * the type has no entry here. Only the type field is consulted, so
+ * each type yields its most common AArch64 kind; several kinds
+ * collapsed by macho_aarch64_reloc_to share a type and differ only in
+ * r_pcrel / r_length. Reader callers inspect those fields themselves
+ * when the distinction matters (e.g. R_ABS64 vs R_PC64, both UNSIGNED). */
+u32 macho_aarch64_reloc_from(u32 macho_type) {
+  /* Data and branch types. */
+  if (macho_type == ARM64_RELOC_UNSIGNED) return R_ABS64;
+  if (macho_type == ARM64_RELOC_BRANCH26) return R_AARCH64_CALL26;
+
+  /* Page / page-offset pairs: plain, GOT-load and TLV-pointer. */
+  if (macho_type == ARM64_RELOC_PAGE21)
+    return R_AARCH64_ADR_PREL_PG_HI21;
+  if (macho_type == ARM64_RELOC_PAGEOFF12)
+    return R_AARCH64_ADD_ABS_LO12_NC;
+  if (macho_type == ARM64_RELOC_GOT_LOAD_PAGE21)
+    return R_AARCH64_ADR_GOT_PAGE;
+  if (macho_type == ARM64_RELOC_GOT_LOAD_PAGEOFF12)
+    return R_AARCH64_LD64_GOT_LO12_NC;
+  if (macho_type == ARM64_RELOC_TLVP_LOAD_PAGE21)
+    return R_AARCH64_TLVP_LOAD_PAGE21;
+  if (macho_type == ARM64_RELOC_TLVP_LOAD_PAGEOFF12)
+    return R_AARCH64_TLVP_LOAD_PAGEOFF12;
+
+  /* Any other type code. */
+  return (u32)-1;
+}
diff --git a/src/obj/macho/reloc_x86_64.c b/src/obj/macho/reloc_x86_64.c
@@ -0,0 +1,84 @@
+#include "core/util.h"
+#include "obj/macho/macho.h"
+
+/* Forward map: cfree RelocKind -> X86_64_RELOC_* type code, or (u32)-1
+ * for kinds with no entry here (R_NONE included). */
+u32 macho_x86_64_reloc_to(u32 kind /* RelocKind */) {
+  switch (kind) {
+    case R_ABS64:
+    case R_ABS32:
+      return X86_64_RELOC_UNSIGNED;
+    case R_PC32:
+    case R_REL32:
+    case R_PC64:
+    case R_REL64:
+    case R_X64_PC8:
+      return X86_64_RELOC_SIGNED;
+    case R_PLT32:
+    case R_X64_PLT32:
+      return X86_64_RELOC_BRANCH;
+    case R_X64_GOTPCRELX:
+    case R_X64_REX_GOTPCRELX:
+      return X86_64_RELOC_GOT_LOAD;
+    case R_X64_GOTPCREL:
+      return X86_64_RELOC_GOT;
+    case R_X64_TPOFF32:
+      return X86_64_RELOC_TLV;
+    default:
+      return (u32)-1;
+  }
+}
+
+/* r_pcrel bit to emit for `kind`: 1 for every kind that
+ * macho_x86_64_reloc_to maps to a type other than
+ * X86_64_RELOC_UNSIGNED, 0 for everything else. */
+u32 macho_x86_64_reloc_pcrel(u32 kind /* RelocKind */) {
+  /* PC-relative data of any width. */
+  const int data_rel = kind == R_PC32 || kind == R_REL32 ||
+                       kind == R_PC64 || kind == R_REL64 ||
+                       kind == R_X64_PC8;
+  /* PLT-style branch kinds. */
+  const int branch = kind == R_PLT32 || kind == R_X64_PLT32;
+  /* GOT references. */
+  const int got = kind == R_X64_GOTPCREL || kind == R_X64_GOTPCRELX ||
+                  kind == R_X64_REX_GOTPCRELX;
+  /* The kind that maps to X86_64_RELOC_TLV. */
+  const int tlv = kind == R_X64_TPOFF32;
+
+  return (data_rel || branch || got || tlv) ? 1u : 0u;
+}
+
+/* r_length to emit for `kind`: 3 for the 64-bit data kinds, 0 for
+ * R_X64_PC8 and 2 for everything else.
+ * NOTE(review): presumably log2 of the patch width, as on AArch64. */
+u32 macho_x86_64_reloc_length(u32 kind /* RelocKind */) {
+  if (kind == R_ABS64 || kind == R_PC64 || kind == R_REL64) {
+    return 3;
+  }
+  if (kind == R_X64_PC8) {
+    return 0;
+  }
+  return 2;
+}
+
+/* Reverse map: X86_64_RELOC_* type code -> RelocKind, or (u32)-1 for
+ * types with no entry here. Only the type field is consulted: every
+ * SIGNED variant collapses to R_PC32 and UNSIGNED always yields
+ * R_ABS64. */
+u32 macho_x86_64_reloc_from(u32 macho_type) {
+  /* Absolute pointer. */
+  if (macho_type == X86_64_RELOC_UNSIGNED) return R_ABS64;
+  /* PC-relative data, whatever the SIGNED_n flavour. */
+  if (macho_type == X86_64_RELOC_SIGNED ||
+      macho_type == X86_64_RELOC_SIGNED_1 ||
+      macho_type == X86_64_RELOC_SIGNED_2 ||
+      macho_type == X86_64_RELOC_SIGNED_4)
+    return R_PC32;
+  /* Branch. */
+  if (macho_type == X86_64_RELOC_BRANCH) return R_X64_PLT32;
+  /* GOT and TLV references. */
+  if (macho_type == X86_64_RELOC_GOT_LOAD) return R_X64_REX_GOTPCRELX;
+  if (macho_type == X86_64_RELOC_GOT) return R_X64_GOTPCREL;
+  if (macho_type == X86_64_RELOC_TLV) return R_X64_TPOFF32;
+  return (u32)-1;
+}
diff --git a/src/obj/tbd_read.c b/src/obj/macho/tbd_read.c
diff --git a/src/obj/macho_emit.c b/src/obj/macho_emit.c
@@ -1,791 +0,0 @@
-/* Mach-O MH_OBJECT writer. Walks a finalized ObjBuilder and emits a
- * 64-bit little-endian relocatable object via the supplied Writer.
- *
- * Layout strategy (MH_OBJECT — everything in one anonymous segment):
- * 1. plan Mach-O sections (one per non-symtab/strtab/rela ObjSection),
- * mapping cfree section names to (segname, sectname) pairs;
- * 2. partition ObjSyms into local / extdef / undef and assign final
- * indices for LC_DYSYMTAB;
- * 3. build per-section relocation tables via the per-arch translator
- * (only aarch64 is wired today);
- * 4. assign file offsets sequentially: header, load commands, section
- * bytes, relocation tables, symbol table, string table;
- * 5. write header → load commands → section bytes → relocs → symtab
- * → strtab.
- *
- * 64-bit little-endian only. Big-endian / 32-bit panics at entry.
- *
- * Round-trip invariant: read_macho of
- * this output must produce an ObjBuilder shape-equivalent to the input,
- * modulo (a) Mach-O's mandatory (segname, sectname) pairing and (b)
- * any synthesized N_SECT symbols. The (segname,sectname) form chosen
- * here is the canonical post-roundtrip shape — read_macho stores the
- * comma-joined "__SEG,__sect" form in Section.name so a re-emit
- * produces the same bytes. */
-
-#include <string.h>
-
-#include "arch/arch.h"
-#include "core/arena.h"
-#include "core/buf.h"
-#include "core/bytes.h"
-#include "core/heap.h"
-#include "core/pool.h"
-#include "core/slice.h"
-#include "core/util.h"
-#include "obj/macho.h"
-
-static SrcLoc no_loc(void) {
- SrcLoc l = {0, 0, 0};
- return l;
-}
-
-/* ---- LE writer helpers (Writer-based) ---- */
-
-static void wr_u32(Writer* w, u32 v) {
- u8 b[4];
- wr_u32_le(b, v);
- cfree_writer_write(w, b, 4);
-}
-
-static void wr_u64(Writer* w, u64 v) {
- u8 b[8];
- wr_u64_le(b, v);
- cfree_writer_write(w, b, 8);
-}
-
-static void wr_name16(Writer* w, const char* s, u32 len) {
- /* Mach-O section/segment names are 16-byte zero-padded fields. Names
- * longer than 16 are truncated; the on-disk format leaves no room for
- * a longer encoding. */
- u8 buf[16];
- u32 n = len > 16 ? 16 : len;
- memcpy(buf, s, n);
- if (n < 16) memset(buf + n, 0, 16 - n);
- cfree_writer_write(w, buf, 16);
-}
-
-/* ---- (segname,sectname) derivation ---- */
-
-/* Split a cfree section name into Mach-O (segname, sectname) pair.
- * If `name` contains a comma, it is treated as already in
- * "__SEG,__sect" form and split at the first comma. Otherwise we
- * derive the pair from SecKind, ignoring `name` (the input was an
- * ELF-shaped name like ".text" or ".rodata"). */
-typedef struct MSegSect {
- char segname[16];
- char sectname[16];
- u32 seg_len;
- u32 sect_len;
-} MSegSect;
-
-static void copy_fixed16(char* dst, u32* len_out, const char* src, u32 src_len) {
- u32 n = src_len > 16 ? 16 : src_len;
- memcpy(dst, src, n);
- if (n < 16) memset(dst + n, 0, 16 - n);
- *len_out = n;
-}
-
-static void name_to_seg_sect(const char* name, u32 nlen, u16 sec_kind,
- MSegSect* out) {
- /* Comma-separated form: take prefix as segname, suffix as sectname. */
- for (u32 i = 0; i < nlen; ++i) {
- if (name[i] == ',') {
- copy_fixed16(out->segname, &out->seg_len, name, i);
- copy_fixed16(out->sectname, &out->sect_len, name + i + 1,
- nlen - i - 1);
- return;
- }
- }
-
- /* Not comma-separated. Derive from SecKind; ignore `name`. */
- const char* seg;
- const char* sect;
- switch (sec_kind) {
- case SEC_TEXT:
- seg = "__TEXT";
- sect = "__text";
- break;
- case SEC_RODATA:
- seg = "__TEXT";
- sect = "__const";
- break;
- case SEC_DATA:
- seg = "__DATA";
- sect = "__data";
- break;
- case SEC_BSS:
- seg = "__DATA";
- sect = "__bss";
- break;
- case SEC_DEBUG:
- seg = "__DWARF";
- /* Strip a leading `.` from the input name (".debug_info" →
- * "__debug_info") so the dwarf section names round-trip. */
- sect = (nlen && name[0] == '.') ? name + 1 : name;
- copy_fixed16(out->segname, &out->seg_len, seg,
- (u32)slice_from_cstr(seg).len);
- copy_fixed16(out->sectname, &out->sect_len, sect,
- (u32)((nlen && name[0] == '.') ? nlen - 1 : nlen));
- return;
- default:
- seg = "__DATA";
- sect = "__data";
- break;
- }
- copy_fixed16(out->segname, &out->seg_len, seg, (u32)slice_from_cstr(seg).len);
- copy_fixed16(out->sectname, &out->sect_len, sect,
- (u32)slice_from_cstr(sect).len);
-}
-
-/* ---- per-section plan ---- */
-
-typedef struct MSec {
- MSegSect ns;
- u64 addr; /* assigned vmaddr within the segment */
- u64 size; /* bytes (or bss size) */
- u32 fileoff; /* 0 for zerofill */
- u32 align; /* power-of-two; stored as log2 in section_64.align */
- u32 reloff; /* 0 if no relocs */
- u32 nreloc;
- u32 flags; /* S_TYPE | S_ATTR_* */
- u32 entsize;
- u32 obj_sec; /* originating ObjSecId */
- int is_zerofill;
- const Buf* obj_bytes; /* NULL when zerofill */
- u8* relocs; /* arena-allocated; nreloc * 8 bytes */
-} MSec;
-
-static u32 log2_align(u32 a) {
- u32 r = 0;
- while ((1u << r) < a) ++r;
- return r;
-}
-
-static u32 section_flags_for(u16 sec_kind, u16 sec_flags, const char* sectname,
- u32 sect_len) {
- u32 f = 0;
- if (sec_kind == SEC_TEXT || (sec_flags & SF_EXEC)) {
- f |= S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS;
- }
- if (sec_flags & SF_TLS) {
- /* Mach-O distinguishes three TLV section types by sectname:
- * __thread_data → S_THREAD_LOCAL_REGULAR (initial data)
- * __thread_bss → S_THREAD_LOCAL_ZEROFILL (zero-init data)
- * __thread_vars → S_THREAD_LOCAL_VARIABLES (descriptor records)
- * dyld dispatches its TLV-bootstrap pass off the S_TYPE; the
- * S_ATTR_* bits don't carry TLV semantics so we just emit the type. */
- if (sect_len >= 13 && memcmp(sectname, "__thread_vars", 13) == 0)
- return S_THREAD_LOCAL_VARIABLES;
- if (sec_kind == SEC_BSS) return S_THREAD_LOCAL_ZEROFILL;
- return S_THREAD_LOCAL_REGULAR;
- }
- if (sec_kind == SEC_BSS || (sect_len >= 5 && memcmp(sectname, "__bss", 5) == 0)) {
- f |= S_ZEROFILL;
- }
- if (sec_flags & SF_STRINGS) {
- f = (f & ~SECTION_TYPE) | S_CSTRING_LITERALS;
- }
- /* Default S_REGULAR (0) for all others. */
- return f;
-}
-
-/* ---- symbol partition ---- */
-
-typedef struct MSym {
- ObjSymId obj_id;
- u32 strx; /* offset in string table */
- u8 n_type;
- u8 n_sect;
- u16 n_desc;
- u64 n_value;
-} MSym;
-
-static int sym_is_undef(const ObjSym* s) {
- return s->section_id == OBJ_SEC_NONE && s->kind != SK_ABS &&
- s->kind != SK_COMMON;
-}
-
-static int sym_is_extdef(const ObjSym* s) {
- if (sym_is_undef(s)) return 0;
- return s->bind == SB_GLOBAL || s->bind == SB_WEAK;
-}
-
-/* ---- string table ----
- *
- * Mach-O strtab: leading zero byte at offset 0 represents the empty
- * string. Entries are NUL-terminated; we don't dedupe (small symbol
- * counts in v1; matches the simplest llvm output). The "_" prefix on
- * C symbols is added inline in the writer below. */
-
-void emit_macho(Compiler* c, ObjBuilder* ob, Writer* w) {
- Heap* h = (Heap*)c->ctx->heap;
-
- /* Tombstone sweep first — strip/objcopy mutations and the historical
- * UNDEF prune are both expressed via Section.removed / ObjSym.removed
- * post-sweep. See obj_sweep_dead. */
- obj_sweep_dead(ob);
-
- /* ---- target validation ---------------------------------------- */
- const ArchImpl* arch = arch_for_compiler(c);
- const ArchMachoOps* macho = arch ? arch->macho : NULL;
- u32 cputype, cpusubtype;
- u32 (*reloc_to)(u32);
- u32 (*reloc_pcrel)(u32);
- u32 (*reloc_length)(u32);
- if (!macho || !macho->reloc_to || !macho->reloc_pcrel ||
- !macho->reloc_length) {
- compiler_panic(c, no_loc(), "emit_macho: unsupported target arch %u",
- (u32)c->target.arch);
- }
- cputype = macho->cputype;
- cpusubtype = macho->cpusubtype;
- reloc_to = macho->reloc_to;
- reloc_pcrel = macho->reloc_pcrel;
- reloc_length = macho->reloc_length;
- if (c->target.big_endian) {
- compiler_panic(c, no_loc(), "emit_macho: big-endian not supported");
- }
- if (c->target.ptr_size != 8) {
- compiler_panic(c, no_loc(), "emit_macho: ptr_size %u (expected 8)",
- (u32)c->target.ptr_size);
- }
-
- /* ---- pass 1: plan Mach-O sections ----------------------------- */
- u32 nobjsec = obj_section_count(ob);
- MSec* secs = arena_zarray(c->scratch, MSec, nobjsec ? nobjsec : 1);
- u32* obj_to_msec = arena_zarray(c->scratch, u32, nobjsec ? nobjsec : 1);
- u32 nsecs = 0;
-
- for (u32 i = 1; i < nobjsec; ++i) {
- const Section* s = obj_section_get(ob, i);
- if (s->removed) continue; /* see obj_sweep_dead */
- /* Skip ELF-style synthetic sections that read_elf would have
- * filtered: SYMTAB / STRTAB / RELA / GROUP have no Mach-O
- * representation as data sections. */
- if (s->sem == SSEM_SYMTAB || s->sem == SSEM_STRTAB ||
- s->sem == SSEM_RELA || s->sem == SSEM_REL ||
- s->sem == SSEM_GROUP) {
- continue;
- }
- Slice nm_s = pool_slice(c->global, s->name);
- const char* nm = nm_s.s;
- size_t nlen = nm_s.len;
- MSec* m = &secs[nsecs];
- name_to_seg_sect(nm ? nm : "", (u32)nlen, s->kind, &m->ns);
- m->obj_sec = i;
- m->align = s->align ? s->align : 1;
- m->entsize = s->entsize;
- /* Mach-O reader stashes the raw section.flags (S_TYPE | S_ATTR_*)
- * in Section.ext_type when reading a Mach-O input. Use it
- * verbatim so attribute bits like S_ATTR_NO_DEAD_STRIP /
- * S_ATTR_LIVE_SUPPORT round-trip. Fall back to the kind-derived
- * default for sections originating from non-Mach-O readers (e.g.
- * cfree codegen). */
- if (s->ext_kind == OBJ_EXT_MACHO && s->ext_type) {
- m->flags = s->ext_type;
- } else {
- m->flags = section_flags_for(s->kind, s->flags, m->ns.sectname,
- m->ns.sect_len);
- }
- if (s->sem == SSEM_NOBITS || s->kind == SEC_BSS) {
- m->is_zerofill = 1;
- m->size = s->bss_size;
- m->obj_bytes = NULL;
- /* Preserve S_THREAD_LOCAL_ZEROFILL when SF_TLS routed us there;
- * a regular BSS section gets the plain S_ZEROFILL type. */
- u32 stype = m->flags & SECTION_TYPE;
- if (stype != S_THREAD_LOCAL_ZEROFILL)
- m->flags = (m->flags & ~SECTION_TYPE) | S_ZEROFILL;
- } else {
- m->is_zerofill = 0;
- m->size = s->bytes.total;
- m->obj_bytes = &s->bytes;
- }
- obj_to_msec[i] = nsecs + 1; /* 1-based: matches Mach-O n_sect. */
- nsecs++;
- }
-
- /* ---- pass 2: assign vmaddrs (segment-relative) and per-section
- * flat-layout addresses. MH_OBJECT keeps everything in
- * one segment with vmaddr=0; section addr fields are
- * relative offsets within the segment.
- *
- * Two-pass to match the conventional Mach-O `MH_OBJECT` layout:
- * non-zerofill sections come first in vmaddr order, then zerofill
- * sections at the tail. Apple `as` and clang `-c` both lay out
- * this way, and roundtripping must reproduce it so symbol n_values
- * (which are segment-relative addresses) compare equal. */
- u64 cur_addr = 0;
- for (u32 i = 0; i < nsecs; ++i) {
- MSec* m = &secs[i];
- if (m->is_zerofill) continue;
- cur_addr = ALIGN_UP(cur_addr, (u64)m->align);
- m->addr = cur_addr;
- cur_addr += m->size;
- }
- for (u32 i = 0; i < nsecs; ++i) {
- MSec* m = &secs[i];
- if (!m->is_zerofill) continue;
- cur_addr = ALIGN_UP(cur_addr, (u64)m->align);
- m->addr = cur_addr;
- cur_addr += m->size;
- }
- u64 segment_vmsize = cur_addr;
-
- /* ---- pass 3: partition symbols (locals, extdefs, undefs) ------ */
- u32 nobjsym = 0;
- {
- ObjSymIter* it = obj_symiter_new(ob);
- ObjSymEntry e;
- while (obj_symiter_next(it, &e)) ++nobjsym;
- obj_symiter_free(it);
- }
-
- MSym* msyms = arena_zarray(c->scratch, MSym, nobjsym + 1);
- u32 nmsyms = 0;
- u32* sym_obj_to_macho =
- arena_zarray(c->scratch, u32, nobjsym + 2); /* obj_id -> mach idx */
-
- Buf strtab;
- buf_init(&strtab, h);
- /* Mach-O strtab convention: the first byte is " " (space) or NUL —
- * llvm/Apple emit a single NUL. We start with NUL for offset 0. */
- {
- u8 z = 0;
- buf_write(&strtab, &z, 1);
- }
-
- /* Emit in three passes so n_type/sect ordering matches LC_DYSYMTAB
- * (locals, then extdefs, then undefs). */
- for (int pass = 0; pass < 3; ++pass) {
- ObjSymIter* it = obj_symiter_new(ob);
- ObjSymEntry e;
- while (obj_symiter_next(it, &e)) {
- const ObjSym* s = e.sym;
- if (s->removed) continue; /* spurious-UNDEF prune + explicit removal */
- int undef = sym_is_undef(s);
- int extdef = sym_is_extdef(s);
- int local = !undef && !extdef;
- int want = (pass == 0 && local) || (pass == 1 && extdef) ||
- (pass == 2 && undef);
- if (!want) continue;
- MSym* ms = &msyms[nmsyms];
- ms->obj_id = e.id;
-
- Slice nm_s = pool_slice(c->global, s->name);
- const char* nm = nm_s.s;
- size_t nlen = nm_s.len;
- /* Mach-O symbol names are stored on disk verbatim — including
- * the leading `_` Apple toolchains use for C-source-level
- * symbols ("_main" for `int main()`). cfree treats the prefix
- * as part of the on-disk name, not a transform applied at emit.
- * Name-canonicalization for API callers (cfree_jit_lookup,
- * link_set_entry) lives one layer up at the linker boundary
- * (link.c), so emit/read stay byte-for-byte stable. */
- if (nlen && nm) {
- u32 off = buf_pos(&strtab);
- buf_write(&strtab, nm, nlen);
- u8 z = 0;
- buf_write(&strtab, &z, 1);
- ms->strx = off;
- } else {
- ms->strx = 0;
- }
-
- u8 type = 0;
- if (extdef) type |= N_EXT;
- if (s->vis == SV_HIDDEN || s->vis == SV_INTERNAL) {
- /* Mach-O encodes hidden externals as N_PEXT|N_EXT. */
- type |= N_PEXT;
- }
- u8 n_sect = NO_SECT;
- u16 n_desc = 0;
- u64 value = s->value;
-
- if (undef) {
- type |= N_UNDF;
- /* Undefined symbols with non-LOCAL bind are external references
- * (the common case — every `extern int x;`). Setting N_EXT
- * matches what clang emits and what Apple `ld` expects. */
- if (s->bind == SB_GLOBAL || s->bind == SB_WEAK) type |= N_EXT;
- if (s->bind == SB_WEAK) n_desc |= N_WEAK_REF;
- value = 0;
- } else if (s->kind == SK_ABS) {
- type |= N_ABS;
- } else if (s->kind == SK_COMMON) {
- /* Mach-O common symbols are N_UNDF|N_EXT with n_value=size and
- * n_desc carrying log2(align) in the GET_COMM_ALIGN bits. */
- type = N_UNDF | N_EXT;
- value = s->size;
- u32 a = s->common_align ? (u32)s->common_align : 1;
- n_desc = (u16)(log2_align(a) << 8); /* GET_COMM_ALIGN field */
- } else {
- type |= N_SECT;
- u32 ms_idx = (s->section_id < nobjsec) ? obj_to_msec[s->section_id] : 0;
- n_sect = (u8)ms_idx;
- if (n_sect && n_sect <= nsecs) {
- value = secs[n_sect - 1].addr + s->value;
- }
- if (s->bind == SB_WEAK) n_desc |= N_WEAK_DEF;
- }
-
- /* OR in any pass-through n_desc bits the reader stashed in
- * sym->flags (N_NO_DEAD_STRIP, etc.). The bits we already
- * compute (N_WEAK_DEF / N_WEAK_REF and the common-alignment
- * field) are already excluded by read_macho before stashing,
- * so a plain OR can't double-count. */
- n_desc |= s->flags;
-
- ms->n_type = type;
- ms->n_sect = n_sect;
- ms->n_desc = n_desc;
- ms->n_value = value;
-
- sym_obj_to_macho[e.id] = nmsyms + 1; /* 1-based index, 0 = none. */
- nmsyms++;
- }
- obj_symiter_free(it);
- }
-
- u32 nlocals = 0, nextdefs = 0, nundefs = 0;
- for (u32 i = 0; i < nmsyms; ++i) {
- u8 t = msyms[i].n_type;
- u8 ext = (t & N_EXT) != 0;
- u8 typ = (u8)(t & N_TYPE);
- if (typ == N_UNDF && ext) {
- /* Could be undef or common — common has nonzero n_value. */
- if (msyms[i].n_value != 0)
- ++nextdefs; /* common is conventionally extdef-shaped */
- else
- ++nundefs;
- } else if (ext) {
- ++nextdefs;
- } else {
- ++nlocals;
- }
- }
- /* Re-derive without the common fudge by counting partition pass: we
- * already wrote them in (locals,extdefs,undefs) order, so the prefix
- * counts are just the per-pass counts. Mirror the spurious-UNDEF
- * prune from the emit loop above so the LC_DYSYMTAB index counts
- * line up with the symbols we actually wrote. */
- nlocals = 0;
- nextdefs = 0;
- nundefs = 0;
- {
- ObjSymIter* it = obj_symiter_new(ob);
- ObjSymEntry e;
- while (obj_symiter_next(it, &e)) {
- const ObjSym* s = e.sym;
- if (s->removed) continue;
- int undef = sym_is_undef(s);
- if (undef)
- ++nundefs;
- else if (sym_is_extdef(s))
- ++nextdefs;
- else
- ++nlocals;
- }
- obj_symiter_free(it);
- }
-
- /* ---- pass 4: build per-section relocation tables -------------- */
- u32 total_relocs = obj_reloc_total(ob);
- for (u32 i = 0; i < nsecs; ++i) {
- MSec* m = &secs[i];
- u32 nr = obj_reloc_count(ob, m->obj_sec);
- if (!nr) continue;
- /* Worst case: each reloc may be preceded by an ARM64_RELOC_ADDEND
- * pair entry. We size the buffer for that upper bound. */
- u8* buf = (u8*)arena_alloc(c->scratch, (size_t)MACHO_RELOC_SIZE * nr * 2,
- _Alignof(u32));
- u32 j = 0;
- for (u32 ri = 0; ri < total_relocs; ++ri) {
- const Reloc* r = obj_reloc_at(ob, ri);
- if (r->removed) continue;
- if (r->section_id != m->obj_sec) continue;
- if ((r->kind == R_RV_ADD8 || r->kind == R_RV_ADD16 ||
- r->kind == R_RV_ADD32 || r->kind == R_RV_ADD64) &&
- ri + 1u < total_relocs) {
- const Reloc* sub = obj_reloc_at(ob, ri + 1u);
- int paired =
- sub && sub->section_id == r->section_id &&
- sub->offset == r->offset &&
- ((r->kind == R_RV_ADD8 && sub->kind == R_RV_SUB8) ||
- (r->kind == R_RV_ADD16 && sub->kind == R_RV_SUB16) ||
- (r->kind == R_RV_ADD32 && sub->kind == R_RV_SUB32) ||
- (r->kind == R_RV_ADD64 && sub->kind == R_RV_SUB64));
- if (paired) {
- u32 length = (r->kind == R_RV_ADD64) ? 3u
- : (r->kind == R_RV_ADD32) ? 2u
- : (r->kind == R_RV_ADD16) ? 1u
- : 0u;
- u32 add_idx;
- u32 sub_idx;
- u32 sub_type = c->target.arch == CFREE_ARCH_ARM_64
- ? ARM64_RELOC_SUBTRACTOR
- : X86_64_RELOC_SUBTRACTOR;
- u32 unsigned_type = c->target.arch == CFREE_ARCH_ARM_64
- ? ARM64_RELOC_UNSIGNED
- : X86_64_RELOC_UNSIGNED;
- if (r->sym == OBJ_SYM_NONE || sub->sym == OBJ_SYM_NONE) {
- compiler_panic(c, no_loc(),
- "emit_macho: symdiff reloc without symbol");
- }
- add_idx = sym_obj_to_macho[r->sym];
- sub_idx = sym_obj_to_macho[sub->sym];
- if (add_idx == 0 || sub_idx == 0) {
- compiler_panic(c, no_loc(),
- "emit_macho: symdiff reloc target not in symtab");
- }
- {
- u8* slot = buf + (size_t)j * MACHO_RELOC_SIZE;
- wr_u32_le(slot + 0, (u32)r->offset);
- wr_u32_le(slot + 4, ((sub_idx - 1u) & 0x00ffffffu) |
- (length << 25) | (1u << 27) |
- ((sub_type & 0xfu) << 28));
- ++j;
- }
- {
- u8* slot = buf + (size_t)j * MACHO_RELOC_SIZE;
- wr_u32_le(slot + 0, (u32)r->offset);
- wr_u32_le(slot + 4, ((add_idx - 1u) & 0x00ffffffu) |
- (length << 25) | (1u << 27) |
- ((unsigned_type & 0xfu) << 28));
- ++j;
- }
- ++ri;
- continue;
- }
- }
- u32 mtype = reloc_to(r->kind);
- if (mtype == (u32)-1) {
- compiler_panic(c, no_loc(),
- "emit_macho: unsupported reloc kind %u for arch %u",
- (u32)r->kind, (u32)c->target.arch);
- }
- u32 pcrel = reloc_pcrel(r->kind);
- u32 length = reloc_length(r->kind);
-
- /* Resolve target — extern always 1 in our model (every Reloc has
- * an ObjSymId). Skip relocs without a symbol — they would map to
- * a section-relative reloc which the v1 cgtarget never emits. */
- if (r->sym == OBJ_SYM_NONE) {
- compiler_panic(c, no_loc(),
- "emit_macho: reloc without symbol not supported "
- "(sec=%u offset=%u kind=%u)",
- (u32)r->section_id, (u32)r->offset, (u32)r->kind);
- }
- u32 mach_sym_idx = sym_obj_to_macho[r->sym];
- if (mach_sym_idx == 0) {
- compiler_panic(c, no_loc(),
- "emit_macho: reloc target sym %u not in symtab",
- (u32)r->sym);
- }
- u32 r_symbolnum = mach_sym_idx - 1; /* Mach-O uses 0-based. */
-
- /* Non-zero addend: emit a leading ARM64_RELOC_ADDEND pair (only
- * meaningful for non-UNSIGNED types — UNSIGNED carries the addend
- * inline in the patched bytes). */
- if (r->addend != 0 && mtype != ARM64_RELOC_UNSIGNED) {
- u8* slot = buf + (size_t)j * MACHO_RELOC_SIZE;
- wr_u32_le(slot + 0, (u32)r->offset);
- u32 packed = ((u32)(i64)r->addend & 0x00ffffffu) | (0u << 24) |
- (length << 25) | (1u << 27) /*extern*/ |
- (ARM64_RELOC_ADDEND << 28);
- wr_u32_le(slot + 4, packed);
- ++j;
- }
-
- u8* slot = buf + (size_t)j * MACHO_RELOC_SIZE;
- wr_u32_le(slot + 0, (u32)r->offset);
- u32 packed = (r_symbolnum & 0x00ffffffu) |
- ((pcrel & 1u) << 24) |
- ((length & 3u) << 25) |
- (1u << 27) /*extern*/ |
- ((mtype & 0xfu) << 28);
- wr_u32_le(slot + 4, packed);
- ++j;
- }
- m->relocs = buf;
- m->nreloc = j;
- }
-
- /* ---- pass 5: assign file offsets ------------------------------ */
- /* Layout after the load-command block:
- * section bytes (in order, respecting align)
- * relocation tables (per section, 4-aligned)
- * symbol table (8-aligned)
- * string table */
- u32 nload_cmds = 4; /* LC_SEGMENT_64 + LC_BUILD_VERSION + LC_SYMTAB + LC_DYSYMTAB */
- u32 segcmd_size = MACHO_SEGCMD64_SIZE + nsecs * MACHO_SECT64_SIZE;
- u32 build_version_size = 24; /* fixed: cmd+cmdsize+platform+minos+sdk+ntools(0) */
- u32 sizeofcmds = segcmd_size + build_version_size + MACHO_SYMTAB_CMD_SIZE +
- MACHO_DYSYMTAB_CMD_SIZE;
-
- u64 cur = MACHO_HDR64_SIZE + sizeofcmds;
- u32 fileoff_first = (u32)cur;
- for (u32 i = 0; i < nsecs; ++i) {
- MSec* m = &secs[i];
- if (m->is_zerofill) {
- m->fileoff = 0;
- continue;
- }
- cur = ALIGN_UP(cur, (u64)m->align);
- m->fileoff = (u32)cur;
- cur += m->size;
- }
-
- /* Reloc tables. */
- for (u32 i = 0; i < nsecs; ++i) {
- MSec* m = &secs[i];
- if (!m->nreloc) {
- m->reloff = 0;
- continue;
- }
- cur = ALIGN_UP(cur, (u64)4);
- m->reloff = (u32)cur;
- cur += (u64)m->nreloc * MACHO_RELOC_SIZE;
- }
-
- cur = ALIGN_UP(cur, (u64)8);
- u64 symoff = cur;
- cur += (u64)nmsyms * MACHO_NLIST64_SIZE;
- u64 stroff = cur;
- u32 strtab_size = buf_pos(&strtab);
- cur += strtab_size;
-
- /* ---- pass 6: write the file ------------------------------------ */
- cfree_writer_seek(w, 0);
-
- /* mach_header_64 */
- wr_u32(w, MH_MAGIC_64);
- wr_u32(w, cputype);
- wr_u32(w, cpusubtype);
- wr_u32(w, MH_OBJECT);
- wr_u32(w, nload_cmds);
- wr_u32(w, sizeofcmds);
- wr_u32(w, 0); /* flags — MH_OBJECT carries none in v1 */
- wr_u32(w, 0); /* reserved */
-
- /* LC_SEGMENT_64 (anonymous, contains everything) */
- wr_u32(w, LC_SEGMENT_64);
- wr_u32(w, segcmd_size);
- wr_name16(w, "", 0); /* segname: empty for MH_OBJECT */
- wr_u64(w, 0); /* vmaddr */
- wr_u64(w, segment_vmsize); /* vmsize */
- wr_u64(w, fileoff_first); /* fileoff */
- /* filesize = bytes covered by non-zerofill sections (post-section
- * file offset minus the start). */
- u64 filesize = 0;
- for (u32 i = 0; i < nsecs; ++i) {
- MSec* m = &secs[i];
- if (m->is_zerofill) continue;
- u64 end = (u64)m->fileoff + m->size;
- u64 begin = m->fileoff;
- if (end > filesize + fileoff_first) filesize = end - fileoff_first;
- (void)begin;
- }
- wr_u64(w, filesize);
- /* maxprot/initprot — VM_PROT_READ|WRITE|EXECUTE = 7 for object segs. */
- wr_u32(w, 7);
- wr_u32(w, 7);
- wr_u32(w, nsecs);
- wr_u32(w, 0); /* flags */
-
- /* sections inline within the segment command */
- for (u32 i = 0; i < nsecs; ++i) {
- MSec* m = &secs[i];
- wr_name16(w, m->ns.sectname, m->ns.sect_len);
- wr_name16(w, m->ns.segname, m->ns.seg_len);
- wr_u64(w, m->addr);
- wr_u64(w, m->size);
- wr_u32(w, m->fileoff);
- wr_u32(w, log2_align(m->align));
- wr_u32(w, m->reloff);
- wr_u32(w, m->nreloc);
- wr_u32(w, m->flags);
- wr_u32(w, 0); /* reserved1 */
- wr_u32(w, m->entsize); /* reserved2 */
- wr_u32(w, 0); /* reserved3 */
- }
-
- /* LC_BUILD_VERSION — platform=PLATFORM_MACOS(1), minos/sdk=14.0.0,
- * ntools=0. The exact min-version isn't load-bearing for MH_OBJECT,
- * but Apple's `ld` warns when it's missing. */
- wr_u32(w, LC_BUILD_VERSION);
- wr_u32(w, build_version_size);
- wr_u32(w, 1); /* platform: PLATFORM_MACOS */
- wr_u32(w, (14u << 16) | 0); /* minos: 14.0.0 */
- wr_u32(w, (14u << 16) | 0); /* sdk: 14.0.0 */
- wr_u32(w, 0); /* ntools */
-
- /* LC_SYMTAB */
- wr_u32(w, LC_SYMTAB);
- wr_u32(w, MACHO_SYMTAB_CMD_SIZE);
- wr_u32(w, (u32)symoff);
- wr_u32(w, nmsyms);
- wr_u32(w, (u32)stroff);
- wr_u32(w, strtab_size);
-
- /* LC_DYSYMTAB */
- wr_u32(w, LC_DYSYMTAB);
- wr_u32(w, MACHO_DYSYMTAB_CMD_SIZE);
- wr_u32(w, 0); /* ilocalsym */
- wr_u32(w, nlocals);
- wr_u32(w, nlocals);
- wr_u32(w, nextdefs);
- wr_u32(w, nlocals + nextdefs);
- wr_u32(w, nundefs);
- wr_u32(w, 0); wr_u32(w, 0); /* tocoff, ntoc */
- wr_u32(w, 0); wr_u32(w, 0); /* modtaboff, nmodtab */
- wr_u32(w, 0); wr_u32(w, 0); /* extrefsymoff, nextrefsyms */
- wr_u32(w, 0); wr_u32(w, 0); /* indirectsymoff, nindirectsyms */
- wr_u32(w, 0); wr_u32(w, 0); /* extreloff, nextrel */
- wr_u32(w, 0); wr_u32(w, 0); /* locreloff, nlocrel */
-
- /* section bytes */
- for (u32 i = 0; i < nsecs; ++i) {
- MSec* m = &secs[i];
- if (m->is_zerofill || !m->size) continue;
- cfree_writer_seek(w, m->fileoff);
- if (m->obj_bytes) {
- u32 sz = m->obj_bytes->total;
- u8* tmp = (u8*)h->alloc(h, sz ? sz : 1, 1);
- if (sz) buf_flatten(m->obj_bytes, tmp);
- cfree_writer_write(w, tmp, sz);
- h->free(h, tmp, sz ? sz : 1);
- }
- }
-
- /* reloc tables */
- for (u32 i = 0; i < nsecs; ++i) {
- MSec* m = &secs[i];
- if (!m->nreloc) continue;
- cfree_writer_seek(w, m->reloff);
- cfree_writer_write(w, m->relocs, (size_t)m->nreloc * MACHO_RELOC_SIZE);
- }
-
- /* symtab */
- cfree_writer_seek(w, symoff);
- for (u32 i = 0; i < nmsyms; ++i) {
- const MSym* ms = &msyms[i];
- u8 entry[MACHO_NLIST64_SIZE];
- wr_u32_le(entry + 0, ms->strx);
- entry[4] = ms->n_type;
- entry[5] = ms->n_sect;
- wr_u16_le(entry + 6, ms->n_desc);
- wr_u64_le(entry + 8, ms->n_value);
- cfree_writer_write(w, entry, MACHO_NLIST64_SIZE);
- }
-
- /* strtab */
- {
- u8* flat = (u8*)arena_alloc(c->scratch, strtab_size ? strtab_size : 1, 1);
- if (strtab_size) buf_flatten(&strtab, flat);
- cfree_writer_seek(w, stroff);
- cfree_writer_write(w, flat, strtab_size);
- }
- buf_fini(&strtab);
-}
diff --git a/src/obj/macho_read.c b/src/obj/macho_read.c
@@ -1,635 +0,0 @@
-/* Mach-O MH_OBJECT reader. Parses a 64-bit little-endian relocatable
- * object back into a fresh ObjBuilder. The post-finalize ObjBuilder
- * shape is the canonical superset of the writer's input:
- * read_macho of an emit_macho output produces an ObjBuilder
- * shape-equivalent to the writer's input, modulo the synthesized
- * "__SEG,__sect"-form section names.
- *
- * Scope: AArch64 little-endian, MH_OBJECT only (MH_DYLIB is a stub —
- * the linker has no consumer for it yet). Other archs / endianness
- * produce a compiler_panic with a diagnostic. */
-
-#include <string.h>
-
-#include "arch/arch.h"
-#include "core/arena.h"
-#include "core/bytes.h"
-#include "core/heap.h"
-#include "core/pool.h"
-#include "core/slice.h"
-#include "core/util.h"
-#include "obj/macho.h"
-
-static SrcLoc no_loc(void) {
- SrcLoc l = {0, 0, 0};
- return l;
-}
-
-/* ---- mach-section scratch struct ---- */
-
-typedef struct MSecRec {
- char segname[16];
- char sectname[16];
- u32 seg_len;
- u32 sect_len;
- u64 addr;
- u64 size;
- u32 fileoff;
- u32 align_log2;
- u32 reloff;
- u32 nreloc;
- u32 flags;
- u32 reserved2;
- ObjSecId obj_sec; /* assigned in pass 1 */
-} MSecRec;
-
-static u32 fixed16_len(const char* s) {
- u32 n = 0;
- while (n < 16 && s[n] != 0) ++n;
- return n;
-}
-
-static u16 sec_kind_from_seg_sect(const char* segname, u32 seg_len,
- const char* sectname, u32 sect_len,
- u32 flags) {
- u32 stype = flags & SECTION_TYPE;
- if (stype == S_ZEROFILL || stype == S_THREAD_LOCAL_ZEROFILL) return SEC_BSS;
- if (flags & S_ATTR_PURE_INSTRUCTIONS) return SEC_TEXT;
-
- if (seg_len == 7 && memcmp(segname, "__DWARF", 7) == 0) return SEC_DEBUG;
- if (seg_len == 6 && memcmp(segname, "__TEXT", 6) == 0) {
- if (sect_len == 6 && memcmp(sectname, "__text", 6) == 0) return SEC_TEXT;
- return SEC_RODATA; /* __const, __cstring, ... */
- }
- if (seg_len == 6 && memcmp(segname, "__DATA", 6) == 0) {
- if (sect_len == 5 && memcmp(sectname, "__bss", 5) == 0) return SEC_BSS;
- return SEC_DATA;
- }
- return SEC_OTHER;
-}
-
-static u16 sec_flags_from(u32 mflags, u16 sec_kind) {
- u16 f = 0;
- if (sec_kind == SEC_TEXT || (mflags & S_ATTR_PURE_INSTRUCTIONS)) {
- f |= SF_ALLOC | SF_EXEC;
- } else if (sec_kind == SEC_RODATA) {
- f |= SF_ALLOC;
- } else if (sec_kind == SEC_DATA || sec_kind == SEC_BSS) {
- f |= SF_ALLOC | SF_WRITE;
- }
- u32 stype = mflags & SECTION_TYPE;
- if (stype == S_THREAD_LOCAL_REGULAR || stype == S_THREAD_LOCAL_ZEROFILL ||
- stype == S_THREAD_LOCAL_VARIABLES) {
- f |= SF_TLS;
- }
- if (stype == S_CSTRING_LITERALS) {
- f |= SF_MERGE | SF_STRINGS;
- }
- return f;
-}
-
-static u16 sec_sem_from(u32 mflags, u16 sec_kind) {
- u32 stype = mflags & SECTION_TYPE;
- if (stype == S_ZEROFILL || stype == S_THREAD_LOCAL_ZEROFILL ||
- sec_kind == SEC_BSS) {
- return SSEM_NOBITS;
- }
- if (stype == S_MOD_INIT_FUNC_POINTERS) return SSEM_INIT_ARRAY;
- if (stype == S_MOD_TERM_FUNC_POINTERS) return SSEM_FINI_ARRAY;
- return SSEM_PROGBITS;
-}
-
-ObjBuilder* read_macho(Compiler* c, const char* name, const u8* data,
- size_t len) {
- (void)name;
- if (len < MACHO_HDR64_SIZE)
- compiler_panic(c, no_loc(), "read_macho: input shorter than header");
-
- u32 magic = rd_u32_le(data + 0);
- if (magic != MH_MAGIC_64)
- compiler_panic(c, no_loc(), "read_macho: bad magic 0x%x", magic);
-
- u32 cputype = rd_u32_le(data + 4);
- const ArchImpl* arch = arch_lookup_macho_cputype(cputype);
- const ArchMachoOps* macho = arch ? arch->macho : NULL;
- u32 filetype = rd_u32_le(data + 12);
- u32 ncmds = rd_u32_le(data + 16);
- u32 sizeofcmds = rd_u32_le(data + 20);
-
- if (!macho || !macho->reloc_from)
- compiler_panic(c, no_loc(), "read_macho: unsupported cputype 0x%x",
- cputype);
- if (filetype != MH_OBJECT)
- compiler_panic(c, no_loc(),
- "read_macho: only MH_OBJECT supported, got filetype %u",
- filetype);
-
- if ((u64)MACHO_HDR64_SIZE + sizeofcmds > len)
- compiler_panic(c, no_loc(), "read_macho: load commands exceed file");
-
- /* ---- pass 1: walk load commands, collect sections, symtab cmd. */
- MSecRec* msecs = NULL;
- u32 nmsecs = 0;
- u32 symoff = 0, nsyms = 0, stroff = 0, strsize = 0;
-
- u64 pos = MACHO_HDR64_SIZE;
- u64 end = pos + sizeofcmds;
- for (u32 ci = 0; ci < ncmds && pos + 8 <= end; ++ci) {
- u32 cmd = rd_u32_le(data + pos);
- u32 cmdsize = rd_u32_le(data + pos + 4);
- if (cmdsize < 8 || pos + cmdsize > end)
- compiler_panic(c, no_loc(), "read_macho: malformed load command");
-
- if (cmd == LC_SEGMENT_64) {
- u32 nsects = rd_u32_le(data + pos + 64);
- if (MACHO_SEGCMD64_SIZE + (u64)nsects * MACHO_SECT64_SIZE > cmdsize)
- compiler_panic(c, no_loc(), "read_macho: segment cmd truncated");
- MSecRec* extra = arena_array(c->scratch, MSecRec, nmsecs + nsects);
- if (msecs && nmsecs)
- memcpy(extra, msecs, sizeof(MSecRec) * nmsecs);
- msecs = extra;
- const u8* sp = data + pos + MACHO_SEGCMD64_SIZE;
- for (u32 si = 0; si < nsects; ++si, sp += MACHO_SECT64_SIZE) {
- MSecRec* m = &msecs[nmsecs++];
- memset(m, 0, sizeof *m);
- memcpy(m->sectname, sp + 0, 16);
- memcpy(m->segname, sp + 16, 16);
- m->seg_len = fixed16_len(m->segname);
- m->sect_len = fixed16_len(m->sectname);
- m->addr = rd_u64_le(sp + 32);
- m->size = rd_u64_le(sp + 40);
- m->fileoff = rd_u32_le(sp + 48);
- m->align_log2 = rd_u32_le(sp + 52);
- m->reloff = rd_u32_le(sp + 56);
- m->nreloc = rd_u32_le(sp + 60);
- m->flags = rd_u32_le(sp + 64);
- m->reserved2 = rd_u32_le(sp + 72);
- }
- } else if (cmd == LC_SYMTAB) {
- symoff = rd_u32_le(data + pos + 8);
- nsyms = rd_u32_le(data + pos + 12);
- stroff = rd_u32_le(data + pos + 16);
- strsize = rd_u32_le(data + pos + 20);
- }
- pos += cmdsize;
- }
-
- if (stroff + (u64)strsize > len)
- compiler_panic(c, no_loc(), "read_macho: string table out of range");
- if (symoff + (u64)nsyms * MACHO_NLIST64_SIZE > len)
- compiler_panic(c, no_loc(), "read_macho: symbol table out of range");
- const u8* strtab = data + stroff;
-
- ObjBuilder* ob = obj_new(c);
- if (!ob) compiler_panic(c, no_loc(), "read_macho: obj_new failed");
-
- /* ---- pass 2: create ObjSecs and copy bytes. */
- for (u32 i = 0; i < nmsecs; ++i) {
- MSecRec* m = &msecs[i];
- /* Build "__SEG,__sect"-form name; matches what emit_macho would
- * round-trip back out. */
- char nmbuf[34];
- u32 nlen = 0;
- memcpy(nmbuf + nlen, m->segname, m->seg_len);
- nlen += m->seg_len;
- nmbuf[nlen++] = ',';
- memcpy(nmbuf + nlen, m->sectname, m->sect_len);
- nlen += m->sect_len;
- Sym sn = pool_intern_slice(c->global, (Slice){ .s = nmbuf, .len = nlen });
-
- u16 kind = sec_kind_from_seg_sect(m->segname, m->seg_len, m->sectname,
- m->sect_len, m->flags);
- u16 flags = sec_flags_from(m->flags, kind);
- u16 sem = sec_sem_from(m->flags, kind);
- u32 align = 1u << (m->align_log2 & 31);
-
- ObjSecId id = obj_section_ex(ob, sn, (SecKind)kind, (SecSem)sem, flags,
- align, m->reserved2, 0, 0);
- if (id == OBJ_SEC_NONE)
- compiler_panic(c, no_loc(), "read_macho: obj_section_ex failed");
-
- /* Preserve the raw mach section.flags so emit_macho can write back
- * the same S_TYPE / S_ATTR_* bits. */
- obj_section_set_ext(ob, id, OBJ_EXT_MACHO, m->flags, 0);
-
- if (sem == SSEM_NOBITS) {
- obj_reserve_bss(ob, id, (u32)m->size, align);
- } else if (m->size) {
- if (m->fileoff + m->size > len)
- compiler_panic(c, no_loc(), "read_macho: section bytes out of range");
- obj_write(ob, id, data + m->fileoff, (size_t)m->size);
- }
- m->obj_sec = id;
- }
-
- /* ---- pass 3: parse symbol table. Two-pass strategy: first pass
- * creates undefs (so relocations can refer to them), second
- * pass creates defined locals/extdefs. Both write into
- * mach_idx -> ObjSymId so reloc resolution works. */
- ObjSymId* sym_macho_to_obj =
- arena_zarray(c->scratch, ObjSymId, nsyms ? nsyms : 1);
-
- const u8* sbase = data + symoff;
- for (u32 i = 0; i < nsyms; ++i) {
- const u8* p = sbase + (u64)i * MACHO_NLIST64_SIZE;
- u32 strx = rd_u32_le(p + 0);
- u8 n_type = p[4];
- u8 n_sect = p[5];
- u16 n_desc = rd_u16_le(p + 6);
- u64 n_value = rd_u64_le(p + 8);
-
- const char* nm = "";
- u32 nlen = 0;
- if (strx < strsize) {
- nm = (const char*)(strtab + strx);
- while (strx + nlen < strsize && nm[nlen]) ++nlen;
- }
- /* Mach-O names round-trip verbatim — the leading `_` Apple
- * toolchains apply to C symbols is part of the on-disk name as
- * far as ObjBuilder is concerned. Name-canonicalization (the
- * `test_main` ↔ `_test_main` mapping for API callers) happens
- * one layer up at the linker API boundary (link_c_name_intern
- * in link.c); the on-disk shape stays byte-for-byte stable. */
- Sym sn = nlen ? pool_intern_slice(c->global, (Slice){ .s = nm, .len = nlen }) : 0;
-
- u8 type_field = (u8)(n_type & N_TYPE);
- u8 ext = (u8)(n_type & N_EXT);
- u8 pext = (u8)(n_type & N_PEXT);
-
- u16 bind = ext ? SB_GLOBAL : SB_LOCAL;
- /* Weak DEFs (defined symbols) carry N_WEAK_DEF; weak REFs (undef
- * `__attribute__((weak))` references) carry N_WEAK_REF. Either
- * one collapses to SB_WEAK in the cfree model. */
- if (ext && (n_desc & (N_WEAK_DEF | N_WEAK_REF))) bind = SB_WEAK;
- u8 vis = pext ? SV_HIDDEN : SV_DEFAULT;
-
- u16 kind;
- ObjSecId sec_id = OBJ_SEC_NONE;
- u64 value = 0;
- u64 size = 0;
- u64 cmnalign = 0;
-
- if (type_field == N_UNDF) {
- if (ext && n_value != 0) {
- /* Common: n_value is size, n_desc encodes log2(align) in
- * GET_COMM_ALIGN bits. */
- kind = SK_COMMON;
- value = 0;
- size = n_value;
- u32 la = (u32)((n_desc >> 8) & 0xf);
- cmnalign = 1u << la;
- } else {
- kind = SK_UNDEF;
- }
- } else if (type_field == N_ABS) {
- kind = SK_ABS;
- value = n_value;
- } else if (type_field == N_SECT) {
- if (n_sect == 0 || n_sect > nmsecs) {
- kind = SK_NOTYPE;
- } else {
- sec_id = msecs[n_sect - 1].obj_sec;
- /* Mach-O n_value for defined symbols is segment-relative addr;
- * convert back to a section-local offset. */
- u64 base = msecs[n_sect - 1].addr;
- value = (n_value >= base) ? (n_value - base) : 0;
- kind = (msecs[n_sect - 1].flags & S_ATTR_PURE_INSTRUCTIONS) ? SK_FUNC
- : SK_OBJ;
- }
- } else {
- kind = SK_NOTYPE;
- }
-
- ObjSymId id = obj_symbol_ex(ob, sn, (SymBind)bind, (SymVis)vis,
- (SymKind)kind, sec_id, value, size, cmnalign);
- obj_sym_mark_referenced(ob, id);
- /* n_desc carries Mach-O attribute bits beyond what bind/vis/kind
- * model — N_NO_DEAD_STRIP, N_REF_TO_WEAK, N_ARM_THUMB_DEF, etc.
- * Mask off the bits we already round-trip via bind (N_WEAK_DEF /
- * N_WEAK_REF) and the alignment field for commons (which lives
- * in cmnalign), then stash the remainder so emit_macho can OR it
- * back in. */
- u16 desc_pass = n_desc;
- desc_pass &= (u16)~(N_WEAK_DEF | N_WEAK_REF);
- if (kind == SK_COMMON) desc_pass &= 0x00ff; /* drop align field */
- if (desc_pass) obj_symbol_set_flags(ob, id, desc_pass);
- sym_macho_to_obj[i] = id;
- }
-
- /* ---- pass 4: parse per-section relocations into ObjBuilder relocs.
- * Mach-O encodes addends out-of-band as a leading
- * ARM64_RELOC_ADDEND followed by the real reloc; the
- * reader collapses the pair on the way in. */
- /* Lazily-populated section-start local symbols, for clang-emitted
- * non-extern (section-relative) relocations. See the r_extern==0
- * branch below for the encoding. */
- ObjSymId* sec_start_sym =
- arena_zarray(c->scratch, ObjSymId, nmsecs ? nmsecs : 1);
- for (u32 i = 0; i < nmsecs; ++i) sec_start_sym[i] = OBJ_SYM_NONE;
- for (u32 i = 0; i < nmsecs; ++i) {
- MSecRec* m = &msecs[i];
- if (!m->nreloc) continue;
- if (m->reloff + (u64)m->nreloc * MACHO_RELOC_SIZE > len)
- compiler_panic(c, no_loc(),
- "read_macho: relocation table out of range");
- const u8* rp = data + m->reloff;
- i64 pending_addend = 0;
- int have_pending = 0;
- int pending_subtractor = 0;
- u32 pending_subtractor_offset = 0;
- u32 pending_subtractor_length = 0;
- for (u32 j = 0; j < m->nreloc; ++j) {
- u32 r_address = rd_u32_le(rp + j * MACHO_RELOC_SIZE);
- u32 packed = rd_u32_le(rp + j * MACHO_RELOC_SIZE + 4);
- u32 r_symbolnum = packed & 0x00ffffffu;
- u32 r_pcrel = (packed >> 24) & 1u;
- u32 r_length = (packed >> 25) & 3u;
- u32 r_extern = (packed >> 27) & 1u;
- u32 r_type = (packed >> 28) & 0xfu;
-
- if (r_type == ARM64_RELOC_ADDEND) {
- /* Sign-extend 24-bit addend. */
- i32 ad = (i32)(r_symbolnum & 0x00ffffffu);
- if (ad & 0x00800000) ad |= ~0x00ffffff;
- pending_addend = (i64)ad;
- have_pending = 1;
- continue;
- }
-
- u32 kind;
- if (r_type == ARM64_RELOC_SUBTRACTOR) {
- kind = (r_length == 3) ? R_RV_SUB64
- : (r_length == 2) ? R_RV_SUB32
- : (r_length == 1) ? R_RV_SUB16
- : R_RV_SUB8;
- } else {
- kind = macho->reloc_from(r_type);
- }
- if (kind == (u32)-1)
- compiler_panic(c, no_loc(),
- "read_macho: unsupported reloc type %u", r_type);
-
- /* Refine kind by (r_pcrel, r_length) when the type field alone
- * is ambiguous. ARM64_RELOC_UNSIGNED collapses R_ABS64/R_ABS32
- * and PC-relative variants. */
- if (r_type == ARM64_RELOC_UNSIGNED) {
- if (pending_subtractor && pending_subtractor_offset == r_address &&
- pending_subtractor_length == r_length) {
- kind = (r_length == 3) ? R_RV_ADD64
- : (r_length == 2) ? R_RV_ADD32
- : (r_length == 1) ? R_RV_ADD16
- : R_RV_ADD8;
- pending_subtractor = 0;
- } else if (r_pcrel) {
- kind = (r_length == 3) ? R_PC64 : R_PC32;
- } else {
- kind = (r_length == 3) ? R_ABS64 : R_ABS32;
- }
- } else if (r_type == ARM64_RELOC_BRANCH26) {
- kind = R_AARCH64_CALL26;
- } else if (r_type == ARM64_RELOC_PAGEOFF12) {
- /* PAGEOFF12 is access-size-agnostic in Mach-O; the linker
- * applier needs to scale the immediate by the load/store size
- * (or apply it raw for ADD). Inspect the patched instruction
- * at r_address to pick the right RelocKind so the applier in
- * link_reloc.c shifts the lo12 correctly. */
- if (m->fileoff + r_address + 4u > len)
- compiler_panic(c, no_loc(),
- "read_macho: PAGEOFF12 r_address %u out of range",
- r_address);
- u32 ins = rd_u32_le(data + m->fileoff + r_address);
- /* ADD (immediate): bits 30:24 = 0010001 (W=10001 / X=10010001).
- * Mask 0x7f800000 isolates sf=0/1 + the 0010001 pattern; values
- * 0x11000000 (32-bit) and 0x91000000 (64-bit) — match the latter
- * via the same 0x7f mask leaving bit 31 free. */
- if ((ins & 0x7f800000u) == 0x11000000u) {
- kind = R_AARCH64_ADD_ABS_LO12_NC;
- } else if ((ins & 0x3b000000u) == 0x39000000u) {
- /* LDR/STR (immediate unsigned offset). Bits 29:27=111, bit 26=V
- * (0=integer, 1=SIMD/FP), bits 25:24=01. size in [31:30] plus
- * opc bit 23 for the SIMD 128-bit case (size=00, opc=11). */
- u32 sz = (ins >> 30) & 3u;
- u32 v_bit = (ins >> 26) & 1u;
- u32 opc1 = (ins >> 23) & 1u;
- if (v_bit && sz == 0 && opc1) {
- kind = R_AARCH64_LDST128_ABS_LO12_NC;
- } else {
- kind = (sz == 0) ? R_AARCH64_LDST8_ABS_LO12_NC
- : (sz == 1) ? R_AARCH64_LDST16_ABS_LO12_NC
- : (sz == 2) ? R_AARCH64_LDST32_ABS_LO12_NC
- : R_AARCH64_LDST64_ABS_LO12_NC;
- }
- }
- /* else: leave as the default R_AARCH64_ADD_ABS_LO12_NC. */
- }
-
- ObjSymId target = OBJ_SYM_NONE;
- i64 inplace_addend_override = 0;
- int use_inplace_addend = 0;
- if (r_extern) {
- if (r_symbolnum < nsyms) target = sym_macho_to_obj[r_symbolnum];
- if (!have_pending && r_type == ARM64_RELOC_UNSIGNED) {
- u32 rsz = 1u << r_length;
- if ((u64)m->fileoff + r_address + rsz > len)
- compiler_panic(c, no_loc(),
- "read_macho: extern unsigned reloc r_address out "
- "of range");
- const u8* pv = data + m->fileoff + r_address;
- u64 inplace;
- if (r_length == 3) inplace = rd_u64_le(pv);
- else if (r_length == 2) inplace = (u64)rd_u32_le(pv);
- else if (r_length == 1) inplace = (u64)rd_u16_le(pv);
- else inplace = (u64)pv[0];
- inplace_addend_override = (i64)inplace;
- use_inplace_addend = 1;
- }
- } else {
- /* Section-relative reloc — clang emits these for compact unwind,
- * EH frame, and DWARF debug info. r_symbolnum is the 1-based
- * section index; the in-place value at r_address is the absolute
- * .o virtual address of the referent. Synthesize a local
- * symbol pointing to the target section's start (lazily, once
- * per section) and re-express the reloc as
- * target = sec_start_sym, addend = inplace - section.addr. */
- if (r_symbolnum == 0 || r_symbolnum > nmsecs)
- compiler_panic(c, no_loc(),
- "read_macho: section-relative reloc references "
- "invalid section index %u",
- r_symbolnum);
- u32 sec_idx = r_symbolnum - 1u;
- MSecRec* tm = &msecs[sec_idx];
- if (sec_start_sym[sec_idx] == OBJ_SYM_NONE) {
- /* Build ".Lcfree.macho_secstart.<sec_idx>" without snprintf
- * (the freestanding build doesn't pull in stdio). */
- static const char prefix[] = ".Lcfree.macho_secstart.";
- char nmbuf[sizeof(prefix) + 10];
- u32 nlen = (u32)(sizeof(prefix) - 1);
- memcpy(nmbuf, prefix, nlen);
- char dec[10];
- u32 dn = 0;
- u32 v = sec_idx;
- do {
- dec[dn++] = (char)('0' + (v % 10u));
- v /= 10u;
- } while (v);
- for (u32 k = 0; k < dn; ++k) nmbuf[nlen + k] = dec[dn - 1 - k];
- nlen += dn;
- Sym sn = pool_intern_slice(c->global, (Slice){ .s = nmbuf, .len = nlen });
- u16 sk = (tm->flags & S_ATTR_PURE_INSTRUCTIONS) ? SK_FUNC : SK_OBJ;
- sec_start_sym[sec_idx] = obj_symbol(ob, sn, SB_LOCAL, (SymKind)sk,
- tm->obj_sec, 0, 0);
- }
- target = sec_start_sym[sec_idx];
- u32 rsz = 1u << r_length;
- if ((u64)m->fileoff + r_address + rsz > len)
- compiler_panic(c, no_loc(),
- "read_macho: non-extern reloc r_address out of range");
- u64 inplace;
- const u8* pv = data + m->fileoff + r_address;
- if (r_length == 3) inplace = rd_u64_le(pv);
- else if (r_length == 2) inplace = (u64)rd_u32_le(pv);
- else if (r_length == 1) inplace = (u64)rd_u16_le(pv);
- else inplace = (u64)pv[0];
- inplace_addend_override = (i64)inplace - (i64)tm->addr;
- use_inplace_addend = 1;
- }
-
- i64 addend = have_pending ? pending_addend
- : (use_inplace_addend ? inplace_addend_override
- : 0);
- int has_explicit = have_pending || use_inplace_addend || addend != 0;
- have_pending = 0;
- pending_addend = 0;
-
- obj_reloc_ex(ob, m->obj_sec, r_address, (RelocKind)kind, target,
- addend, has_explicit, 0);
- if (r_type == ARM64_RELOC_SUBTRACTOR) {
- pending_subtractor = 1;
- pending_subtractor_offset = r_address;
- pending_subtractor_length = r_length;
- }
- }
- }
-
- obj_finalize(ob);
- return ob;
-}
-
-/* ---- read_macho_dso ----
- *
- * Mach-O shared-image reader (MH_DYLIB or MH_BUNDLE). Walks the load
- * commands once to find LC_ID_DYLIB (install name, stored through
- * install_name_out when non-NULL, 0 if none was read) and LC_SYMTAB, then
- * emits one ObjSym per named external nlist entry that is neither N_UNDF
- * nor a stab. Malformed input ends in compiler_panic.
- *
- * Like read_elf_dso, the produced ObjBuilder carries no sections /
- * relocations / groups — only symbol definitions in OBJ_SEC_NONE. The
- * consumer's resolve_undefs sees these as defined globals and marks the
- * matching consumer-side undef as `imported`. */
-
-ObjBuilder* read_macho_dso(Compiler* c, const char* name, const u8* data,
- size_t len, Sym* install_name_out) {
- (void)name; /* accepted but unused */
- if (install_name_out) *install_name_out = 0;
- if (len < MACHO_HDR64_SIZE)
- compiler_panic(c, no_loc(), "read_macho_dso: input shorter than header");
-
- u32 magic = rd_u32_le(data + 0);
- if (magic != MH_MAGIC_64)
- compiler_panic(c, no_loc(), "read_macho_dso: bad magic 0x%x", magic);
-
- u32 cputype = rd_u32_le(data + 4);
- u32 filetype = rd_u32_le(data + 12);
- u32 ncmds = rd_u32_le(data + 16);
- u32 sizeofcmds = rd_u32_le(data + 20);
-
- if (!arch_lookup_macho_cputype(cputype))
- compiler_panic(c, no_loc(), "read_macho_dso: unsupported cputype 0x%x",
- cputype);
- if (filetype != MH_DYLIB && filetype != MH_BUNDLE)
- compiler_panic(c, no_loc(),
- "read_macho_dso: not MH_DYLIB/MH_BUNDLE (filetype=%u)",
- filetype);
- if ((u64)MACHO_HDR64_SIZE + sizeofcmds > len)
- compiler_panic(c, no_loc(), "read_macho_dso: load commands exceed file");
-
- u32 symoff = 0, nsyms = 0, stroff = 0, strsize = 0;
- Sym install_name = 0;
-
- u64 pos = MACHO_HDR64_SIZE;
- u64 end = pos + sizeofcmds; /* one past the last load-command byte */
- for (u32 ci = 0; ci < ncmds && pos + 8 <= end; ++ci) {
- u32 cmd = rd_u32_le(data + pos);
- u32 cmdsize = rd_u32_le(data + pos + 4);
- if (cmdsize < 8 || pos + cmdsize > end)
- compiler_panic(c, no_loc(), "read_macho_dso: malformed load command");
- if (cmd == LC_ID_DYLIB) {
- /* dylib_command: cmd, cmdsize, name(lc_str: 4-byte offset within
- * the cmd), timestamp, current_version, compat_version. */
- if (cmdsize < 24) goto next; /* too short to be a dylib_command */
- u32 nm_off = rd_u32_le(data + pos + 8);
- if (nm_off >= cmdsize) goto next; /* name offset outside the command */
- const char* p = (const char*)(data + pos + nm_off);
- u32 maxlen = cmdsize - nm_off;
- u32 nlen = 0;
- while (nlen < maxlen && p[nlen]) ++nlen; /* NUL or end of command */
- if (nlen) install_name = pool_intern_slice(c->global, (Slice){ .s = p, .len = nlen });
- } else if (cmd == LC_SYMTAB) { /* NOTE(review): no cmdsize >= 24 guard */
- symoff = rd_u32_le(data + pos + 8);
- nsyms = rd_u32_le(data + pos + 12);
- stroff = rd_u32_le(data + pos + 16);
- strsize = rd_u32_le(data + pos + 20);
- }
- next:
- pos += cmdsize;
- }
- if (install_name_out) *install_name_out = install_name;
-
- if (stroff + (u64)strsize > len)
- compiler_panic(c, no_loc(), "read_macho_dso: string table out of range");
- if (symoff + (u64)nsyms * MACHO_NLIST64_SIZE > len)
- compiler_panic(c, no_loc(), "read_macho_dso: symbol table out of range");
-
- ObjBuilder* ob = obj_new(c);
- if (!ob) compiler_panic(c, no_loc(), "read_macho_dso: obj_new failed");
-
- const u8* strtab = data + stroff;
- const u8* sbase = data + symoff;
- for (u32 i = 0; i < nsyms; ++i) {
- const u8* p = sbase + (u64)i * MACHO_NLIST64_SIZE;
- u32 strx = rd_u32_le(p + 0);
- u8 n_type = p[4];
- u16 n_desc = rd_u16_le(p + 6);
-
- u8 type_field = (u8)(n_type & N_TYPE);
- u8 ext = (u8)(n_type & N_EXT);
- /* Skip non-external (locals) and undef refs (the dylib's own imports). */
- if (!ext) continue;
- if (type_field == N_UNDF) continue;
- /* N_INDR / N_PBUD / N_STAB: skip — not interesting for static link. */
- if (n_type & N_STAB) continue; /* NOTE(review): tests N_STAB bits only */
-
- if (strx >= strsize) continue; /* name offset outside the string table */
- const char* nm = (const char*)(strtab + strx);
- u32 nlen = 0;
- while (strx + nlen < strsize && nm[nlen]) ++nlen; /* NUL or table end */
- if (!nlen) continue; /* unnamed entry */
- Sym sn = pool_intern_slice(c->global, (Slice){ .s = nm, .len = nlen });
-
- SymBind bind = (n_desc & (N_WEAK_DEF | N_WEAK_REF)) ? SB_WEAK : SB_GLOBAL;
- SymKind kind = SK_NOTYPE;
- /* Mach-O dylib nlist doesn't carry STT_FUNC / STT_OBJECT cleanly —
- * default to NOTYPE. The consuming linker uses dso_export_is_func
- * to peek at this for ELF; for Mach-O the `imported` decision flows
- * through synthetic __got / __stubs regardless of kind. */
- {
- ObjSymId did =
- obj_symbol_ex(ob, sn, bind, SV_DEFAULT, kind, OBJ_SEC_NONE, 0, 0, 0);
- obj_sym_mark_referenced(ob, did);
- }
- }
-
- obj_finalize(ob);
- return ob;
-}
diff --git a/src/obj/macho_reloc_aarch64.c b/src/obj/macho_reloc_aarch64.c
@@ -1,113 +0,0 @@
-/* RelocKind <-> arm64 Mach-O reloc-type mapping. Mirror of
- * elf_reloc_aarch64.c for Mach-O.
- *
- * Mach-O relocations carry three independent fields that the cfree
- * RelocKind enum collapses into a single value: r_type (the 4-bit
- * ARM64_RELOC_* code), r_pcrel, and r_length. The translator therefore
- * exposes three accessors — the writer (macho_emit.c) consults all of
- * them per Reloc; the reader (macho_read.c) inverts via
- * macho_aarch64_reloc_from (r_type only) and refines by pcrel/length. */
-
-#include "core/util.h"
-#include "obj/macho.h"
-/* RelocKind -> ARM64_RELOC_* r_type; (u32)-1 when there is no mapping. */
-u32 macho_aarch64_reloc_to(u32 kind /* RelocKind */) {
- switch (kind) {
- case R_NONE:
- return (u32)-1;
- case R_ABS64:
- case R_ABS32:
- return ARM64_RELOC_UNSIGNED;
- case R_REL64:
- case R_REL32:
- case R_PC64:
- case R_PC32:
- /* PC-relative absolute pointer-difference; encoded as
- * UNSIGNED with r_pcrel=1, length=3/2. */
- return ARM64_RELOC_UNSIGNED;
- case R_AARCH64_JUMP26: /* macho_aarch64_reloc_from yields CALL26 for this */
- case R_AARCH64_CALL26:
- return ARM64_RELOC_BRANCH26;
- case R_AARCH64_ADR_PREL_PG_HI21:
- case R_AARCH64_ADR_PREL_PG_HI21_NC:
- return ARM64_RELOC_PAGE21;
- case R_AARCH64_ADD_ABS_LO12_NC: /* ADD and every LDST width share one type */
- case R_AARCH64_LDST8_ABS_LO12_NC:
- case R_AARCH64_LDST16_ABS_LO12_NC:
- case R_AARCH64_LDST32_ABS_LO12_NC:
- case R_AARCH64_LDST64_ABS_LO12_NC:
- case R_AARCH64_LDST128_ABS_LO12_NC:
- return ARM64_RELOC_PAGEOFF12;
- case R_AARCH64_ADR_GOT_PAGE:
- return ARM64_RELOC_GOT_LOAD_PAGE21;
- case R_AARCH64_LD64_GOT_LO12_NC:
- return ARM64_RELOC_GOT_LOAD_PAGEOFF12;
- case R_AARCH64_TLVP_LOAD_PAGE21:
- return ARM64_RELOC_TLVP_LOAD_PAGE21;
- case R_AARCH64_TLVP_LOAD_PAGEOFF12:
- return ARM64_RELOC_TLVP_LOAD_PAGEOFF12;
- default: /* any RelocKind not listed above */
- return (u32)-1;
- }
-}
-/* r_pcrel bit for a RelocKind: 1 for the kinds listed below, otherwise 0. */
-u32 macho_aarch64_reloc_pcrel(u32 kind /* RelocKind */) {
- switch (kind) {
- case R_REL64:
- case R_REL32:
- case R_PC64:
- case R_PC32:
- case R_AARCH64_JUMP26:
- case R_AARCH64_CALL26:
- case R_AARCH64_ADR_PREL_PG_HI21:
- case R_AARCH64_ADR_PREL_PG_HI21_NC:
- case R_AARCH64_ADR_GOT_PAGE:
- case R_AARCH64_TLVP_LOAD_PAGE21:
- return 1;
- default: /* absolute kinds and all *LO12 / *PAGEOFF12 kinds */
- return 0;
- }
-}
-/* r_length for a RelocKind: 3 for the three 64-bit kinds, 2 for all others. */
-u32 macho_aarch64_reloc_length(u32 kind /* RelocKind */) {
- /* log2 of the patch width in bytes: 0=byte, 1=hword, 2=word, 3=quad.
- * AArch64 instructions are 4 bytes and Mach-O encodes any 32-bit fixup
- * (BRANCH26, PAGE21, PAGEOFF12, ...) with length=2. */
- switch (kind) {
- case R_ABS64:
- case R_REL64:
- case R_PC64:
- return 3;
- default: /* never returns 0 or 1 */
- return 2;
- }
-}
-/* ARM64_RELOC_* r_type -> default RelocKind; (u32)-1 for unlisted types. */
-u32 macho_aarch64_reloc_from(u32 macho_type) {
- /* The (r_type, r_pcrel, r_length) tuple disambiguates several kinds
- * collapsed by macho_aarch64_reloc_to. The reader inspects pcrel and
- * length itself when it matters; this function only maps the type
- * field, returning the most common AArch64 instance for each. Reader
- * callers refine via the pcrel/length companion if they need to
- * distinguish R_ABS64 vs R_PC64 (both UNSIGNED). */
- switch (macho_type) {
- case ARM64_RELOC_UNSIGNED:
- return R_ABS64;
- case ARM64_RELOC_BRANCH26:
- return R_AARCH64_CALL26; /* JUMP26 is never produced here */
- case ARM64_RELOC_PAGE21:
- return R_AARCH64_ADR_PREL_PG_HI21;
- case ARM64_RELOC_PAGEOFF12:
- return R_AARCH64_ADD_ABS_LO12_NC; /* LDST* widths are not recoverable here */
- case ARM64_RELOC_GOT_LOAD_PAGE21:
- return R_AARCH64_ADR_GOT_PAGE;
- case ARM64_RELOC_GOT_LOAD_PAGEOFF12:
- return R_AARCH64_LD64_GOT_LO12_NC;
- case ARM64_RELOC_TLVP_LOAD_PAGE21:
- return R_AARCH64_TLVP_LOAD_PAGE21;
- case ARM64_RELOC_TLVP_LOAD_PAGEOFF12:
- return R_AARCH64_TLVP_LOAD_PAGEOFF12;
- default: /* includes ARM64_RELOC_ADDEND and ARM64_RELOC_SUBTRACTOR */
- return (u32)-1;
- }
-}
diff --git a/src/obj/macho_reloc_x86_64.c b/src/obj/macho_reloc_x86_64.c
@@ -1,84 +0,0 @@
-#include "core/util.h"
-#include "obj/macho.h"
-/* RelocKind -> X86_64_RELOC_* r_type; (u32)-1 when there is no mapping. */
-u32 macho_x86_64_reloc_to(u32 kind /* RelocKind */) {
- switch (kind) {
- case R_NONE:
- return (u32)-1;
- case R_ABS64:
- case R_ABS32:
- return X86_64_RELOC_UNSIGNED;
- case R_PC32: /* every PC-relative data kind shares SIGNED */
- case R_REL32:
- case R_PC64:
- case R_REL64:
- case R_X64_PC8:
- return X86_64_RELOC_SIGNED;
- case R_PLT32:
- case R_X64_PLT32:
- return X86_64_RELOC_BRANCH;
- case R_X64_GOTPCRELX: /* macho_x86_64_reloc_from yields the REX form */
- case R_X64_REX_GOTPCRELX:
- return X86_64_RELOC_GOT_LOAD;
- case R_X64_GOTPCREL:
- return X86_64_RELOC_GOT;
- case R_X64_TPOFF32:
- return X86_64_RELOC_TLV;
- default: /* any RelocKind not listed above */
- return (u32)-1;
- }
-}
-/* r_pcrel bit for a RelocKind: 1 for the kinds listed below, otherwise 0. */
-u32 macho_x86_64_reloc_pcrel(u32 kind /* RelocKind */) {
- switch (kind) {
- case R_PC32:
- case R_REL32:
- case R_PC64:
- case R_REL64:
- case R_X64_PC8:
- case R_PLT32:
- case R_X64_PLT32:
- case R_X64_GOTPCREL:
- case R_X64_GOTPCRELX:
- case R_X64_REX_GOTPCRELX:
- case R_X64_TPOFF32:
- return 1;
- default: /* R_ABS64 / R_ABS32 and anything unlisted */
- return 0;
- }
-}
-/* r_length (log2 of patch bytes, as in the aarch64 twin) for a RelocKind. */
-u32 macho_x86_64_reloc_length(u32 kind /* RelocKind */) {
- switch (kind) {
- case R_ABS64:
- case R_PC64:
- case R_REL64:
- return 3; /* 8-byte patch */
- case R_X64_PC8:
- return 0; /* 1-byte patch */
- default:
- return 2; /* 4-byte patch for everything else */
- }
-}
-/* X86_64_RELOC_* r_type -> default RelocKind; (u32)-1 for unlisted types. */
-u32 macho_x86_64_reloc_from(u32 macho_type) {
- switch (macho_type) {
- case X86_64_RELOC_UNSIGNED:
- return R_ABS64;
- case X86_64_RELOC_SIGNED: /* the SIGNED_1/2/4 variants collapse too */
- case X86_64_RELOC_SIGNED_1:
- case X86_64_RELOC_SIGNED_2:
- case X86_64_RELOC_SIGNED_4:
- return R_PC32;
- case X86_64_RELOC_BRANCH:
- return R_X64_PLT32; /* plain R_PLT32 is never produced here */
- case X86_64_RELOC_GOT_LOAD:
- return R_X64_REX_GOTPCRELX; /* non-REX GOTPCRELX is never produced here */
- case X86_64_RELOC_GOT:
- return R_X64_GOTPCREL;
- case X86_64_RELOC_TLV:
- return R_X64_TPOFF32;
- default: /* any r_type not listed above */
- return (u32)-1;
- }
-}
diff --git a/src/obj/registry.c b/src/obj/registry.c
@@ -1,15 +1,22 @@
-#include "obj/format.h"
-
#include <cfree/config.h>
#include <string.h>
#include "core/slice.h"
+#include "obj/coff/coff.h"
+#include "obj/elf/elf.h"
+#include "obj/format.h"
+#include "obj/macho/macho.h"
#include "obj/obj.h"
void link_emit_elf(LinkImage*, Writer*);
void link_emit_macho(LinkImage*, Writer*);
void link_emit_coff(LinkImage*, Writer*);
+#if CFREE_OBJ_ELF_ENABLED
+void layout_dyn(Linker*, LinkImage*);
+void link_dyn_state_free(LinkImage*);
+#endif
+
#if CFREE_OBJ_COFF_ENABLED
int coff_classify_obj_input(Compiler*, ObjBuilder*, Sym* soname_out);
Sym coff_archive_hint(Compiler*, const char* archive_name);
@@ -18,6 +25,186 @@ ObjFormatArchiveAction coff_archive_member(Compiler*,
ObjBuilder** out);
#endif
+#if CFREE_ARCH_AA64_ENABLED
+void aa64_emit_macho_stub(u8* dst, u64 stub_vaddr, u64 got_slot_vaddr);
+void aa64_emit_coff_iat_stub(u8* dst, u64 stub_vaddr, u64 iat_slot_vaddr);
+#endif
+#if CFREE_ARCH_X64_ENABLED
+void x64_emit_coff_iat_stub(u8* dst, u64 stub_vaddr, u64 iat_slot_vaddr);
+#endif
+
+#if CFREE_OBJ_ELF_ENABLED
+static const ObjElfArchOps obj_elf_arch_ops[] = { /* one row per enabled arch */
+#if CFREE_ARCH_AA64_ENABLED
+ {
+ .arch = CFREE_ARCH_ARM_64,
+ .e_machine = EM_AARCH64,
+ .e_flags = 0,
+ .default_musl_interp = "/lib/ld-musl-aarch64.so.1",
+ .r_relative = ELF_R_AARCH64_RELATIVE,
+ .r_glob_dat = ELF_R_AARCH64_GLOB_DAT,
+ .r_jump_slot = ELF_R_AARCH64_JUMP_SLOT,
+ .reloc_to = elf_aarch64_reloc_to,
+ .reloc_from = elf_aarch64_reloc_from,
+ },
+#endif
+#if CFREE_ARCH_X64_ENABLED
+ {
+ .arch = CFREE_ARCH_X86_64,
+ .e_machine = EM_X86_64,
+ .e_flags = 0,
+ .default_musl_interp = "/lib/ld-musl-x86_64.so.1",
+ .r_relative = ELF_R_X86_64_RELATIVE,
+ .r_glob_dat = ELF_R_X86_64_GLOB_DAT,
+ .r_jump_slot = ELF_R_X86_64_JUMP_SLOT,
+ .reloc_to = elf_x86_64_reloc_to,
+ .reloc_from = elf_x86_64_reloc_from,
+ },
+#endif
+#if CFREE_ARCH_RV64_ENABLED
+ {
+ .arch = CFREE_ARCH_RV64,
+ .e_machine = EM_RISCV,
+ .e_flags = EF_RISCV_RVC | EF_RISCV_FLOAT_ABI_DOUBLE,
+ .default_musl_interp = "/lib/ld-musl-riscv64.so.1",
+ .r_relative = ELF_R_RISCV_RELATIVE,
+ .r_glob_dat = ELF_R_RISCV_64, /* plain 64-bit reloc, not a *_GLOB_DAT one */
+ .r_jump_slot = ELF_R_RISCV_JUMP_SLOT,
+ .reloc_to = elf_riscv64_reloc_to,
+ .reloc_from = elf_riscv64_reloc_from,
+ },
+#endif
+#if !CFREE_ARCH_AA64_ENABLED && !CFREE_ARCH_X64_ENABLED && \
+ !CFREE_ARCH_RV64_ENABLED
+ {.arch = CFREE_ARCH_WASM}, /* presumably a filler so the table is non-empty */
+#endif
+};
+/* Returns the obj_elf_arch_ops row whose .arch equals `arch`, else NULL. */
+static const ObjElfArchOps* obj_elf_arch(CfreeArchKind arch) {
+ u32 i; /* NOTE(review): a CFREE_ARCH_WASM filler row, if present, matches too */
+ for (i = 0; i < (u32)(sizeof obj_elf_arch_ops / sizeof obj_elf_arch_ops[0]);
+ ++i) {
+ if (obj_elf_arch_ops[i].arch == arch) return &obj_elf_arch_ops[i];
+ }
+ return NULL;
+}
+/* Returns the obj_elf_arch_ops row matching an e_machine value, else NULL. */
+static const ObjElfArchOps* obj_elf_machine(u32 e_machine) {
+ u32 i;
+ for (i = 0; i < (u32)(sizeof obj_elf_arch_ops / sizeof obj_elf_arch_ops[0]);
+ ++i) {
+ if (obj_elf_arch_ops[i].e_machine && /* rows with e_machine 0 never match */
+ obj_elf_arch_ops[i].e_machine == e_machine)
+ return &obj_elf_arch_ops[i];
+ }
+ return NULL;
+}
+#endif
+
+#if CFREE_OBJ_MACHO_ENABLED
+static const ObjMachoArchOps obj_macho_arch_ops[] = { /* one row per enabled arch */
+#if CFREE_ARCH_AA64_ENABLED
+ {
+ .arch = CFREE_ARCH_ARM_64,
+ .cputype = CPU_TYPE_ARM64,
+ .cpusubtype = CPU_SUBTYPE_ARM64_ALL,
+ .stub_size = 12u,
+ .emit_stub = aa64_emit_macho_stub,
+ .reloc_to = macho_aarch64_reloc_to,
+ .reloc_pcrel = macho_aarch64_reloc_pcrel,
+ .reloc_length = macho_aarch64_reloc_length,
+ .reloc_from = macho_aarch64_reloc_from,
+ },
+#endif
+#if CFREE_ARCH_X64_ENABLED
+ {
+ .arch = CFREE_ARCH_X86_64,
+ .cputype = CPU_TYPE_X86_64,
+ .cpusubtype = CPU_SUBTYPE_X86_64_ALL,
+ .stub_size = 0u, /* no stub emitter is registered for x86-64 */
+ .emit_stub = NULL,
+ .reloc_to = macho_x86_64_reloc_to,
+ .reloc_pcrel = macho_x86_64_reloc_pcrel,
+ .reloc_length = macho_x86_64_reloc_length,
+ .reloc_from = macho_x86_64_reloc_from,
+ },
+#endif
+#if !CFREE_ARCH_AA64_ENABLED && !CFREE_ARCH_X64_ENABLED
+ {.arch = CFREE_ARCH_WASM}, /* presumably a filler so the table is non-empty */
+#endif
+};
+/* Returns the obj_macho_arch_ops row whose .arch equals `arch`, else NULL. */
+static const ObjMachoArchOps* obj_macho_arch(CfreeArchKind arch) {
+ u32 i; /* NOTE(review): a CFREE_ARCH_WASM filler row, if present, matches too */
+ for (i = 0;
+ i < (u32)(sizeof obj_macho_arch_ops / sizeof obj_macho_arch_ops[0]);
+ ++i) {
+ if (obj_macho_arch_ops[i].arch == arch) return &obj_macho_arch_ops[i];
+ }
+ return NULL;
+}
+/* Returns the obj_macho_arch_ops row matching a cputype value, else NULL. */
+static const ObjMachoArchOps* obj_macho_cputype(u32 cputype) {
+ u32 i;
+ for (i = 0;
+ i < (u32)(sizeof obj_macho_arch_ops / sizeof obj_macho_arch_ops[0]);
+ ++i) {
+ if (obj_macho_arch_ops[i].cputype && /* rows with cputype 0 never match */
+ obj_macho_arch_ops[i].cputype == cputype)
+ return &obj_macho_arch_ops[i];
+ }
+ return NULL;
+}
+#endif
+
+#if CFREE_OBJ_COFF_ENABLED
+static const ObjCoffArchOps obj_coff_arch_ops[] = { /* one row per enabled arch */
+#if CFREE_ARCH_AA64_ENABLED
+ {
+ .arch = CFREE_ARCH_ARM_64,
+ .machine = IMAGE_FILE_MACHINE_ARM64,
+ .stub_size = 12u,
+ .emit_iat_stub = aa64_emit_coff_iat_stub,
+ .reloc_to = coff_aarch64_reloc_to,
+ .reloc_from = coff_aarch64_reloc_from,
+ },
+#endif
+#if CFREE_ARCH_X64_ENABLED
+ {
+ .arch = CFREE_ARCH_X86_64,
+ .machine = IMAGE_FILE_MACHINE_AMD64,
+ .stub_size = 6u,
+ .emit_iat_stub = x64_emit_coff_iat_stub,
+ .reloc_to = coff_x86_64_reloc_to,
+ .reloc_from = coff_x86_64_reloc_from,
+ },
+#endif
+#if !CFREE_ARCH_AA64_ENABLED && !CFREE_ARCH_X64_ENABLED
+ {.arch = CFREE_ARCH_WASM}, /* presumably a filler so the table is non-empty */
+#endif
+};
+/* Returns the obj_coff_arch_ops row whose .arch equals `arch`, else NULL. */
+static const ObjCoffArchOps* obj_coff_arch(CfreeArchKind arch) {
+ u32 i; /* NOTE(review): a CFREE_ARCH_WASM filler row, if present, matches too */
+ for (i = 0; i < (u32)(sizeof obj_coff_arch_ops / sizeof obj_coff_arch_ops[0]);
+ ++i) {
+ if (obj_coff_arch_ops[i].arch == arch) return &obj_coff_arch_ops[i];
+ }
+ return NULL;
+}
+/* Returns the obj_coff_arch_ops row matching a COFF Machine value, else NULL. */
+static const ObjCoffArchOps* obj_coff_machine(u16 machine) {
+ u32 i;
+ if (machine == 0xA641u) machine = IMAGE_FILE_MACHINE_ARM64; /* 0xA641 is ARM64EC in the PE spec; folded into ARM64 — confirm intent */
+ for (i = 0; i < (u32)(sizeof obj_coff_arch_ops / sizeof obj_coff_arch_ops[0]);
+ ++i) {
+ if (obj_coff_arch_ops[i].machine && obj_coff_arch_ops[i].machine == machine) /* rows with machine 0 never match */
+ return &obj_coff_arch_ops[i];
+ }
+ return NULL;
+}
+#endif
+
static const ObjFormatImpl obj_format_impl_wasm = {
.kind = CFREE_OBJ_WASM,
.bin_fmt = CFREE_BIN_WASM,
@@ -41,6 +228,10 @@ static const ObjFormatImpl obj_format_impl_elf = {
.read = read_elf,
.read_dso = read_elf_dso,
.link_emit = link_emit_elf,
+ .layout_dyn = layout_dyn,
+ .free_dyn = link_dyn_state_free,
+ .elf_arch = obj_elf_arch,
+ .elf_machine = obj_elf_machine,
};
#endif
@@ -55,6 +246,8 @@ static const ObjFormatImpl obj_format_impl_macho = {
.read = read_macho,
.read_dso = read_macho_dso,
.link_emit = link_emit_macho,
+ .macho_arch = obj_macho_arch,
+ .macho_cputype = obj_macho_cputype,
};
#endif
@@ -69,6 +262,8 @@ static const ObjFormatImpl obj_format_impl_coff = {
.read = read_coff,
.read_dso = read_coff_dso,
.link_emit = link_emit_coff,
+ .coff_arch = obj_coff_arch,
+ .coff_machine = obj_coff_machine,
.classify_obj_input = coff_classify_obj_input,
.archive_hint = coff_archive_hint,
.archive_member = coff_archive_member,
diff --git a/test/coff/README.md b/test/coff/README.md
@@ -12,7 +12,7 @@ Hand-built `ObjBuilder` → `emit_coff` → bytes → `read_coff` → second
1. Structural equivalence between the original `ObjBuilder` and the
readback (sections, symbols, relocations, groups all preserved
modulo synthesized SECTION symbols and section-definition aux
- records — the asymmetry that `src/obj/coff_read.c` documents).
+ records — the asymmetry that `src/obj/coff/read.c` documents).
2. Byte stability across `emit_coff(read_coff(emit_coff(ob)))` — the
second emit must produce the exact bytes of the first.
diff --git a/test/coff/cfree-roundtrip-coff.c b/test/coff/cfree-roundtrip-coff.c
@@ -63,15 +63,14 @@ static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0};
static int g_failures;
static const char* g_test_name = "?";
-#define EXPECT(cond, ...) \
- do { \
- if (!(cond)) { \
- fprintf(stderr, "FAIL [%s] %s:%d: ", g_test_name, __FILE__, \
- __LINE__); \
- fprintf(stderr, __VA_ARGS__); \
- fputc('\n', stderr); \
- g_failures++; \
- } \
+#define EXPECT(cond, ...) \
+ do { \
+ if (!(cond)) { \
+ fprintf(stderr, "FAIL [%s] %s:%d: ", g_test_name, __FILE__, __LINE__); \
+ fprintf(stderr, __VA_ARGS__); \
+ fputc('\n', stderr); \
+ g_failures++; \
+ } \
} while (0)
/* ---- target builders ---------------------------------------------- */
@@ -655,8 +654,7 @@ static void test_data_with_reloc_rel32_x64(void) {
* SK_FUNC + section_id == 0 emits Type=function but the reader collapses
* to SK_UNDEF on readback (no "undef function" kind in cfree's model),
* which breaks byte stability. See CORPUS.md §10. */
- ObjSymId helper =
- obj_symbol(ob, hn, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0);
+ ObjSymId helper = obj_symbol(ob, hn, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0);
obj_reloc(ob, sec, 1, R_PC32, helper, 0);
obj_finalize(ob);
@@ -678,8 +676,8 @@ static void verify_aa64_branch26(const ObjBuilder* ob, Pool* p) {
const Reloc* r = obj_reloc_at(ob, i);
if (r->removed) continue;
if (r->section_id != text_id) continue;
- EXPECT(r->kind == R_AARCH64_CALL26,
- "branch26 reloc kind=%u (want %u)", r->kind, R_AARCH64_CALL26);
+ EXPECT(r->kind == R_AARCH64_CALL26, "branch26 reloc kind=%u (want %u)",
+ r->kind, R_AARCH64_CALL26);
++seen;
}
EXPECT(seen == 1, "branch26 reloc count=%d (want 1)", seen);
@@ -709,8 +707,7 @@ static void test_aa64_branch26(void) {
static const uint8_t bytes[8] = {0, 0, 0, 0x94, 0xc0, 0x03, 0x5f, 0xd6};
obj_write(ob, sec, bytes, sizeof bytes);
/* See reloc_rel32_x64 note on SK_UNDEF for undef symbols. */
- ObjSymId callee =
- obj_symbol(ob, cn, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0);
+ ObjSymId callee = obj_symbol(ob, cn, SB_GLOBAL, SK_UNDEF, OBJ_SEC_NONE, 0, 0);
obj_reloc(ob, sec, 0, R_AARCH64_CALL26, callee, 0);
obj_finalize(ob);
@@ -768,8 +765,7 @@ static void test_aa64_pagebase_pageoffset(void) {
obj_write(ob, tsec, txt, sizeof txt);
static const uint8_t str[6] = "hello";
obj_write(ob, rsec, str, sizeof str);
- ObjSymId kStr =
- obj_symbol(ob, kn, SB_LOCAL, SK_OBJ, rsec, 0, sizeof str);
+ ObjSymId kStr = obj_symbol(ob, kn, SB_LOCAL, SK_OBJ, rsec, 0, sizeof str);
obj_reloc(ob, tsec, 0, R_AARCH64_ADR_PREL_PG_HI21, kStr, 0);
obj_reloc(ob, tsec, 4, R_AARCH64_ADD_ABS_LO12_NC, kStr, 0);
obj_finalize(ob);
@@ -953,10 +949,8 @@ static void verify_comdat_group(const ObjBuilder* ob, Pool* p) {
const Section* dsec = find_section_named(ob, p, ".data$x");
EXPECT(tsec != NULL, ".text$x missing");
EXPECT(dsec != NULL, ".data$x missing");
- if (tsec)
- EXPECT((tsec->flags & SF_GROUP) != 0, ".text$x missing SF_GROUP");
- if (dsec)
- EXPECT((dsec->flags & SF_GROUP) != 0, ".data$x missing SF_GROUP");
+ if (tsec) EXPECT((tsec->flags & SF_GROUP) != 0, ".text$x missing SF_GROUP");
+ if (dsec) EXPECT((dsec->flags & SF_GROUP) != 0, ".data$x missing SF_GROUP");
/* COFF encodes COMDAT per-section (each member section carries its
* own section-definition aux with the selection rule); the wire
@@ -975,8 +969,7 @@ static void verify_comdat_group(const ObjBuilder* ob, Pool* p) {
}
obj_groupiter_free(it);
EXPECT(seen == 2, "expected 2 groups after COMDAT round-trip, got %d", seen);
- EXPECT(total_member_sections == 2,
- "total COMDAT member sections=%u (want 2)",
+ EXPECT(total_member_sections == 2, "total COMDAT member sections=%u (want 2)",
total_member_sections);
}
@@ -1000,16 +993,16 @@ static void test_comdat_group(void) {
/* Short section names (<= 8 bytes) — section names that overflow into
* the strtab don't round-trip COMDAT detection because the section
* symbol's name is truncated on emit but the reader compares the
- * resolved long name. See CORPUS.md §10 / src/obj/coff_read.c
+ * resolved long name. See CORPUS.md §10 / src/obj/coff/read.c
* is_section_sym logic. */
Sym tn = pool_intern_slice(p, SLICE_LIT(".text$x"));
Sym dn = pool_intern_slice(p, SLICE_LIT(".data$x"));
Sym sign = pool_intern_slice(p, SLICE_LIT("inline_fn"));
- ObjSecId tsec = obj_section(ob, tn, SEC_TEXT,
- SF_ALLOC | SF_EXEC | SF_GROUP, 16);
- ObjSecId dsec = obj_section(ob, dn, SEC_DATA,
- SF_ALLOC | SF_WRITE | SF_GROUP, 8);
+ ObjSecId tsec =
+ obj_section(ob, tn, SEC_TEXT, SF_ALLOC | SF_EXEC | SF_GROUP, 16);
+ ObjSecId dsec =
+ obj_section(ob, dn, SEC_DATA, SF_ALLOC | SF_WRITE | SF_GROUP, 8);
obj_write(ob, tsec, TEXT_X64, sizeof TEXT_X64);
static const uint8_t z8[8] = {0};
obj_write(ob, dsec, z8, sizeof z8);
@@ -1112,18 +1105,19 @@ static void test_section_symbol_synthesis(void) {
ObjBuilder* ob = obj_new(c);
Pool* p = c->global;
- ObjSecId text = obj_section(ob, pool_intern_slice(p, SLICE_LIT(".text")), SEC_TEXT,
- SF_ALLOC | SF_EXEC, 16);
+ ObjSecId text = obj_section(ob, pool_intern_slice(p, SLICE_LIT(".text")),
+ SEC_TEXT, SF_ALLOC | SF_EXEC, 16);
obj_write(ob, text, TEXT_X64, sizeof TEXT_X64);
- ObjSecId data = obj_section(ob, pool_intern_slice(p, SLICE_LIT(".data")), SEC_DATA,
- SF_ALLOC | SF_WRITE, 8);
+ ObjSecId data = obj_section(ob, pool_intern_slice(p, SLICE_LIT(".data")),
+ SEC_DATA, SF_ALLOC | SF_WRITE, 8);
static const uint8_t z8[8] = {0};
obj_write(ob, data, z8, sizeof z8);
- ObjSecId rdata = obj_section(ob, pool_intern_slice(p, SLICE_LIT(".rdata")), SEC_RODATA,
- SF_ALLOC, 8);
+ ObjSecId rdata = obj_section(ob, pool_intern_slice(p, SLICE_LIT(".rdata")),
+ SEC_RODATA, SF_ALLOC, 8);
obj_write(ob, rdata, "hi\0", 3);
- ObjSecId bss = obj_section_ex(ob, pool_intern_slice(p, SLICE_LIT(".bss")), SEC_BSS,
- SSEM_NOBITS, SF_ALLOC | SF_WRITE, 8, 0, 0, 0);
+ ObjSecId bss =
+ obj_section_ex(ob, pool_intern_slice(p, SLICE_LIT(".bss")), SEC_BSS,
+ SSEM_NOBITS, SF_ALLOC | SF_WRITE, 8, 0, 0, 0);
obj_reserve_bss(ob, bss, 16, 8);
obj_finalize(ob);
@@ -1141,8 +1135,8 @@ static void verify_tls_section(const ObjBuilder* ob, Pool* p) {
const Section* s = find_section_named(ob, p, ".tls$");
EXPECT(s != NULL, ".tls$ not present");
if (s) {
- EXPECT((s->flags & SF_TLS) != 0,
- ".tls$ missing SF_TLS (flags=0x%x)", s->flags);
+ EXPECT((s->flags & SF_TLS) != 0, ".tls$ missing SF_TLS (flags=0x%x)",
+ s->flags);
}
}
@@ -1288,15 +1282,19 @@ static void test_short_import_amd64(void) {
const uint32_t kSymLen = (uint32_t)(sizeof kSym - 1);
const uint32_t kDllLen = (uint32_t)(sizeof kDll - 1);
const uint32_t kDataLen = sizeof kSym + sizeof kDll; /* 12 + 13 = 25 */
- const size_t kTotal = 20 + kDataLen; /* 45 */
+ const size_t kTotal = 20 + kDataLen; /* 45 */
uint8_t buf[64];
EXPECT(kTotal <= sizeof buf, "buf too small");
memset(buf, 0, kTotal);
/* Header. */
- buf[0] = 0x00; buf[1] = 0x00; /* Sig1 = 0 */
- buf[2] = 0xFF; buf[3] = 0xFF; /* Sig2 = 0xFFFF */
- buf[4] = 0x00; buf[5] = 0x00; /* Version = 0 */
- buf[6] = 0x64; buf[7] = 0x86; /* Machine = AMD64 (0x8664) */
+ buf[0] = 0x00;
+ buf[1] = 0x00; /* Sig1 = 0 */
+ buf[2] = 0xFF;
+ buf[3] = 0xFF; /* Sig2 = 0xFFFF */
+ buf[4] = 0x00;
+ buf[5] = 0x00; /* Version = 0 */
+ buf[6] = 0x64;
+ buf[7] = 0x86; /* Machine = AMD64 (0x8664) */
/* TimeDateStamp = 0 (bytes 8..11 already 0). */
buf[12] = (uint8_t)(kDataLen & 0xFF);
buf[13] = (uint8_t)((kDataLen >> 8) & 0xFF);
@@ -1304,7 +1302,8 @@ static void test_short_import_amd64(void) {
buf[15] = (uint8_t)((kDataLen >> 24) & 0xFF);
/* OrdinalOrHint = 0 (16..17). */
/* TypeFlags = Type=CODE(0) | NameType=NAME(1)<<2 = 0x0004. */
- buf[18] = 0x04; buf[19] = 0x00;
+ buf[18] = 0x04;
+ buf[19] = 0x00;
/* Body: symbol name NUL DLL name NUL. */
memcpy(buf + 20, kSym, sizeof kSym);
memcpy(buf + 20 + sizeof kSym, kDll, sizeof kDll);
@@ -1335,8 +1334,7 @@ static void test_short_import_amd64(void) {
const ObjSym* s = obj_symbol_get(ob, imp_id);
EXPECT(s->bind == SB_GLOBAL, "__imp_ bind=%u (want SB_GLOBAL)", s->bind);
EXPECT(s->section_id == OBJ_SEC_NONE,
- "__imp_ section_id=%u (want OBJ_SEC_NONE)",
- (unsigned)s->section_id);
+ "__imp_ section_id=%u (want OBJ_SEC_NONE)", (unsigned)s->section_id);
}
Sym dll = 0;
@@ -1344,7 +1342,8 @@ static void test_short_import_amd64(void) {
EXPECT(got, "obj_get_coff_import_dll returned 0 (annotation missing)");
if (got) EXPECT(sym_eq_str(p, dll, kDll), "DLL name mismatch");
- (void)kSymLen; (void)kDllLen;
+ (void)kSymLen;
+ (void)kDllLen;
obj_free(ob);
cfree_compiler_free((CfreeCompiler*)c);
}
@@ -1400,4 +1399,3 @@ int main(void) {
fprintf(stderr, "OK %zu tests\n", NTESTS);
return 0;
}
-
diff --git a/test/coff/pe-dso-forwarder.c b/test/coff/pe-dso-forwarder.c
@@ -19,7 +19,7 @@
#include "core/core.h"
#include "core/pool.h"
-#include "obj/coff.h"
+#include "obj/coff/coff.h"
#include "obj/obj.h"
/* ---- env vtables --------------------------------------------------- */
@@ -54,14 +54,14 @@ static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc,
static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0};
static int g_failures;
-#define EXPECT(cond, ...) \
- do { \
- if (!(cond)) { \
- fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \
- fprintf(stderr, __VA_ARGS__); \
- fputc('\n', stderr); \
- g_failures++; \
- } \
+#define EXPECT(cond, ...) \
+ do { \
+ if (!(cond)) { \
+ fprintf(stderr, "FAIL %s:%d: ", __FILE__, __LINE__); \
+ fprintf(stderr, __VA_ARGS__); \
+ fputc('\n', stderr); \
+ g_failures++; \
+ } \
} while (0)
/* ---- compiler ----------------------------------------------------- */
@@ -119,33 +119,33 @@ static void wr_u32(uint8_t* p, uint32_t v) {
* including the forwarder target string so the reader classifies
* "OTHERDLL.OtherSym" EAT entries as forwarders. */
-#define E_LFANEW 0x40u
-#define FH_OFF (E_LFANEW + 4u)
-#define OH_OFF (FH_OFF + COFF_FILE_HEADER_SIZE)
-#define SH_OFF (OH_OFF + COFF_OPT_HDR64_SIZE)
-#define RAW_OFF 0x170u
-#define SEC_VA 0x1000u
-#define SEC_RAW_SZ 0x200u
-#define FILE_SIZE (RAW_OFF + SEC_RAW_SZ)
+#define E_LFANEW 0x40u
+#define FH_OFF (E_LFANEW + 4u)
+#define OH_OFF (FH_OFF + COFF_FILE_HEADER_SIZE)
+#define SH_OFF (OH_OFF + COFF_OPT_HDR64_SIZE)
+#define RAW_OFF 0x170u
+#define SEC_VA 0x1000u
+#define SEC_RAW_SZ 0x200u
+#define FILE_SIZE (RAW_OFF + SEC_RAW_SZ)
/* In-section offsets (relative to RAW_OFF / RVA = SEC_VA + off). */
-#define EXP_DIR_OFF 0u
-#define EAT_OFF (EXP_DIR_OFF + COFF_EXPORT_DIR_SIZE) /* +40 */
-#define EAT_COUNT 2u
-#define ENT_OFF (EAT_OFF + EAT_COUNT * 4u) /* +48 */
-#define ENT_COUNT 2u
-#define ORD_OFF (ENT_OFF + ENT_COUNT * 4u) /* +56 */
-#define DLLNAME_OFF (ORD_OFF + ENT_COUNT * 2u) /* +60 */
-
-static const char kDllName[] = "TestDll.dll";
-static const char kDirect[] = "DirectFn";
+#define EXP_DIR_OFF 0u
+#define EAT_OFF (EXP_DIR_OFF + COFF_EXPORT_DIR_SIZE) /* +40 */
+#define EAT_COUNT 2u
+#define ENT_OFF (EAT_OFF + EAT_COUNT * 4u) /* +48 */
+#define ENT_COUNT 2u
+#define ORD_OFF (ENT_OFF + ENT_COUNT * 4u) /* +56 */
+#define DLLNAME_OFF (ORD_OFF + ENT_COUNT * 2u) /* +60 */
+
+static const char kDllName[] = "TestDll.dll";
+static const char kDirect[] = "DirectFn";
static const char kForwarded[] = "ForwardedFn";
static const char kForwardTarget[] = "OTHERDLL.OtherSym";
-#define DIRECT_NAME_OFF (DLLNAME_OFF + (uint32_t)sizeof kDllName)
+#define DIRECT_NAME_OFF (DLLNAME_OFF + (uint32_t)sizeof kDllName)
#define FORWARDED_NAME_OFF (DIRECT_NAME_OFF + (uint32_t)sizeof kDirect)
-#define FORWARD_TGT_OFF (FORWARDED_NAME_OFF + (uint32_t)sizeof kForwarded)
-#define EXP_DIR_END (FORWARD_TGT_OFF + (uint32_t)sizeof kForwardTarget)
+#define FORWARD_TGT_OFF (FORWARDED_NAME_OFF + (uint32_t)sizeof kForwarded)
+#define EXP_DIR_END (FORWARD_TGT_OFF + (uint32_t)sizeof kForwardTarget)
/* Some RVA outside the export directory range — interpreted as a
* direct export pointing into the (notional) code section. */
@@ -163,10 +163,10 @@ static void build_dso(uint8_t* buf) {
/* IMAGE_FILE_HEADER. */
wr_u16(buf + FH_OFF + 0, IMAGE_FILE_MACHINE_AMD64);
- wr_u16(buf + FH_OFF + 2, 1); /* NumberOfSections */
- wr_u32(buf + FH_OFF + 4, 0); /* TimeDateStamp */
- wr_u32(buf + FH_OFF + 8, 0); /* PointerToSymbolTable */
- wr_u32(buf + FH_OFF + 12, 0); /* NumberOfSymbols */
+ wr_u16(buf + FH_OFF + 2, 1); /* NumberOfSections */
+ wr_u32(buf + FH_OFF + 4, 0); /* TimeDateStamp */
+ wr_u32(buf + FH_OFF + 8, 0); /* PointerToSymbolTable */
+ wr_u32(buf + FH_OFF + 12, 0); /* NumberOfSymbols */
wr_u16(buf + FH_OFF + 16, COFF_OPT_HDR64_SIZE);
wr_u16(buf + FH_OFF + 18, IMAGE_FILE_DLL);
@@ -174,8 +174,8 @@ static void build_dso(uint8_t* buf) {
* matter: Magic, and the export DataDirectory at index 0. */
wr_u16(buf + OH_OFF + 0, IMAGE_NT_OPTIONAL_HDR64_MAGIC);
/* Data directories live at the tail of the optional header. */
- uint32_t dd_off = OH_OFF + COFF_OPT_HDR64_SIZE
- - COFF_NUM_DATA_DIRECTORIES * COFF_DATA_DIRECTORY_SIZE;
+ uint32_t dd_off = OH_OFF + COFF_OPT_HDR64_SIZE -
+ COFF_NUM_DATA_DIRECTORIES * COFF_DATA_DIRECTORY_SIZE;
uint32_t exp_rva = SEC_VA + EXP_DIR_OFF;
uint32_t exp_size = EXP_DIR_END;
wr_u32(buf + dd_off + IMAGE_DIRECTORY_ENTRY_EXPORT * 8u + 0, exp_rva);
@@ -183,33 +183,33 @@ static void build_dso(uint8_t* buf) {
/* One section header: ".edata". */
memcpy(buf + SH_OFF + 0, ".edata\0\0", 8);
- wr_u32(buf + SH_OFF + 8, exp_size); /* VirtualSize */
- wr_u32(buf + SH_OFF + 12, SEC_VA); /* VirtualAddress */
- wr_u32(buf + SH_OFF + 16, SEC_RAW_SZ); /* SizeOfRawData */
- wr_u32(buf + SH_OFF + 20, RAW_OFF); /* PointerToRawData */
- wr_u32(buf + SH_OFF + 24, 0); /* PtrToRelocations */
- wr_u32(buf + SH_OFF + 28, 0); /* PtrToLinenumbers */
- wr_u16(buf + SH_OFF + 32, 0); /* NumberOfRelocations */
- wr_u16(buf + SH_OFF + 34, 0); /* NumberOfLinenumbers */
- wr_u32(buf + SH_OFF + 36, 0x40000040u); /* Characteristics:
- INITIALIZED_DATA |
- MEM_READ */
+ wr_u32(buf + SH_OFF + 8, exp_size); /* VirtualSize */
+ wr_u32(buf + SH_OFF + 12, SEC_VA); /* VirtualAddress */
+ wr_u32(buf + SH_OFF + 16, SEC_RAW_SZ); /* SizeOfRawData */
+ wr_u32(buf + SH_OFF + 20, RAW_OFF); /* PointerToRawData */
+ wr_u32(buf + SH_OFF + 24, 0); /* PtrToRelocations */
+ wr_u32(buf + SH_OFF + 28, 0); /* PtrToLinenumbers */
+ wr_u16(buf + SH_OFF + 32, 0); /* NumberOfRelocations */
+ wr_u16(buf + SH_OFF + 34, 0); /* NumberOfLinenumbers */
+ wr_u32(buf + SH_OFF + 36, 0x40000040u); /* Characteristics:
+ INITIALIZED_DATA |
+ MEM_READ */
/* Section raw data — written via RAW_OFF + off. */
uint8_t* sec = buf + RAW_OFF;
/* Export Directory header. */
- wr_u32(sec + EXP_DIR_OFF + 0, 0); /* Characteristics */
- wr_u32(sec + EXP_DIR_OFF + 4, 0); /* TimeDateStamp */
- wr_u16(sec + EXP_DIR_OFF + 8, 0); /* MajorVersion */
- wr_u16(sec + EXP_DIR_OFF + 10, 0); /* MinorVersion */
- wr_u32(sec + EXP_DIR_OFF + 12, SEC_VA + DLLNAME_OFF); /* Name */
- wr_u32(sec + EXP_DIR_OFF + 16, 1); /* Base */
- wr_u32(sec + EXP_DIR_OFF + 20, EAT_COUNT); /* NumberOfFunctions */
- wr_u32(sec + EXP_DIR_OFF + 24, ENT_COUNT); /* NumberOfNames */
- wr_u32(sec + EXP_DIR_OFF + 28, SEC_VA + EAT_OFF); /* AddressOfFunctions */
- wr_u32(sec + EXP_DIR_OFF + 32, SEC_VA + ENT_OFF); /* AddressOfNames */
- wr_u32(sec + EXP_DIR_OFF + 36, SEC_VA + ORD_OFF); /* AddressOfNameOrds */
+ wr_u32(sec + EXP_DIR_OFF + 0, 0); /* Characteristics */
+ wr_u32(sec + EXP_DIR_OFF + 4, 0); /* TimeDateStamp */
+ wr_u16(sec + EXP_DIR_OFF + 8, 0); /* MajorVersion */
+ wr_u16(sec + EXP_DIR_OFF + 10, 0); /* MinorVersion */
+ wr_u32(sec + EXP_DIR_OFF + 12, SEC_VA + DLLNAME_OFF); /* Name */
+ wr_u32(sec + EXP_DIR_OFF + 16, 1); /* Base */
+ wr_u32(sec + EXP_DIR_OFF + 20, EAT_COUNT); /* NumberOfFunctions */
+ wr_u32(sec + EXP_DIR_OFF + 24, ENT_COUNT); /* NumberOfNames */
+ wr_u32(sec + EXP_DIR_OFF + 28, SEC_VA + EAT_OFF); /* AddressOfFunctions */
+ wr_u32(sec + EXP_DIR_OFF + 32, SEC_VA + ENT_OFF); /* AddressOfNames */
+ wr_u32(sec + EXP_DIR_OFF + 36, SEC_VA + ORD_OFF); /* AddressOfNameOrds */
/* EAT: index 0 = direct (outside export-dir range);
* index 1 = forwarder (inside export-dir range, pointing at
diff --git a/test/elf/unit/smoke.c b/test/elf/unit/smoke.c
@@ -9,7 +9,7 @@
*
* and checks that the readback produces the same shape (modulo
* synthesized STT_SECTION symbols and section ordering — the equivalence
- * the read_elf comment in src/obj/elf_read.c documents).
+ * the read_elf comment in src/obj/elf/read.c documents).
*
* Exit 0 = pass; non-zero = fail (with a one-line stderr explanation). */
diff --git a/test/emu/rv64_extras_test.c b/test/emu/rv64_extras_test.c
@@ -27,7 +27,7 @@
#include "core/core.h"
#include "emu/emu.h"
#include "emu/rv64_ops.h"
-#include "obj/elf.h"
+#include "obj/elf/elf.h"
/* Loader side-channel — declared in elf_load.c. */
int emu_load_elf_attach(EmuCPUState*, const EmuLoadedImage*);
@@ -191,8 +191,10 @@ static void decode_rvc(void) {
EmuInst insts[8];
unsigned char buf[16];
u32 n;
- buf[0] = 0x15; buf[1] = 0x45; /* C.LI a0, 5 */
- buf[2] = 0x05; buf[3] = 0x05; /* C.ADDI a0, 1 */
+ buf[0] = 0x15;
+ buf[1] = 0x45; /* C.LI a0, 5 */
+ buf[2] = 0x05;
+ buf[3] = 0x05; /* C.ADDI a0, 1 */
((u32*)(buf + 4))[0] = rv_ecall();
n = emu_decode_block(CFREE_EMU_ARCH_RISCV64, buf, 0x10000, insts, 8);
EXPECT(n >= 3u, "RVC decode block returned %u insts", n);
@@ -201,8 +203,8 @@ static void decode_rvc(void) {
"RVC c.li -> addi a0, x0, 5 (got op=%u rd=%u imm=%lld)",
(unsigned)insts[0].op, (unsigned)insts[0].operands[0],
(long long)(i64)insts[0].operands[3]);
- EXPECT(insts[0].guest_bytes == 2u,
- "RVC insn must advance PC by 2, got %u", insts[0].guest_bytes);
+ EXPECT(insts[0].guest_bytes == 2u, "RVC insn must advance PC by 2, got %u",
+ insts[0].guest_bytes);
EXPECT(insts[1].op == RV64_OP_ADDI && (u32)insts[1].operands[0] == 10u &&
(i64)insts[1].operands[3] == 1,
"RVC c.addi -> addi a0, a0, 1");
@@ -237,27 +239,31 @@ static unsigned char* build_fp_elf(size_t* out_len) {
/* Instruction stream: 7 insns = 28 bytes. */
u32 prog[16];
size_t prog_n = 0;
- prog[prog_n++] = rv_addi(10, 0, 42); /* a0 = 42 */
- prog[prog_n++] = enc_fcvt_s_w(0, 10); /* ft0 = (float)a0 */
- prog[prog_n++] = enc_fmv_x_w(11, 0); /* a1 = bits(ft0) */
- prog[prog_n++] = enc_csrrs(12, 0x003, 0); /* a2 = fcsr */
- prog[prog_n++] = rv_addi(10, 0, 0); /* a0 = 0 (exit code) */
- prog[prog_n++] = rv_addi(17, 0, 94); /* a7 = SYS_exit_group */
- prog[prog_n++] = rv_ecall(); /* ecall */
+ prog[prog_n++] = rv_addi(10, 0, 42); /* a0 = 42 */
+ prog[prog_n++] = enc_fcvt_s_w(0, 10); /* ft0 = (float)a0 */
+ prog[prog_n++] = enc_fmv_x_w(11, 0); /* a1 = bits(ft0) */
+ prog[prog_n++] = enc_csrrs(12, 0x003, 0); /* a2 = fcsr */
+ prog[prog_n++] = rv_addi(10, 0, 0); /* a0 = 0 (exit code) */
+ prog[prog_n++] = rv_addi(17, 0, 94); /* a7 = SYS_exit_group */
+ prog[prog_n++] = rv_ecall(); /* ecall */
size_t prog_bytes = prog_n * 4u;
size_t total = TEXT_OFF + prog_bytes;
unsigned char* b = (unsigned char*)calloc(1, total);
if (!b) return NULL;
- b[EI_MAG0] = ELFMAG0; b[EI_MAG1] = ELFMAG1;
- b[EI_MAG2] = ELFMAG2; b[EI_MAG3] = ELFMAG3;
+ b[EI_MAG0] = ELFMAG0;
+ b[EI_MAG1] = ELFMAG1;
+ b[EI_MAG2] = ELFMAG2;
+ b[EI_MAG3] = ELFMAG3;
b[EI_CLASS] = ELFCLASS64;
b[EI_DATA] = ELFDATA2LSB;
b[EI_VERSION] = EV_CURRENT;
/* e_type=ET_EXEC, e_machine=EM_RISCV, e_entry, e_phoff, ... */
- unsigned* p32; unsigned long long* p64;
+ unsigned* p32;
+ unsigned long long* p64;
/* Use the same put helpers idiom from smoke_test: open-code them. */
- b[16] = ET_EXEC; b[17] = 0;
+ b[16] = ET_EXEC;
+ b[17] = 0;
b[18] = (unsigned char)EM_RISCV;
b[19] = (unsigned char)(EM_RISCV >> 8);
b[20] = EV_CURRENT;
@@ -265,19 +271,25 @@ static unsigned char* build_fp_elf(size_t* out_len) {
unsigned long long ent = BASE_VA + TEXT_OFF;
for (int i = 0; i < 8; ++i) b[24 + i] = (unsigned char)(ent >> (8 * i));
/* e_phoff = 64 */
- b[32] = 64; for (int i = 1; i < 8; ++i) b[32 + i] = 0;
+ b[32] = 64;
+ for (int i = 1; i < 8; ++i) b[32 + i] = 0;
/* e_ehsize=64, e_phentsize=56, e_phnum=1 */
- b[52] = ELF64_EHDR_SIZE; b[53] = 0;
- b[54] = ELF64_PHDR_SIZE; b[55] = 0;
- b[56] = 1; b[57] = 0;
+ b[52] = ELF64_EHDR_SIZE;
+ b[53] = 0;
+ b[54] = ELF64_PHDR_SIZE;
+ b[55] = 0;
+ b[56] = 1;
+ b[57] = 0;
/* PT_LOAD covering [0, total) at VA BASE_VA. */
b[64] = PT_LOAD; /* p_type lo */
b[64 + 4] = (unsigned char)(PF_R | PF_X);
/* p_offset = 0; p_vaddr = BASE_VA; p_paddr = BASE_VA; p_filesz = total;
* p_memsz = total; p_align = PAGE. */
- for (int i = 0; i < 8; ++i) b[64 + 16 + i] = (unsigned char)(BASE_VA >> (8 * i));
- for (int i = 0; i < 8; ++i) b[64 + 24 + i] = (unsigned char)(BASE_VA >> (8 * i));
+ for (int i = 0; i < 8; ++i)
+ b[64 + 16 + i] = (unsigned char)(BASE_VA >> (8 * i));
+ for (int i = 0; i < 8; ++i)
+ b[64 + 24 + i] = (unsigned char)(BASE_VA >> (8 * i));
unsigned long long tot = total;
for (int i = 0; i < 8; ++i) b[64 + 32 + i] = (unsigned char)(tot >> (8 * i));
for (int i = 0; i < 8; ++i) b[64 + 40 + i] = (unsigned char)(tot >> (8 * i));
@@ -286,7 +298,8 @@ static unsigned char* build_fp_elf(size_t* out_len) {
/* Copy the program bytes at file offset TEXT_OFF. */
memcpy(b + TEXT_OFF, prog, prog_bytes);
- (void)p32; (void)p64;
+ (void)p32;
+ (void)p64;
*out_len = total;
return b;
}
@@ -307,10 +320,14 @@ static void fp_csr_interp(void) {
if (!elf) return;
memset(&img, 0, sizeof img);
- int rc = emu_load_elf(cc, CFREE_EMU_ARCH_RISCV64, elf, elf_len, NULL, NULL,
- &img);
+ int rc =
+ emu_load_elf(cc, CFREE_EMU_ARCH_RISCV64, elf, elf_len, NULL, NULL, &img);
EXPECT(rc == 0, "emu_load_elf rc=%d", rc);
- if (rc != 0) { free(elf); cfree_compiler_free(c); return; }
+ if (rc != 0) {
+ free(elf);
+ cfree_compiler_free(c);
+ return;
+ }
cpu = emu_cpu_new(cc, CFREE_EMU_ARCH_RISCV64, img.entry_pc, img.initial_sp);
EXPECT(cpu != NULL, "cpu_new");
@@ -320,9 +337,15 @@ static void fp_csr_interp(void) {
for (steps = 0; steps < 64u; ++steps) {
u64 pc = emu_cpu_pc(cpu);
unsigned char* p = emu_cpu_va_to_host_pub(cpu, pc, 4);
- if (!p) { EXPECT(0, "PC OOB"); break; }
+ if (!p) {
+ EXPECT(0, "PC OOB");
+ break;
+ }
n = emu_decode_block(CFREE_EMU_ARCH_RISCV64, p, pc, insts, 16);
- if (n == 0) { EXPECT(0, "decode 0"); break; }
+ if (n == 0) {
+ EXPECT(0, "decode 0");
+ break;
+ }
emu_cpu_interp_block(cpu, insts, n);
if (emu_cpu_trap_reason(cpu) != EMU_TRAP_NONE) break;
}
@@ -334,8 +357,7 @@ static void fp_csr_interp(void) {
EXPECT(emu_cpu_xreg(cpu, 11) == 0x42280000ull,
"a1 should hold bits of (float)42 = 0x42280000, got 0x%llx",
(unsigned long long)emu_cpu_xreg(cpu, 11));
- EXPECT(emu_cpu_xreg(cpu, 12) == 0,
- "a2 fcsr starts at 0, got 0x%llx",
+ EXPECT(emu_cpu_xreg(cpu, 12) == 0, "a2 fcsr starts at 0, got 0x%llx",
(unsigned long long)emu_cpu_xreg(cpu, 12));
emu_cpu_free(cpu);
@@ -404,12 +426,15 @@ static unsigned char* build_minimal_interp_elf(size_t* out_len) {
size_t total = TEXT_OFF + 16;
unsigned char* b = (unsigned char*)calloc(1, total);
if (!b) return NULL;
- b[EI_MAG0] = ELFMAG0; b[EI_MAG1] = ELFMAG1;
- b[EI_MAG2] = ELFMAG2; b[EI_MAG3] = ELFMAG3;
+ b[EI_MAG0] = ELFMAG0;
+ b[EI_MAG1] = ELFMAG1;
+ b[EI_MAG2] = ELFMAG2;
+ b[EI_MAG3] = ELFMAG3;
b[EI_CLASS] = ELFCLASS64;
b[EI_DATA] = ELFDATA2LSB;
b[EI_VERSION] = EV_CURRENT;
- b[16] = ET_DYN; b[17] = 0;
+ b[16] = ET_DYN;
+ b[17] = 0;
b[18] = (unsigned char)EM_RISCV;
b[19] = (unsigned char)(EM_RISCV >> 8);
b[20] = EV_CURRENT;
@@ -457,12 +482,15 @@ static unsigned char* build_program_with_interp(size_t* out_len) {
size_t total = interp_off + sizeof(interp_path) + 0x100;
unsigned char* b = (unsigned char*)calloc(1, total);
if (!b) return NULL;
- b[EI_MAG0] = ELFMAG0; b[EI_MAG1] = ELFMAG1;
- b[EI_MAG2] = ELFMAG2; b[EI_MAG3] = ELFMAG3;
+ b[EI_MAG0] = ELFMAG0;
+ b[EI_MAG1] = ELFMAG1;
+ b[EI_MAG2] = ELFMAG2;
+ b[EI_MAG3] = ELFMAG3;
b[EI_CLASS] = ELFCLASS64;
b[EI_DATA] = ELFDATA2LSB;
b[EI_VERSION] = EV_CURRENT;
- b[16] = ET_EXEC; b[17] = 0;
+ b[16] = ET_EXEC;
+ b[17] = 0;
b[18] = (unsigned char)EM_RISCV;
b[19] = (unsigned char)(EM_RISCV >> 8);
b[20] = EV_CURRENT;
@@ -475,8 +503,10 @@ static unsigned char* build_program_with_interp(size_t* out_len) {
/* PT_LOAD covering [0, total) at VA BASE_VA. */
b[64] = PT_LOAD;
b[64 + 4] = (unsigned char)(PF_R | PF_X);
- for (int i = 0; i < 8; ++i) b[64 + 16 + i] = (unsigned char)(BASE_VA >> (8 * i));
- for (int i = 0; i < 8; ++i) b[64 + 24 + i] = (unsigned char)(BASE_VA >> (8 * i));
+ for (int i = 0; i < 8; ++i)
+ b[64 + 16 + i] = (unsigned char)(BASE_VA >> (8 * i));
+ for (int i = 0; i < 8; ++i)
+ b[64 + 24 + i] = (unsigned char)(BASE_VA >> (8 * i));
unsigned long long tot = total;
for (int i = 0; i < 8; ++i) b[64 + 32 + i] = (unsigned char)(tot >> (8 * i));
for (int i = 0; i < 8; ++i) b[64 + 40 + i] = (unsigned char)(tot >> (8 * i));
@@ -488,8 +518,10 @@ static unsigned char* build_program_with_interp(size_t* out_len) {
unsigned long long ioff = interp_off;
for (int i = 0; i < 8; ++i) b[ph2 + 8 + i] = (unsigned char)(ioff >> (8 * i));
unsigned long long ilen = sizeof(interp_path);
- for (int i = 0; i < 8; ++i) b[ph2 + 32 + i] = (unsigned char)(ilen >> (8 * i));
- for (int i = 0; i < 8; ++i) b[ph2 + 40 + i] = (unsigned char)(ilen >> (8 * i));
+ for (int i = 0; i < 8; ++i)
+ b[ph2 + 32 + i] = (unsigned char)(ilen >> (8 * i));
+ for (int i = 0; i < 8; ++i)
+ b[ph2 + 40 + i] = (unsigned char)(ilen >> (8 * i));
/* Program text: exit(42). */
u32 i0 = rv_addi(10, 0, 42);
u32 i1 = rv_addi(17, 0, 94);
@@ -511,7 +543,10 @@ static void pt_interp_handoff(void) {
unsigned char* prog = build_program_with_interp(&prog_len);
EXPECT(interp && prog, "buffer alloc");
if (!interp || !prog) {
- free(interp); free(prog); cfree_compiler_free(c); return;
+ free(interp);
+ free(prog);
+ cfree_compiler_free(c);
+ return;
}
/* Stage the interpreter bytes; loader consumes them on the next
@@ -524,7 +559,10 @@ static void pt_interp_handoff(void) {
&img);
EXPECT(rc == 0, "emu_load_elf with PT_INTERP rc=%d", rc);
if (rc != 0) {
- free(interp); free(prog); cfree_compiler_free(c); return;
+ free(interp);
+ free(prog);
+ cfree_compiler_free(c);
+ return;
}
/* entry_pc should be the interpreter's entry (which we placed past
* the program). The program's BASE_VA is 0x40000, so the interpreter
@@ -552,8 +590,7 @@ static void pt_interp_handoff(void) {
EXPECT(emu_cpu_trap_reason(cpu) == EMU_TRAP_EXIT,
"interp exited via EMU_TRAP_EXIT");
EXPECT(emu_cpu_exit_code(cpu) == 99,
- "interp exit code 99 (= a0 at exit), got %d",
- emu_cpu_exit_code(cpu));
+ "interp exit code 99 (= a0 at exit), got %d", emu_cpu_exit_code(cpu));
emu_cpu_free(cpu);
emu_unload_image(cc, &img);
diff --git a/test/emu/rv64_smoke_test.c b/test/emu/rv64_smoke_test.c
@@ -31,7 +31,7 @@
#include "core/core.h"
#include "emu/emu.h"
#include "emu/rv64_ops.h"
-#include "obj/elf.h"
+#include "obj/elf/elf.h"
/* The loader exposes emu_load_elf_attach via a forward decl since the
* locked include/cfree/emu.h does not expose it. cpu.c exports the
@@ -155,29 +155,29 @@ static unsigned char* build_minimal_elf(size_t* out_len) {
b[EI_DATA] = ELFDATA2LSB;
b[EI_VERSION] = EV_CURRENT;
b[EI_OSABI] = ELFOSABI_NONE;
- put16(b, 16, ET_EXEC); /* e_type */
- put16(b, 18, EM_RISCV); /* e_machine */
- put32(b, 20, EV_CURRENT); /* e_version */
- put64(b, 24, BASE_VA + TEXT_OFF); /* e_entry */
- put64(b, 32, 64); /* e_phoff */
- put64(b, 40, 0); /* e_shoff (none) */
- put32(b, 48, 0); /* e_flags */
- put16(b, 52, ELF64_EHDR_SIZE); /* e_ehsize */
- put16(b, 54, ELF64_PHDR_SIZE); /* e_phentsize */
- put16(b, 56, 1); /* e_phnum */
- put16(b, 58, 0); /* e_shentsize */
- put16(b, 60, 0); /* e_shnum */
- put16(b, 62, 0); /* e_shstrndx */
+ put16(b, 16, ET_EXEC); /* e_type */
+ put16(b, 18, EM_RISCV); /* e_machine */
+ put32(b, 20, EV_CURRENT); /* e_version */
+ put64(b, 24, BASE_VA + TEXT_OFF); /* e_entry */
+ put64(b, 32, 64); /* e_phoff */
+ put64(b, 40, 0); /* e_shoff (none) */
+ put32(b, 48, 0); /* e_flags */
+ put16(b, 52, ELF64_EHDR_SIZE); /* e_ehsize */
+ put16(b, 54, ELF64_PHDR_SIZE); /* e_phentsize */
+ put16(b, 56, 1); /* e_phnum */
+ put16(b, 58, 0); /* e_shentsize */
+ put16(b, 60, 0); /* e_shnum */
+ put16(b, 62, 0); /* e_shstrndx */
/* PT_LOAD phdr — 56 bytes at offset 64. */
- put32(b, 64 + 0, PT_LOAD); /* p_type */
- put32(b, 64 + 4, PF_R | PF_X); /* p_flags */
- put64(b, 64 + 8, 0); /* p_offset */
- put64(b, 64 + 16, BASE_VA); /* p_vaddr */
- put64(b, 64 + 24, BASE_VA); /* p_paddr */
- put64(b, 64 + 32, total); /* p_filesz */
- put64(b, 64 + 40, total); /* p_memsz */
- put64(b, 64 + 48, PAGE); /* p_align */
+ put32(b, 64 + 0, PT_LOAD); /* p_type */
+ put32(b, 64 + 4, PF_R | PF_X); /* p_flags */
+ put64(b, 64 + 8, 0); /* p_offset */
+ put64(b, 64 + 16, BASE_VA); /* p_vaddr */
+ put64(b, 64 + 24, BASE_VA); /* p_paddr */
+ put64(b, 64 + 32, total); /* p_filesz */
+ put64(b, 64 + 40, total); /* p_memsz */
+ put64(b, 64 + 48, PAGE); /* p_align */
/* .text: addi a0,zero,42 ; addi a7,zero,94 ; ecall */
put32(b, TEXT_OFF + 0, rv_addi(RV_A0, RV_ZERO, 42));
@@ -208,8 +208,8 @@ static void decoder_smoke(void) {
EXPECT((i64)insts[0].operands[3] == 42, "imm should be 42");
EXPECT(insts[1].op == RV64_OP_ADDI, "second insn must be ADDI");
EXPECT((i64)insts[1].operands[3] == 94, "imm should be 94");
- EXPECT(insts[2].op == RV64_OP_ECALL,
- "third insn must be ECALL, got %u", insts[2].op);
+ EXPECT(insts[2].op == RV64_OP_ECALL, "third insn must be ECALL, got %u",
+ insts[2].op);
EXPECT(insts[2].flags & RV64_INST_FLAG_TERMINATOR,
"ECALL must be marked terminator");
/* The block stops at ECALL; the ADD at offset 12 should not have
@@ -274,8 +274,7 @@ static void interp_smoke(void) {
if (emu_cpu_trap_reason(cpu) != EMU_TRAP_NONE) break;
}
EXPECT(emu_cpu_trap_reason(cpu) == EMU_TRAP_EXIT,
- "expected EMU_TRAP_EXIT, got %u",
- (unsigned)emu_cpu_trap_reason(cpu));
+ "expected EMU_TRAP_EXIT, got %u", (unsigned)emu_cpu_trap_reason(cpu));
exit_code = emu_cpu_exit_code(cpu);
EXPECT(exit_code == 42, "exit_code should be 42, got %d", exit_code);