commit 242fc8d4057cf42576e05f634c2707e8ebc8bfdc
parent 305d49af9c1d7c268a4db82e443d796923046ed1
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 11 May 2026 07:34:22 -0700
link: LinkArchDesc vtable + obj-format C-name policy + ISA encoder lifts
Cuts every per-arch / per-format `if (target.X == Y) { ... }` chain in
the linker that was hand-encoding instruction bytes or duplicating
format policy inline. Net: -213 lines of inline dispatch + magic hex
across the linker, +new descriptor + ISA helpers + obj-format helpers.
70/70 test-libc cells green (7 darwin + 9 Linux × 7 cases), full
test suite (119 + 980 + 1549) clean.
Per-arch link descriptor (link/link_arch.h + link_arch_{aa64,x64,rv64}.c):
- LinkArchDesc collects e_machine, ELF dyn-reloc type numbers
(RELATIVE / GLOB_DAT / JUMP_SLOT — RV64 maps GLOB_DAT onto R_RISCV_64
per psABI), PLT geometry (plt0_size / plt_entry_size / iplt_stub_size),
default musl PT_INTERP, and three stub emitters: emit_plt0,
emit_plt_entry, emit_iplt_stub. Dispatched by target.arch via
link_arch_desc_for(Compiler*).
- Each link_arch_<arch>.c implements the emitters using the new
named encoders in arch/<arch>_isa.h — no raw hex in any of the new
files.
- emit_iplt_stub returns 0 or 2 LinkArchIPltReloc records; the linker
fills section/vaddr/target/addend. x64/rv64 encode the stub→slot
disp inline (0 records); aa64 leaves the immediates zero and reports
ADR_PREL_PG_HI21 + LDST64_ABS_LO12_NC apply records.
Linker callsite collapses:
- link_dyn.c: ~110 lines of three-way `if (arch==X)` PLT0+per-entry hex
→ desc->emit_plt0 + emit_plt_entry. JUMP_SLOT switch in .rela.plt
emit → desc->elf_r_jump_slot. plt_bytes/entry sizes (32u/16u) →
desc->plt0_size / plt_entry_size.
- link_layout.c: IPLT stub switch + aa64-only reloc append → desc
->emit_iplt_stub + a generic loop over the returned records. `12u`
literals → desc->iplt_stub_size at every call site.
- link_elf.c: dyn_reloc_type() helper deleted; emit_relative_record /
emit_globdat_record use desc->elf_r_relative / elf_r_glob_dat
directly. e_machine 3-way switch → desc->e_machine.
- link_dyn.c interp default was hardcoded /lib/ld-musl-aarch64.so.1
for any arch; now per-arch via desc->default_musl_interp (drivers
that call link_set_interp_path are unaffected).
ISA encoder additions (no behavior change; replace bare hex):
- arch/aa64_isa.h: aa64_adrp / aa64_adr (PCREL_ADR), aa64_add_imm /
aa64_sub_imm (lifted from aarch64.c — local copies removed),
aa64_ldr64_uimm12 / aa64_str64_uimm12 (LDST_UIMM), aa64_stp64_pre /
aa64_ldp64_pre (LDSTP_PRE), aa64_nop (HINT). New AA64Format kinds +
pack/unpack pairs follow the file's existing pattern; aa64_isa.c
table extended with matching descriptor rows.
- arch/x64_isa.h: named constants (X64_OP_JMP_RM64 / X64_MODRM_JMP_RIPREL
/ X64_NOP1 / X64_NOP6_BYTE0..5 + sizes) and inline writers
(x64_write_jmp_riprel, x64_write_nop_pad, x64_write_nop6).
- arch/rv64_isa.h: rv_nop convenience wrapper (rv_ld / rv_auipc /
rv_jalr / rv_jr / RV_T1 / RV_T3 already existed).
Mach-O stub (link_macho.c:encode_stub): the third hand-rolled copy of
ADRP+LDR+BR (with 0x90000010u/0xF9400210u/0xD61F0200u) now calls
aa64_adrp / aa64_ldr64_uimm12 / aa64_br via the same arch/aa64_isa.h
helpers — single source of truth for the encoding across PLT, IPLT,
and Mach-O __stubs.
Obj-format C-name policy helpers (obj/obj_secnames.c + obj.h):
- obj_format_c_mangle(Compiler*, const char*) — single source for the
Mach-O leading-underscore rule. Replaces three identical 15-line
alloc/memcpy/free blocks (link.c:link_intern_c_name,
link_layout.c:boundary_name, link_jit.c:cfree_jit_lookup).
- obj_format_demangle_c — reverse for diagnostic display
(link_layout.c undef-ref panic stripped its own `_` inline).
- obj_format_default_entry_name — `_main` for Mach-O / `_start`
otherwise. Replaces inline ternary in link_new.
- obj_symbol_extern_via_got(Compiler*, ObjBuilder*, ObjSymId) — lifts
the byte-identical use_got_for_sym helper that lived in all three
of arch/{aarch64,x64,rv64}.c into one place.
ELF wire-format constants (obj/elf.h): ELF64_SYM_SIZE / ELF64_RELA_SIZE
/ ELF64_DYN_SIZE used wherever link_dyn.c / link_elf.c were
multiplying by bare 24u / 16u.
Test-runner perf (test/libc/run.sh): podman run now passes
--pull=never and pins arch-specific image tags (alpine:3.20.10,
debian:bookworm-slim, …) to match what extract.sh / Containerfile
already build. A `podman run`, whether cold or warm, took 32 s because
of a docker.io manifest lookup; the full 9-cell Linux matrix now
finishes in ~18 s (was 5+ minutes).
Makefile: factor -MMD -MP into DEPFLAGS so self-stage2 can override
to empty (cfree's own dep iterator is still a stub). Stage 2's
SELF_SYSROOT_CFLAGS now searches rt/include/libc instead of
$(SYSROOT)/usr/include.
Diffstat:
23 files changed, 996 insertions(+), 213 deletions(-)
diff --git a/Makefile b/Makefile
@@ -8,6 +8,11 @@ HOST_SYSROOT_LDFLAGS = -isysroot $(SYSROOT)
CFLAGS_COMMON = -std=c11 -Wpedantic -Wall -Wextra -Werror $(HOST_SYSROOT_CFLAGS)
+# Dep-file flags. clang supports -MMD/-MP; cfree-cc doesn't yet (dep iterator
+# is a stub — emits a misleading "out of memory"). Stage 2 overrides this to
+# empty until cfree implements the API.
+DEPFLAGS = -MMD -MP
+
# libcfree: written in C11 freestanding; sees both src/ (internal) and
# include/ (its own public surface).
LIB_CFLAGS = $(CFLAGS_COMMON) -ffreestanding -Iinclude -Isrc
@@ -128,11 +133,11 @@ $(BIN): $(DRIVER_OBJS) $(LIB_AR)
build/lib/%.o: src/%.c
@mkdir -p $(dir $@)
- $(CC) $(LIB_CFLAGS) -MMD -MP -c $< -o $@
+ $(CC) $(LIB_CFLAGS) $(DEPFLAGS) -c $< -o $@
build/driver/%.o: driver/%.c
@mkdir -p $(dir $@)
- $(CC) $(DRIVER_CFLAGS) -MMD -MP -c $< -o $@
+ $(CC) $(DRIVER_CFLAGS) $(DEPFLAGS) -c $< -o $@
# Self-host: build cfree with clang (stage 1), then rebuild libcfree.a +
# driver + bin from clean using stage-1 cfree as CC (stage 2). Stage 1 is
@@ -152,7 +157,7 @@ self: $(BIN)
# Stage 2 sysroot: cfree finds system headers via -isystem, not -isysroot.
# rt/include ships cfree's freestanding headers (stddef.h, stdarg.h, ...)
# and must precede the SDK so its versions win over Xcode's.
-SELF_SYSROOT_CFLAGS = -isystem rt/include -isystem $(SYSROOT)/usr/include
+SELF_SYSROOT_CFLAGS = -isystem rt/include -isystem rt/include/libc
SELF_SYSROOT_LDFLAGS =
SELF_LIB_CFLAGS = $(SELF_SYSROOT_CFLAGS) -Iinclude -Isrc
SELF_DRIVER_CFLAGS = $(SELF_SYSROOT_CFLAGS) -Iinclude
@@ -163,7 +168,8 @@ self-stage2:
CFLAGS_COMMON='$(SELF_SYSROOT_CFLAGS)' \
LIB_CFLAGS='$(SELF_LIB_CFLAGS)' \
DRIVER_CFLAGS='$(SELF_DRIVER_CFLAGS)' \
- HOST_SYSROOT_LDFLAGS='$(SELF_SYSROOT_LDFLAGS)'
+ HOST_SYSROOT_LDFLAGS='$(SELF_SYSROOT_LDFLAGS)' \
+ DEPFLAGS=''
format:
find src include driver test rt \( -path test/lex -o -path test/pp \) -prune -o \( -name '*.c' -o -name '*.h' \) -print | xargs clang-format -i --style=google
diff --git a/src/arch/aa64_isa.c b/src/arch/aa64_isa.c
@@ -60,6 +60,35 @@ const AA64InsnDesc aa64_insn_table[] = {
{"br", 0xD61F0000u, 0xFFFFFC1Fu, AA64_FMT_BR_REG, {0, 0, 0}},
{"blr", 0xD63F0000u, 0xFFFFFC1Fu, AA64_FMT_BR_REG, {0, 0, 0}},
{"ret", 0xD65F0000u, 0xFFFFFC1Fu, AA64_FMT_BR_REG, {0, 0, 0}},
+
+ /* PC-relative addressing. Mask: family bits 28:24 + op(31). The two
+ * halves of the immediate (immlo at 30:29, immhi at 23:5) and Rd
+ * stay variable. */
+ {"adr", 0x10000000u, 0x9F000000u, AA64_FMT_PCREL_ADR, {0, 0, 0}},
+ {"adrp", 0x90000000u, 0x9F000000u, AA64_FMT_PCREL_ADR, {0, 0, 0}},
+
+ /* Add/Sub immediate. Mask: family bits 28:24 + op(30) + S(29). The
+ * sh bit (22) and imm12 (21:10) stay variable. */
+ {"add_imm", 0x11000000u, 0x7F000000u, AA64_FMT_ADDSUB_IMM, {0, 0, 0}},
+ {"adds_imm", 0x31000000u, 0x7F000000u, AA64_FMT_ADDSUB_IMM, {0, 0, 0}},
+ {"sub_imm", 0x51000000u, 0x7F000000u, AA64_FMT_ADDSUB_IMM, {0, 0, 0}},
+ {"subs_imm", 0x71000000u, 0x7F000000u, AA64_FMT_ADDSUB_IMM, {0, 0, 0}},
+
+ /* Load/store, unsigned 12-bit immediate offset. Mask: family bits
+ * 29:27 + 25:24 + size(31:30) + V(26) + opc(23:22). Only the 64-bit
+ * integer LDR/STR forms (size=11, V=0) are listed today; widen as
+ * other widths come online. */
+ {"str_uimm", 0xF9000000u, 0xFFC00000u, AA64_FMT_LDST_UIMM, {0, 0, 0}},
+ {"ldr_uimm", 0xF9400000u, 0xFFC00000u, AA64_FMT_LDST_UIMM, {0, 0, 0}},
+
+  /* Load/store pair, pre-indexed. Mask: family bits 29:27 + 25:23 +
+   * opc(31:30) + V(26) + L(22). Only the 64-bit integer form (opc=10,
+   * V=0) is registered today. */
+ {"stp_pre", 0xA9800000u, 0xFFC00000u, AA64_FMT_LDSTP_PRE, {0, 0, 0}},
+ {"ldp_pre", 0xA9C00000u, 0xFFC00000u, AA64_FMT_LDSTP_PRE, {0, 0, 0}},
+
+  /* Hint. Mask: family + bits 11:5 (CRm, op2) zero (NOP slot). */
+ {"nop", 0xD503201Fu, 0xFFFFFFFFu, AA64_FMT_HINT, {0, 0, 0}},
};
const u32 aa64_insn_table_n =
diff --git a/src/arch/aa64_isa.h b/src/arch/aa64_isa.h
@@ -35,11 +35,16 @@
/* ---- format kinds ---- */
typedef enum AA64Format {
AA64_FMT_MOVEWIDE,
- AA64_FMT_LOG_SR, /* logical, shifted register */
- AA64_FMT_ADDSUB_SR, /* add/sub, shifted register */
- AA64_FMT_DP3, /* data-processing, 3 source */
- AA64_FMT_DP2, /* data-processing, 2 source */
- AA64_FMT_BR_REG, /* unconditional branch (register) */
+ AA64_FMT_LOG_SR, /* logical, shifted register */
+ AA64_FMT_ADDSUB_SR, /* add/sub, shifted register */
+ AA64_FMT_DP3, /* data-processing, 3 source */
+ AA64_FMT_DP2, /* data-processing, 2 source */
+ AA64_FMT_BR_REG, /* unconditional branch (register) */
+ AA64_FMT_PCREL_ADR, /* PC-relative ADR / ADRP */
+ AA64_FMT_ADDSUB_IMM, /* add/sub, immediate */
+ AA64_FMT_LDST_UIMM, /* load/store, unsigned 12-bit immediate offset */
+ AA64_FMT_LDSTP_PRE, /* load/store pair, pre-indexed */
+ AA64_FMT_HINT, /* hint (NOP / YIELD / ...) */
} AA64Format;
/* ====================================================================
@@ -347,6 +352,251 @@ static inline u32 aa64_ret(u32 Rn) {
}
/* ====================================================================
+ * PC-relative addressing (ADR / ADRP)
+ * op(1) immlo(2) 10000 immhi(19) Rd(5)
+ * 31 30..29 28..24 23..5 4..0
+ *
+ * op = 0 → ADR (PC + sign_extend(immhi:immlo))
+ * op = 1 → ADRP (page(PC) + sign_extend(immhi:immlo) << 12)
+ *
+ * The two immediate halves stay split because the linker's
+ * R_AARCH64_ADR_PREL_PG_HI21 reloc patches them in place; keeping the
+ * field layout symmetric with the encoded word lets reloc-apply code
+ * reuse the same pack/unpack helpers.
+ * ==================================================================== */
+
+#define AA64_ADR_OP_ADR 0u
+#define AA64_ADR_OP_ADRP 1u
+
+#define AA64_PCREL_ADR_FAMILY_MATCH 0x10000000u
+#define AA64_PCREL_ADR_FAMILY_MASK 0x1F000000u /* bits 28:24 */
+
+typedef struct AA64PCRelAdr {
+ u32 op, immlo, immhi, Rd;
+} AA64PCRelAdr;
+
+static inline u32 aa64_pcrel_adr_pack(AA64PCRelAdr f) {
+ return ((f.op & 1u) << 31) | ((f.immlo & 3u) << 29) |
+ AA64_PCREL_ADR_FAMILY_MATCH | ((f.immhi & 0x7ffffu) << 5) |
+ (f.Rd & 0x1fu);
+}
+
+static inline AA64PCRelAdr aa64_pcrel_adr_unpack(u32 w) {
+ AA64PCRelAdr f;
+ f.op = (w >> 31) & 1u;
+ f.immlo = (w >> 29) & 3u;
+ f.immhi = (w >> 5) & 0x7ffffu;
+ f.Rd = w & 0x1fu;
+ return f;
+}
+
+static inline u32 aa64_adrp(u32 Rd, u32 immlo, u32 immhi) {
+ return aa64_pcrel_adr_pack((AA64PCRelAdr){
+ .op = AA64_ADR_OP_ADRP, .immlo = immlo, .immhi = immhi, .Rd = Rd});
+}
+static inline u32 aa64_adr(u32 Rd, u32 immlo, u32 immhi) {
+ return aa64_pcrel_adr_pack((AA64PCRelAdr){
+ .op = AA64_ADR_OP_ADR, .immlo = immlo, .immhi = immhi, .Rd = Rd});
+}
+
+/* ====================================================================
+ * Add/Sub, immediate (ADD / SUB / ADDS / SUBS, 12-bit imm with shift)
+ * sf op(1) S(1) 100010 sh(1) imm12(12) Rn(5) Rd(5)
+ * 31 30 29 28..23 22 21..10 9..5 4..0
+ *
+ * sh selects whether imm12 is left-shifted by 12. Used by PLT entries
+ * for `add x16, x16, #lo12(slot)` where sh=0 and imm12 = slot & 0xfff.
+ * ==================================================================== */
+
+#define AA64_ADDSUBIMM_FAMILY_MATCH 0x11000000u
+#define AA64_ADDSUBIMM_FAMILY_MASK 0x1F000000u /* bits 28:24 */
+
+typedef struct AA64AddSubImm {
+ u32 sf, op, S, sh, imm12, Rn, Rd;
+} AA64AddSubImm;
+
+static inline u32 aa64_addsubimm_pack(AA64AddSubImm f) {
+ return ((f.sf & 1u) << 31) | ((f.op & 1u) << 30) | ((f.S & 1u) << 29) |
+ AA64_ADDSUBIMM_FAMILY_MATCH | ((f.sh & 1u) << 22) |
+ ((f.imm12 & 0xfffu) << 10) | ((f.Rn & 0x1fu) << 5) | (f.Rd & 0x1fu);
+}
+
+static inline AA64AddSubImm aa64_addsubimm_unpack(u32 w) {
+ AA64AddSubImm f;
+ f.sf = (w >> 31) & 1u;
+ f.op = (w >> 30) & 1u;
+ f.S = (w >> 29) & 1u;
+ f.sh = (w >> 22) & 1u;
+ f.imm12 = (w >> 10) & 0xfffu;
+ f.Rn = (w >> 5) & 0x1fu;
+ f.Rd = w & 0x1fu;
+ return f;
+}
+
+static inline u32 aa64_add_imm(u32 sf, u32 Rd, u32 Rn, u32 imm12, u32 sh) {
+ return aa64_addsubimm_pack((AA64AddSubImm){
+ .sf = sf, .op = 0, .sh = sh, .imm12 = imm12, .Rn = Rn, .Rd = Rd});
+}
+static inline u32 aa64_sub_imm(u32 sf, u32 Rd, u32 Rn, u32 imm12, u32 sh) {
+ return aa64_addsubimm_pack((AA64AddSubImm){
+ .sf = sf, .op = 1, .sh = sh, .imm12 = imm12, .Rn = Rn, .Rd = Rd});
+}
+
+/* ====================================================================
+ * Load/store, unsigned 12-bit immediate offset (LDR / STR, scaled)
+ * size(2) 111 V(1) 01 opc(2) imm12(12) Rn(5) Rt(5)
+ * 31..30 29..27 26 25..24 23..22 21..10 9..5 4..0
+ *
+ * size=11, V=0, opc=01 → LDR (64-bit, integer). imm12 is the byte
+ * offset divided by the access size (8 for LDR Xt), giving a 0..32760
+ * byte range.
+ *
+ * Only the LDR Xt form is needed by the linker today (PLT loads through
+ * x16/x17); the family encoders cover STR and the smaller widths so
+ * future callers can drop in without touching this header.
+ * ==================================================================== */
+
+#define AA64_LDST_SIZE_64 3u
+#define AA64_LDST_OPC_STR 0u
+#define AA64_LDST_OPC_LDR 1u
+
+#define AA64_LDST_UIMM_FAMILY_MATCH 0x39000000u
+#define AA64_LDST_UIMM_FAMILY_MASK 0x3B000000u /* bits 29:27 + bits 25:24 */
+
+typedef struct AA64LdStUimm {
+ u32 size, V, opc, imm12, Rn, Rt;
+} AA64LdStUimm;
+
+static inline u32 aa64_ldst_uimm_pack(AA64LdStUimm f) {
+ return ((f.size & 3u) << 30) | AA64_LDST_UIMM_FAMILY_MATCH |
+ ((f.V & 1u) << 26) | ((f.opc & 3u) << 22) |
+ ((f.imm12 & 0xfffu) << 10) | ((f.Rn & 0x1fu) << 5) | (f.Rt & 0x1fu);
+}
+
+static inline AA64LdStUimm aa64_ldst_uimm_unpack(u32 w) {
+ AA64LdStUimm f;
+ f.size = (w >> 30) & 3u;
+ f.V = (w >> 26) & 1u;
+ f.opc = (w >> 22) & 3u;
+ f.imm12 = (w >> 10) & 0xfffu;
+ f.Rn = (w >> 5) & 0x1fu;
+ f.Rt = w & 0x1fu;
+ return f;
+}
+
+/* LDR Xt, [Xn, #imm12_scaled]. imm12_scaled is the encoded field —
+ * callers pass `byte_offset >> 3` for the 64-bit form. */
+static inline u32 aa64_ldr64_uimm12(u32 Rt, u32 Rn, u32 imm12_scaled) {
+ return aa64_ldst_uimm_pack((AA64LdStUimm){.size = AA64_LDST_SIZE_64,
+ .V = 0,
+ .opc = AA64_LDST_OPC_LDR,
+ .imm12 = imm12_scaled,
+ .Rn = Rn,
+ .Rt = Rt});
+}
+static inline u32 aa64_str64_uimm12(u32 Rt, u32 Rn, u32 imm12_scaled) {
+ return aa64_ldst_uimm_pack((AA64LdStUimm){.size = AA64_LDST_SIZE_64,
+ .V = 0,
+ .opc = AA64_LDST_OPC_STR,
+ .imm12 = imm12_scaled,
+ .Rn = Rn,
+ .Rt = Rt});
+}
+
+/* ====================================================================
+ * Load/store register pair, pre-indexed (STP / LDP, 64-bit form)
+ * opc(2) 101 V(1) 011 L(1) imm7(7) Rt2(5) Rn(5) Rt(5)
+ * 31..30 29..27 26 25..23 22 21..15 14..10 9..5 4..0
+ *
+ * 64-bit integer form fixes opc=10, V=0. L=0 → STP, L=1 → LDP. imm7 is
+ * signed and scaled by 8 in that form (field = `byte_offset / 8`);
+ * callers pass it pre-scaled and the helpers mask it to 7 bits.
+ * FAMILY_MATCH holds only the fixed bits (29:27 = 101, 25:23 = 011) so
+ * opc / V / L come solely from the packed fields.
+ * ==================================================================== */
+
+#define AA64_LDSTP_PRE_FAMILY_MATCH 0x29800000u
+#define AA64_LDSTP_PRE_FAMILY_MASK 0x3B800000u /* bits 29:27 + bits 25:23 */
+
+typedef struct AA64LdStPPre {
+ u32 opc, V, L, imm7, Rt2, Rn, Rt;
+} AA64LdStPPre;
+
+static inline u32 aa64_ldstp_pre_pack(AA64LdStPPre f) {
+ return ((f.opc & 3u) << 30) | AA64_LDSTP_PRE_FAMILY_MATCH |
+ ((f.V & 1u) << 26) | ((f.L & 1u) << 22) | ((f.imm7 & 0x7fu) << 15) |
+ ((f.Rt2 & 0x1fu) << 10) | ((f.Rn & 0x1fu) << 5) | (f.Rt & 0x1fu);
+}
+
+static inline AA64LdStPPre aa64_ldstp_pre_unpack(u32 w) {
+ AA64LdStPPre f;
+ f.opc = (w >> 30) & 3u;
+ f.V = (w >> 26) & 1u;
+ f.L = (w >> 22) & 1u;
+ f.imm7 = (w >> 15) & 0x7fu;
+ f.Rt2 = (w >> 10) & 0x1fu;
+ f.Rn = (w >> 5) & 0x1fu;
+ f.Rt = w & 0x1fu;
+ return f;
+}
+
+/* STP Xt, Xt2, [Xn, #imm7_scaled]! — opc=10 selects the 64-bit form.
+ * imm7_scaled is `byte_offset / 8`; callers pass it pre-scaled (e.g.
+ * -2 for [sp, #-16]!). */
+static inline u32 aa64_stp64_pre(u32 Rt, u32 Rt2, u32 Rn, i32 imm7_scaled) {
+ return aa64_ldstp_pre_pack((AA64LdStPPre){.opc = 2,
+ .V = 0,
+ .L = 0,
+ .imm7 = (u32)imm7_scaled & 0x7fu,
+ .Rt2 = Rt2,
+ .Rn = Rn,
+ .Rt = Rt});
+}
+static inline u32 aa64_ldp64_pre(u32 Rt, u32 Rt2, u32 Rn, i32 imm7_scaled) {
+ return aa64_ldstp_pre_pack((AA64LdStPPre){.opc = 2,
+ .V = 0,
+ .L = 1,
+ .imm7 = (u32)imm7_scaled & 0x7fu,
+ .Rt2 = Rt2,
+ .Rn = Rn,
+ .Rt = Rt});
+}
+
+/* ====================================================================
+ * Hint instructions (NOP / YIELD / WFE / WFI / SEV / SEVL)
+ * 1101 0101 0000 0011 0010 CRm(4) op2(3) 11111
+ * 31..16 15..12 11..8 7..5 4..0
+ *
+ * NOP encodes CRm=0, op2=0 → 0xD503201F. The full hint family lives
+ * inside the system-instruction space; we only expose NOP today since
+ * that's the only slot the linker fills.
+ * ==================================================================== */
+
+#define AA64_HINT_FAMILY_MATCH 0xD503201Fu
+#define AA64_HINT_FAMILY_MASK 0xFFFFF01Fu /* CRm + op2 vary */
+
+#define AA64_HINT_OP_NOP 0u /* CRm=0, op2=0 */
+
+typedef struct AA64Hint {
+ u32 CRm, op2;
+} AA64Hint;
+
+static inline u32 aa64_hint_pack(AA64Hint f) {
+ return AA64_HINT_FAMILY_MATCH | ((f.CRm & 0xfu) << 8) | ((f.op2 & 7u) << 5);
+}
+
+static inline AA64Hint aa64_hint_unpack(u32 w) {
+ AA64Hint f;
+ f.CRm = (w >> 8) & 0xfu;
+ f.op2 = (w >> 5) & 7u;
+ return f;
+}
+
+static inline u32 aa64_nop(void) {
+ return aa64_hint_pack((AA64Hint){.CRm = 0, .op2 = AA64_HINT_OP_NOP});
+}
+
+/* ====================================================================
* Disassembler descriptor table.
* ==================================================================== */
diff --git a/src/arch/aarch64.c b/src/arch/aarch64.c
@@ -44,16 +44,9 @@
#define AA64_NOP 0xD503201Fu
-/* ADD/SUB immediate (12-bit imm, optional shift-12). Rd/Rn = 31 means SP
- * for these encodings (not ZR). */
-static inline u32 aa64_add_imm(u32 sf, u32 Rd, u32 Rn, u32 imm12, u32 sh) {
- return 0x11000000u | (sf << 31) | ((sh & 1) << 22) | ((imm12 & 0xfff) << 10) |
- ((Rn & 0x1f) << 5) | (Rd & 0x1f);
-}
-static inline u32 aa64_sub_imm(u32 sf, u32 Rd, u32 Rn, u32 imm12, u32 sh) {
- return 0x51000000u | (sf << 31) | ((sh & 1) << 22) | ((imm12 & 0xfff) << 10) |
- ((Rn & 0x1f) << 5) | (Rd & 0x1f);
-}
+/* ADD/SUB immediate (aa64_add_imm / aa64_sub_imm) live in
+ * arch/aa64_isa.h alongside the rest of the immediate-encoding family.
+ * Rd/Rn = 31 means SP for these encodings (not ZR). */
/* STP/LDP signed offset, X registers. Offset is byte offset, must be a
* multiple of 8; encoded value = byte_offset / 8 in a signed 7-bit field
@@ -1279,10 +1272,7 @@ static void emit_addr_adjust(MCEmitter* mc, u32 Rd, u32 base, i32 off);
* symbols whose kind isn't known yet; the decl pass mints externs
* with their intended SK_OBJ / SK_FUNC kind plus OBJ_SEC_NONE. */
static int use_got_for_sym(CGTarget* t, ObjSymId sym) {
- const ObjSym* s;
- if (!obj_format_extern_via_got(t->c)) return 0;
- s = obj_symbol_get(t->obj, sym);
- return s && s->section_id == OBJ_SEC_NONE;
+ return obj_symbol_extern_via_got(t->c, t->obj, sym);
}
/* Emit `ADRP dst, sym@GOTPAGE ; LDR Xdst, [dst, #sym@GOTPAGEOFF]`,
diff --git a/src/arch/rv64.c b/src/arch/rv64.c
@@ -1071,15 +1071,8 @@ static u32 addr_base(CGTarget* t, Operand addr, i32* out_off, u32 tmp_reg) {
(int)addr.kind);
}
-/* True when the symbol must be reached via the GOT at this site: an
- * undefined external on a target format that binds extern data through
- * GOT indirection (Mach-O always; ELF when compiling -fPIC/-fPIE).
- * Mirrors aarch64.c:use_got_for_sym / x64.c:x64_use_got_for_sym. */
static int rv64_use_got_for_sym(CGTarget* t, ObjSymId sym) {
- const ObjSym* s;
- if (!obj_format_extern_via_got(t->c)) return 0;
- s = obj_symbol_get(t->obj, sym);
- return s && s->section_id == OBJ_SEC_NONE;
+ return obj_symbol_extern_via_got(t->c, t->obj, sym);
}
/* Anchor symbol management for PCREL_LO12_*. Each AUIPC site gets a
diff --git a/src/arch/rv64_isa.h b/src/arch/rv64_isa.h
@@ -191,9 +191,10 @@ static inline u32 rv_bgeu(u32 rs1, u32 rs2, i32 imm) { return rv_b(imm, rs2, rs1
static inline u32 rv_jal(u32 rd, i32 imm21) { return rv_j(imm21, rd, RV_JAL); }
static inline u32 rv_jalr(u32 rd, u32 rs1, i32 imm) { return rv_i(imm, rs1, 0x0, rd, RV_JALR); }
-/* Convenience: jr / ret / j */
+/* Convenience: jr / ret / j / nop */
static inline u32 rv_jr(u32 rs1) { return rv_jalr(RV_ZERO, rs1, 0); }
static inline u32 rv_ret_(void) { return rv_jalr(RV_ZERO, RV_RA, 0); }
+static inline u32 rv_nop(void) { return RV_NOP; }
/* System */
static inline u32 rv_ecall(void) { return rv_i(0, 0, 0, 0, RV_SYSTEM); }
diff --git a/src/arch/x64.c b/src/arch/x64.c
@@ -1195,15 +1195,8 @@ static u32 addr_base(CGTarget* t, Operand addr, i32* out_off) {
(int)addr.kind);
}
-/* True when the symbol must be reached via the GOT at this site:
- * an undefined external on a format/PIC mode that binds extern data
- * through indirection (Mach-O always; ELF when compiling -fPIC/-fPIE).
- * Mirrors aarch64.c:use_got_for_sym. */
static int x64_use_got_for_sym(CGTarget* t, ObjSymId sym) {
- const ObjSym* s;
- if (!obj_format_extern_via_got(t->c)) return 0;
- s = obj_symbol_get(t->obj, sym);
- return s && s->section_id == OBJ_SEC_NONE;
+ return obj_symbol_extern_via_got(t->c, t->obj, sym);
}
/* Materialize `&sym + addend` into `dst_reg`. For locally-defined or
diff --git a/src/arch/x64_isa.h b/src/arch/x64_isa.h
@@ -9,6 +9,7 @@
#ifndef CFREE_X64_ISA_H
#define CFREE_X64_ISA_H
+#include "core/bytes.h"
#include "core/core.h"
/* ---- GPR numbering (DWARF / ABI matches HW encoding 0..15) ---- */
@@ -72,4 +73,56 @@ enum {
#define X64_REX_X 0x02u
#define X64_REX_B 0x01u
+/* ---- Branch / NOP encoding constants ----
+ *
+ * Used by the linker to emit PLT entries and IPLT stubs without
+ * sprinkling raw hex into link_arch_x64.c. The shape is always the
+ * same RIP-relative indirect JMP plus padding NOPs. */
+
+/* JMP r/m64 — opcode FF /4. ModR/M for the RIP+disp32 form is
+ * mod=00, reg=/4 (JMP m64), r/m=101 → 0x25. */
+#define X64_OP_JMP_RM64 0xFFu
+#define X64_MODRM_JMP_RIPREL 0x25u
+
+/* Single-byte NOP. */
+#define X64_NOP1 0x90u
+
+/* Intel multi-byte ("long") NOP forms. The 6-byte form is the
+ * canonical IPLT-stub tail pad (NOPW 0(%rax,%rax,1)). */
+#define X64_NOP6_BYTE0 0x66u
+#define X64_NOP6_BYTE1 0x0Fu
+#define X64_NOP6_BYTE2 0x1Fu
+#define X64_NOP6_BYTE3 0x44u
+#define X64_NOP6_BYTE4 0x00u
+#define X64_NOP6_BYTE5 0x00u
+
+/* Sizes of the encoded forms above. */
+#define X64_JMP_RIPREL_SIZE 6u
+#define X64_NOP6_SIZE 6u
+
+/* Write a 6-byte `jmp [rip + disp32]` (FF 25 disp32) at dst. */
+static inline void x64_write_jmp_riprel(u8* dst, i32 disp32) {
+ dst[0] = X64_OP_JMP_RM64;
+ dst[1] = X64_MODRM_JMP_RIPREL;
+ wr_u32_le(dst + 2, (u32)disp32);
+}
+
+/* Fill nbytes at dst with single-byte NOPs (0x90). Matches the
+ * existing memset-then-patch pattern used to pad PLT entries to 16. */
+static inline void x64_write_nop_pad(u8* dst, u32 nbytes) {
+ u32 i;
+ for (i = 0; i < nbytes; ++i) dst[i] = X64_NOP1;
+}
+
+/* Write the canonical 6-byte multi-byte NOP (66 0F 1F 44 00 00) at
+ * dst. Used to pad the IPLT stub from 6 → 12 bytes. */
+static inline void x64_write_nop6(u8* dst) {
+ dst[0] = X64_NOP6_BYTE0;
+ dst[1] = X64_NOP6_BYTE1;
+ dst[2] = X64_NOP6_BYTE2;
+ dst[3] = X64_NOP6_BYTE3;
+ dst[4] = X64_NOP6_BYTE4;
+ dst[5] = X64_NOP6_BYTE5;
+}
+
#endif
diff --git a/src/link/link.c b/src/link/link.c
@@ -74,10 +74,10 @@ Linker* link_new(Compiler* c) {
LinkArchives_init(&l->archives, h);
/* Default entry: ELF/static convention uses `_start`. Mach-O's
* LC_MAIN names main directly (dyld owns the C runtime startup),
- * so the on-disk symbol is `_main` (the mangled form of `main`). */
- l->entry_name = (c->target.obj == CFREE_OBJ_MACHO)
- ? pool_intern_cstr(c->global, "_main")
- : pool_intern_cstr(c->global, "_start");
+ * so the on-disk symbol is `_main` (the mangled form of `main`).
+ * Format choice lives in obj_format_default_entry_name. */
+ l->entry_name =
+ pool_intern_cstr(c->global, obj_format_default_entry_name(c));
/* Match the rest of libcfree's lifetime story: the new'd Linker is
* registered for cleanup in case a panic fires before link_free. */
l->deferred = compiler_defer(c, linker_cleanup, l);
@@ -300,27 +300,11 @@ LinkInputId link_add_archive_bytes(Linker* l, const char* name, const u8* data,
}
/* Intern a C-source-level symbol name in the format the input objects
- * use on the wire. Mach-O prepends `_` to every C symbol on disk
- * unconditionally (matching Apple cc / decl.c): "test_main" becomes
- * `_test_main`, `_start` becomes `__start`, `__init_array_start`
- * becomes `___init_array_start`. ELF / COFF / Wasm intern verbatim. */
+ * use on the wire. Format-specific mangling (Mach-O `_` prefix,
+ * verbatim everywhere else) lives in obj_format_c_mangle. */
Sym link_intern_c_name(Linker* l, const char* name) {
- Sym sym;
if (!l || !name) return 0;
- if (l->c->target.obj == CFREE_OBJ_MACHO) {
- size_t n = strlen(name);
- char* buf = (char*)l->heap->alloc(l->heap, n + 2, 1);
- if (!buf)
- compiler_panic(l->c, no_loc(),
- "link_intern_c_name: oom prefixing '%s'", name);
- buf[0] = '_';
- memcpy(buf + 1, name, n);
- buf[n + 1] = 0;
- sym = pool_intern(l->c->global, buf, (u32)(n + 1));
- l->heap->free(l->heap, buf, n + 2);
- return sym;
- }
- return pool_intern_cstr(l->c->global, name);
+ return obj_format_c_mangle(l->c, name);
}
void link_set_entry(Linker* l, const char* name) {
diff --git a/src/link/link_arch.c b/src/link/link_arch.c
@@ -0,0 +1,19 @@
+/* Per-arch link descriptor dispatcher. See link_arch.h for the
+ * struct contract. Concrete descriptors live in link_arch_<arch>.c. */
+
+#include "link/link_arch.h"
+
+#include "core/core.h"
+
+const LinkArchDesc* link_arch_desc_for(const Compiler* c) {
+ switch (c->target.arch) {
+ case CFREE_ARCH_ARM_64:
+ return &link_arch_aa64;
+ case CFREE_ARCH_X86_64:
+ return &link_arch_x64;
+ case CFREE_ARCH_RV64:
+ return &link_arch_rv64;
+ default:
+ return NULL;
+ }
+}
diff --git a/src/link/link_arch.h b/src/link/link_arch.h
@@ -0,0 +1,99 @@
+#ifndef CFREE_LINK_ARCH_H
+#define CFREE_LINK_ARCH_H
+
+/* Per-architecture link-time descriptor.
+ *
+ * Pure-data + a small set of stub-emit function pointers, indexed by
+ * Compiler.target.arch. Lets link_dyn.c / link_layout.c / link_elf.c
+ * stay arch-agnostic instead of branching on target.arch and hand-
+ * encoding instruction bytes inline. Each backend's descriptor lives
+ * in its own translation unit (link_arch_aa64.c / _x64.c / _rv64.c)
+ * and leans on the existing arch/<arch>_isa.h encoders for everything
+ * but small format-specific constants.
+ *
+ * The struct intentionally collects only fields the LINKER needs.
+ * Code-generation arch dispatch belongs in CGTarget (arch/arch.h);
+ * reloc-apply dispatch is keyed on RelocKind in link_reloc.c. */
+
+#include "core/core.h"
+#include "obj/obj.h"
+
+/* IPLT relocation slot reported by emit_iplt_stub. Some arches
+ * (aarch64) cannot encode the stub->slot displacement inline and need
+ * the linker to generate apply-time fixups; others (x64, rv64) bake
+ * the displacement directly into the stub bytes and report zero relocs.
+ *
+ * Fields are everything the emitter knows; the caller fills in
+ * link_section_id, write_vaddr / write_file_offset, target sym, addend
+ * from its own context. */
+typedef struct LinkArchIPltReloc {
+  u32 offset_in_stub; /* byte offset from the start of the IPLT stub */
+  u32 width;
+  RelocKind kind;
+} LinkArchIPltReloc;
+
+typedef struct LinkArchDesc {
+ /* ---- ELF identity ---- */
+ u32 e_machine; /* EM_AARCH64 / EM_X86_64 / EM_RISCV */
+
+ /* Default PT_INTERP (canonical musl loader for this arch). Drivers
+ * should override via link_set_interp_path; the default fires only
+ * when the caller leaves it unset and -static isn't in effect. */
+ const char* default_musl_interp;
+
+ /* ---- Dynamic-reloc type numbers (ELF) ----
+ * Used by .rela.dyn / .rela.plt emission. Reloc-type numbers are
+ * arch-specific: aarch64 starts at 1024, x86_64 in the low single
+ * digits, RISC-V uses its own encoding (and maps GLOB_DAT onto
+ * R_RISCV_64 since the psABI has no dedicated GLOB_DAT). */
+ u32 elf_r_relative;
+ u32 elf_r_glob_dat;
+ u32 elf_r_jump_slot;
+
+ /* ---- PLT geometry ----
+ * All three arches today use a 32-byte PLT0 + 16-byte per-import
+ * entry, but exposing the sizes keeps the linker free of magic
+ * numbers and lets a future port pick its own layout. */
+ u32 plt0_size;
+ u32 plt_entry_size;
+
+ /* ---- IPLT geometry (ifunc trampolines, layout_iplt) ---- */
+ u32 iplt_stub_size;
+
+ /* ---- Stub emitters ----
+ * Each writes its full byte range; callers do not need to pre-fill
+ * the buffer. All vaddrs are post-shift (final image addresses).
+ *
+ * emit_plt0: writes plt0_size bytes at dst. PLT0 is the
+ * lazy-resolve trampoline; under DF_1_NOW it's
+ * never executed but is emitted in canonical
+ * form for disassembler / unwinder consumption.
+ * gotplt_vaddr is the base of .got.plt.
+ *
+ * emit_plt_entry: writes plt_entry_size bytes at dst for one
+ * imported function. entry_vaddr is the
+ * absolute address of this entry; slot_vaddr is
+ * its corresponding .got.plt slot.
+ *
+ * emit_iplt_stub: writes iplt_stub_size bytes for one ifunc
+ * trampoline that loads .igot.plt[i] and tail-
+ * calls. Returns the number of LinkRelocApply
+ * records the caller must enqueue (0 or 2);
+ * the records' offset_in_stub / width / kind are
+ * populated, the caller fills the rest. */
+ void (*emit_plt0) (u8* dst, u64 plt0_vaddr, u64 gotplt_vaddr);
+ void (*emit_plt_entry)(u8* dst, u64 entry_vaddr, u64 slot_vaddr);
+ u32 (*emit_iplt_stub)(u8* dst, u64 stub_vaddr, u64 slot_vaddr,
+ LinkArchIPltReloc out[2]);
+} LinkArchDesc;
+
+/* Per-arch descriptors, defined in link_arch_<arch>.c. */
+extern const LinkArchDesc link_arch_aa64;
+extern const LinkArchDesc link_arch_x64;
+extern const LinkArchDesc link_arch_rv64;
+
+/* Returns NULL for an unsupported arch. Callers panic with their own
+ * context-rich message rather than this helper picking one. */
+const LinkArchDesc* link_arch_desc_for(const Compiler*);
+
+#endif
diff --git a/src/link/link_arch_aa64.c b/src/link/link_arch_aa64.c
@@ -0,0 +1,153 @@
+/* AArch64 link-time descriptor.
+ *
+ * Implements the LinkArchDesc contract from link_arch.h for the
+ * aarch64 ELF psABI: PLT0 + per-import PLT entries (lazy-resolve
+ * trampolines emitted in canonical form even under DF_1_NOW), and the
+ * 12-byte IPLT stub used by ifunc resolvers. All instruction bytes
+ * come from the encoders in arch/aa64_isa.h — no raw hex literals
+ * here.
+ *
+ * The byte layout matches the previous inline encodings in
+ * link_dyn.c (PLT) and link_layout.c (IPLT) so that switching the
+ * linker to descriptor dispatch is a no-op on the output image. */
+
+#include "arch/aa64_isa.h"
+#include "core/bytes.h"
+#include "core/core.h"
+#include "link/link_arch.h"
+#include "obj/elf.h"
+#include "obj/obj.h"
+
+/* Fixed register assignments mandated by the AArch64 PLT ABI. */
+#define AA64_PLT_SCRATCH_X16 16u /* PLT/IPLT scratch (slot address) */
+#define AA64_PLT_SCRATCH_X17 17u /* PLT scratch (loaded function ptr) */
+
+/* PLT geometry. Documented in link_arch.h; redeclared here as the
+ * descriptor table needs them at file scope. */
+#define AA64_PLT0_SIZE 32u
+#define AA64_PLT_ENTRY_SIZE 16u
+#define AA64_IPLT_STUB_SIZE 12u
+
+/* PLT0 references .got.plt[2] (the lazy-resolve hook); the per-import
+ * entries start at .got.plt[3]. */
+#define AA64_GOTPLT_RESOLVER_INDEX 2u
+
+/* Page mask for ADRP: ADRP encodes (page(target) - page(PC)) >> 12,
+ * where page(x) clears the low 12 bits. */
+#define AA64_PAGE_MASK ((u64)0xfffu)
+
+/* Compute the (immlo, immhi) ADRP immediate halves for the page-
+ * relative displacement from `pc` to `target`.
+ *
+ * ADRP encodes (page(target) - page(pc)) >> 12 as a 21-bit field,
+ * where page(x) clears the low 12 bits; the field is split into
+ * immlo (bits 1:0) and immhi (bits 20:2). Callers pass post-shift
+ * final image vaddrs for both addresses.
+ *
+ * The difference is taken in unsigned (wraparound) arithmetic. The
+ * low 21 bits of the wrapped u64 result are identical to those of the
+ * two's-complement signed difference, so the encoded field is the
+ * same for forward and backward displacements, but we avoid both the
+ * implementation-defined right shift of a negative signed value and
+ * any signed-overflow hazard in the subtraction.
+ *
+ *   pc      vaddr of the ADRP instruction
+ *   target  vaddr whose page the ADRP should materialize
+ *   immlo   out: bits [1:0] of the page delta
+ *   immhi   out: bits [20:2] of the page delta
+ *
+ * No range check is done here: a delta that does not fit in 21 signed
+ * bits is silently truncated to the field width. */
+static inline void aa64_adrp_imm_halves(u64 pc, u64 target, u32* immlo,
+                                        u32* immhi) {
+  u64 page_disp = (target & ~AA64_PAGE_MASK) - (pc & ~AA64_PAGE_MASK);
+  u64 imm21 = page_disp >> 12;
+  *immlo = (u32)(imm21 & 0x3u);
+  *immhi = (u32)((imm21 >> 2) & 0x7ffffu);
+}
+
+/* Write the 16-byte ADRP / LDR / ADD / BR tail shared by PLT0 (after
+ * its leading STP) and by every per-import PLT entry:
+ *
+ *   adrp x16, page(slot)
+ *   ldr  x17, [x16, #lo12(slot)]
+ *   add  x16, x16, #lo12(slot)
+ *   br   x17
+ *
+ *   dst         destination for the four little-endian instruction words
+ *   pc          vaddr of the ADRP itself (i.e. of dst[0])
+ *   slot_vaddr  vaddr of the .got.plt slot being loaded
+ */
+static void aa64_emit_adrp_load_br(u8* dst, u64 pc, u64 slot_vaddr) {
+  const u32 slot_lo12 = (u32)(slot_vaddr & AA64_PAGE_MASK);
+  /* The LDR (64-bit, unsigned offset) immediate is the byte offset
+   * divided by 8; .got.plt slots are 8-byte aligned, so no low bits
+   * are lost by the shift. */
+  const u32 ldr_scaled = (slot_lo12 >> 3) & 0xfffu;
+  u32 adrp_lo, adrp_hi;
+  u32 insn_adrp, insn_ldr, insn_add, insn_br;
+
+  aa64_adrp_imm_halves(pc, slot_vaddr, &adrp_lo, &adrp_hi);
+
+  insn_adrp = aa64_adrp(AA64_PLT_SCRATCH_X16, adrp_lo, adrp_hi);
+  insn_ldr = aa64_ldr64_uimm12(AA64_PLT_SCRATCH_X17, AA64_PLT_SCRATCH_X16,
+                               ldr_scaled);
+  insn_add = aa64_add_imm(/*sf=*/1, AA64_PLT_SCRATCH_X16,
+                          AA64_PLT_SCRATCH_X16, slot_lo12, /*sh=*/0);
+  insn_br = aa64_br(AA64_PLT_SCRATCH_X17);
+
+  wr_u32_le(dst + 0, insn_adrp);
+  wr_u32_le(dst + 4, insn_ldr);
+  wr_u32_le(dst + 8, insn_add);
+  wr_u32_le(dst + 12, insn_br);
+}
+
+/* Emit the 32-byte AArch64 PLT0 header at `dst`:
+ *
+ *   stp  x16, x30, [sp, #-16]!
+ *   adrp x16, page(.got.plt[2])
+ *   ldr  x17, [x16, #lo12(.got.plt[2])]
+ *   add  x16, x16, #lo12(.got.plt[2])
+ *   br   x17
+ *   nop ; nop ; nop
+ *
+ *   plt0_vaddr    vaddr of the first PLT0 instruction
+ *   gotplt_vaddr  base vaddr of .got.plt
+ *
+ * With DF_1_NOW the loader fills every .got.plt slot from .rela.plt
+ * up front, so this lazy-resolve trampoline is never run; it is still
+ * written in canonical form for the benefit of disassemblers and
+ * unwinders. */
+static void aa64_emit_plt0(u8* dst, u64 plt0_vaddr, u64 gotplt_vaddr) {
+  /* .got.plt[2] holds the lazy-resolve hook; slots are 8 bytes each. */
+  const u64 resolver_slot = gotplt_vaddr + 8u * AA64_GOTPLT_RESOLVER_INDEX;
+  u32 off;
+
+  /* Pre-indexed pair store: imm7 is scaled by 8, so -16 bytes is
+   * encoded as -2. */
+  wr_u32_le(dst, aa64_stp64_pre(AA64_PLT_SCRATCH_X16, AA64_LR, AA64_SP,
+                                /*imm7_scaled=*/-2));
+
+  /* The ADRP is the second instruction, so its PC is plt0 + 4. */
+  aa64_emit_adrp_load_br(dst + 4, plt0_vaddr + 4u, resolver_slot);
+
+  /* Pad the remainder (bytes 20..31) with NOPs. */
+  for (off = 20u; off < AA64_PLT0_SIZE; off += 4u)
+    wr_u32_le(dst + off, aa64_nop());
+}
+
+/* LinkArchDesc.emit_plt_entry for AArch64: write one per-import PLT
+ * entry at `dst`.
+ *
+ * dst receives the instruction bytes
+ * entry_vaddr vaddr of this entry's first instruction
+ * slot_vaddr vaddr of the entry's .got.plt slot
+ *
+ * Delegates entirely to aa64_emit_adrp_load_br, which writes four
+ * 32-bit instruction words (16 bytes). */
+static void aa64_emit_plt_entry(u8* dst, u64 entry_vaddr, u64 slot_vaddr) {
+ /* Per-import 16-byte entry: ADRP+LDR+ADD+BR where ADRP's PC is the
+ * entry's first instruction (no leading STP here — the resolved
+ * function returns to the original caller, not into PLT0). */
+ aa64_emit_adrp_load_br(dst, entry_vaddr, slot_vaddr);
+}
+
+/* LinkArchDesc.emit_iplt_stub for AArch64. Writes the 12-byte stub
+ *
+ *   adrp x16, %page(slot)
+ *   ldr  x16, [x16, :lo12:slot]
+ *   br   x16
+ *
+ * with both address immediates left at zero, and describes the two
+ * fixups the linker must enqueue against the slot's synthetic local
+ * symbol: ADR_PREL_PG_HI21 on the ADRP (stub+0) and
+ * LDST64_ABS_LO12_NC on the LDR (stub+4). The immediates are deferred
+ * to reloc-apply because that runs after final vaddr assignment.
+ *
+ *   dst         destination for the stub bytes
+ *   stub_vaddr  unused on this arch (fixups carry the address math)
+ *   slot_vaddr  unused on this arch
+ *   out         receives two records; only offset_in_stub, width and
+ *               kind are set here, the caller fills in the rest
+ *
+ * Returns the number of records written to `out` (always 2). */
+static u32 aa64_emit_iplt_stub(u8* dst, u64 stub_vaddr, u64 slot_vaddr,
+                               LinkArchIPltReloc out[2]) {
+  LinkArchIPltReloc* adrp_fixup = &out[0];
+  LinkArchIPltReloc* ldr_fixup = &out[1];
+  const u32 insn_adrp =
+      aa64_adrp(AA64_PLT_SCRATCH_X16, /*immlo=*/0, /*immhi=*/0);
+  const u32 insn_ldr = aa64_ldr64_uimm12(
+      AA64_PLT_SCRATCH_X16, AA64_PLT_SCRATCH_X16, /*imm12_scaled=*/0);
+  const u32 insn_br = aa64_br(AA64_PLT_SCRATCH_X16);
+
+  (void)stub_vaddr;
+  (void)slot_vaddr;
+
+  wr_u32_le(dst + 0, insn_adrp);
+  wr_u32_le(dst + 4, insn_ldr);
+  wr_u32_le(dst + 8, insn_br);
+
+  /* Page-relative high part, patched into the ADRP. */
+  adrp_fixup->offset_in_stub = 0;
+  adrp_fixup->width = 4;
+  adrp_fixup->kind = R_AARCH64_ADR_PREL_PG_HI21;
+
+  /* Low 12 bits (scaled for a 64-bit load), patched into the LDR. */
+  ldr_fixup->offset_in_stub = 4;
+  ldr_fixup->width = 4;
+  ldr_fixup->kind = R_AARCH64_LDST64_ABS_LO12_NC;
+
+  return 2;
+}
+
+/* AArch64 instance of the LinkArchDesc contract (link/link_arch.h):
+ * ELF machine id, default musl interpreter path, dynamic-reloc type
+ * numbers, stub sizes, and the three emitters defined above. The size
+ * fields use the same AA64_* macros the emitters are written against. */
+const LinkArchDesc link_arch_aa64 = {
+ .e_machine = EM_AARCH64,
+ .default_musl_interp = "/lib/ld-musl-aarch64.so.1",
+
+ .elf_r_relative = ELF_R_AARCH64_RELATIVE,
+ .elf_r_glob_dat = ELF_R_AARCH64_GLOB_DAT,
+ .elf_r_jump_slot = ELF_R_AARCH64_JUMP_SLOT,
+
+ .plt0_size = AA64_PLT0_SIZE,
+ .plt_entry_size = AA64_PLT_ENTRY_SIZE,
+ .iplt_stub_size = AA64_IPLT_STUB_SIZE,
+
+ .emit_plt0 = aa64_emit_plt0,
+ .emit_plt_entry = aa64_emit_plt_entry,
+ .emit_iplt_stub = aa64_emit_iplt_stub,
+};
diff --git a/src/link/link_arch_rv64.c b/src/link/link_arch_rv64.c
@@ -0,0 +1,94 @@
+/* RV64 link-time arch descriptor. See link_arch.h for the contract.
+ *
+ * The PLT0/PLT-entry/IPLT-stub byte layouts here mirror what used to
+ * live inline in link_dyn.c (PLT) and link_layout.c (IPLT) before the
+ * vtable refactor; comments preserve the WHY (notably the +0x800 bias
+ * on AUIPC immediates). */
+
+#include "link/link_arch.h"
+
+#include "arch/rv64_isa.h"
+#include "core/bytes.h"
+#include "core/core.h"
+#include "obj/elf.h"
+
+/* PLT0 is 8 canonical NOPs (32 bytes); each PLT entry and IPLT stub is
+ * 4 instructions (16 bytes) / 3 instructions (12 bytes) respectively.
+ * Encoded once here so the descriptor and emitters stay in sync. */
+#define RV64_PLT0_SIZE 32u
+#define RV64_PLT_ENTRY_SIZE 16u
+#define RV64_IPLT_STUB_SIZE 12u
+
+/* Break a PC-relative displacement into the (hi20, lo12) immediates
+ * for an AUIPC + I-type instruction pair.
+ *
+ * AUIPC contributes hi20 << 12; the following instruction then adds
+ * lo12 *sign-extended* from 12 bits. A plain (disp >> 12, disp & 0xfff)
+ * split is therefore wrong whenever bit 11 of disp is set: the low
+ * half is read as negative and the sum lands 0x1000 short. Biasing
+ * disp by +0x800 before taking the high half bumps hi20 by one in
+ * exactly those cases, so the pair reconstructs disp.
+ *
+ *   disp      signed byte displacement from the AUIPC to the target
+ *   hi20_out  out: 20-bit AUIPC immediate (masked to 20 bits)
+ *   lo12_out  out: raw low 12 bits of disp (not sign-extended)
+ */
+static inline void rv64_split_pcrel(i64 disp, u32* hi20_out, u32* lo12_out) {
+  const u64 biased = (u64)(disp + 0x800);
+  const u32 upper = (u32)(biased >> 12);
+  const u32 lower = (u32)((u64)disp & 0xfffu);
+  *hi20_out = upper & 0xfffffu;
+  *lo12_out = lower;
+}
+
+/* LinkArchDesc.emit_plt0 for RV64: fill the RV64_PLT0_SIZE-byte PLT0
+ * header with NOPs.
+ *
+ * With DF_1_NOW the loader resolves every JUMP_SLOT before handing
+ * over control, so PLT0 is never run; it is emitted anyway so that
+ * disassemblers and unwinders find well-formed instructions at the
+ * start of .plt. Both address arguments are unused. */
+static void rv64_emit_plt0(u8* dst, u64 plt0_vaddr, u64 gotplt_vaddr) {
+  u8* const end = dst + RV64_PLT0_SIZE;
+  u8* cursor = dst;
+  (void)plt0_vaddr;
+  (void)gotplt_vaddr;
+  while (cursor < end) {
+    wr_u32_le(cursor, rv_nop());
+    cursor += 4u;
+  }
+}
+
+/* LinkArchDesc.emit_plt_entry for RV64. Writes a 16-byte entry:
+ *
+ *   auipc t3, %hi(slot - entry)
+ *   ld    t3, %lo(slot - entry)(t3)
+ *   jalr  t1, t3, 0
+ *   nop
+ *
+ * The entry loads the .got.plt slot (filled by the loader through
+ * R_RISCV_JUMP_SLOT) and jumps through it. t3 carries the slot
+ * pointer / loaded target; t1 is the psABI scratch that receives the
+ * trampoline return address for the lazy resolver.
+ *
+ *   dst          destination for the entry bytes
+ *   entry_vaddr  vaddr of the AUIPC (first instruction of the entry)
+ *   slot_vaddr   vaddr of the entry's .got.plt slot
+ */
+static void rv64_emit_plt_entry(u8* dst, u64 entry_vaddr, u64 slot_vaddr) {
+  const i64 slot_disp = (i64)slot_vaddr - (i64)entry_vaddr;
+  u32 disp_hi;
+  u32 disp_lo;
+  u32 insn_auipc, insn_ld, insn_jalr, insn_nop;
+
+  rv64_split_pcrel(slot_disp, &disp_hi, &disp_lo);
+
+  insn_auipc = rv_auipc(RV_T3, disp_hi);
+  insn_ld = rv_ld(RV_T3, RV_T3, (i32)disp_lo);
+  insn_jalr = rv_jalr(RV_T1, RV_T3, 0);
+  insn_nop = rv_nop();
+
+  wr_u32_le(dst + 0, insn_auipc);
+  wr_u32_le(dst + 4, insn_ld);
+  wr_u32_le(dst + 8, insn_jalr);
+  wr_u32_le(dst + 12, insn_nop);
+}
+
+/* LinkArchDesc.emit_iplt_stub for RV64. Writes a 12-byte stub:
+ *
+ *   auipc t1, %hi(slot - stub)
+ *   ld    t1, %lo(slot - stub)(t1)
+ *   jr    t1
+ *
+ * The stub loads .igot.plt[i] (filled at startup by the resolver) and
+ * tail-calls it. Stub and slot live in the same image, so their
+ * distance does not change under the segment-base shift; the
+ * displacement is therefore encoded directly and no apply-time
+ * relocations are requested.
+ *
+ *   dst         destination for the stub bytes
+ *   stub_vaddr  vaddr of the AUIPC (first instruction of the stub)
+ *   slot_vaddr  vaddr of the stub's .igot.plt slot
+ *   out         unused on this arch
+ *
+ * Returns 0 (no records written to `out`). */
+static u32 rv64_emit_iplt_stub(u8* dst, u64 stub_vaddr, u64 slot_vaddr,
+                               LinkArchIPltReloc out[2]) {
+  const i64 slot_disp = (i64)slot_vaddr - (i64)stub_vaddr;
+  u32 disp_hi;
+  u32 disp_lo;
+  u32 insn_auipc, insn_ld, insn_jr;
+
+  (void)out;
+  rv64_split_pcrel(slot_disp, &disp_hi, &disp_lo);
+
+  insn_auipc = rv_auipc(RV_T1, disp_hi);
+  insn_ld = rv_ld(RV_T1, RV_T1, (i32)disp_lo);
+  insn_jr = rv_jr(RV_T1);
+
+  wr_u32_le(dst + 0, insn_auipc);
+  wr_u32_le(dst + 4, insn_ld);
+  wr_u32_le(dst + 8, insn_jr);
+  return 0u;
+}
+
+/* RV64 instance of the LinkArchDesc contract (link/link_arch.h): ELF
+ * machine id, default musl interpreter path, dynamic-reloc type
+ * numbers, stub sizes, and the three emitters defined above. The size
+ * fields use the same RV64_* macros the emitters are written against. */
+const LinkArchDesc link_arch_rv64 = {
+ .e_machine = EM_RISCV,
+ .default_musl_interp = "/lib/ld-musl-riscv64.so.1",
+ /* RISC-V psABI has no dedicated GLOB_DAT — GOT-slot data imports
+ * use the generic absolute-64 reloc instead. */
+ .elf_r_relative = ELF_R_RISCV_RELATIVE,
+ .elf_r_glob_dat = ELF_R_RISCV_64,
+ .elf_r_jump_slot = ELF_R_RISCV_JUMP_SLOT,
+ .plt0_size = RV64_PLT0_SIZE,
+ .plt_entry_size = RV64_PLT_ENTRY_SIZE,
+ .iplt_stub_size = RV64_IPLT_STUB_SIZE,
+ .emit_plt0 = rv64_emit_plt0,
+ .emit_plt_entry = rv64_emit_plt_entry,
+ .emit_iplt_stub = rv64_emit_iplt_stub,
+};
diff --git a/src/link/link_arch_x64.c b/src/link/link_arch_x64.c
@@ -0,0 +1,77 @@
+/* x86_64 link-time arch descriptor.
+ *
+ * Implements the LinkArchDesc contract from link/link_arch.h for
+ * EM_X86_64. The PLT/IPLT byte sequences here mirror the inline
+ * encodings previously living in link_dyn.c (PLT0 + per-import entry)
+ * and link_layout.c (IPLT stub) — kept identical byte-for-byte so the
+ * descriptor switchover is a pure refactor. All raw byte values come
+ * from named constants / inline writers in arch/x64_isa.h. */
+
+#include "link/link_arch.h"
+
+#include "arch/x64_isa.h"
+#include "core/bytes.h"
+#include "core/core.h"
+#include "obj/elf.h"
+
+/* PLT0 layout under DF_1_NOW: never executed (loader pre-binds every
+ * slot via .rela.plt before user code runs), so we just emit 32 bytes
+ * of single-byte NOPs. Self-documenting and trivially well-formed for
+ * disassemblers and unwinders that walk the section. */
+static void x64_emit_plt0(u8* dst, u64 plt0_vaddr, u64 gotplt_vaddr) {
+ (void)plt0_vaddr;
+ (void)gotplt_vaddr;
+ x64_write_nop_pad(dst, 32u);
+}
+
+/* Per-import PLT entry (16 B):
+ *
+ * ff 25 disp32 ; jmpq *[rip + disp_to_slot] (6 B)
+ * 90 90 90 90 90 90 90 90 90 90 ; pad to 16 with single-byte NOPs
+ *
+ * disp32 is measured from the END of the JMP (entry_vaddr + 6) to the
+ * .got.plt slot. The 10-byte tail matches link_dyn.c's prior
+ * memset(0x90)+patch behavior exactly. */
+static void x64_emit_plt_entry(u8* dst, u64 entry_vaddr, u64 slot_vaddr) {
+ i64 disp = (i64)slot_vaddr - (i64)(entry_vaddr + X64_JMP_RIPREL_SIZE);
+ i32 disp32 = (i32)(u32)((u64)disp & 0xffffffffu);
+ x64_write_jmp_riprel(dst, disp32);
+ x64_write_nop_pad(dst + X64_JMP_RIPREL_SIZE,
+ 16u - X64_JMP_RIPREL_SIZE);
+}
+
+/* IPLT (ifunc) trampoline stub (12 B):
+ *
+ * ff 25 disp32 ; jmpq *[rip + disp_to_slot] (6 B)
+ * 66 0f 1f 44 00 00 ; 6-byte multibyte NOP (6 B)
+ *
+ * Like the PLT entry, disp32 is from the END of the JMP to the
+ * .igot.plt slot. The displacement is invariant under image-base
+ * shift (both ends move together), so it's encoded inline and we
+ * report zero apply-time relocations. */
+static u32 x64_emit_iplt_stub(u8* dst, u64 stub_vaddr, u64 slot_vaddr,
+ LinkArchIPltReloc out[2]) {
+ (void)out;
+ i64 disp = (i64)slot_vaddr - (i64)(stub_vaddr + X64_JMP_RIPREL_SIZE);
+ i32 disp32 = (i32)(u32)((u64)disp & 0xffffffffu);
+ x64_write_jmp_riprel(dst, disp32);
+ x64_write_nop6(dst + X64_JMP_RIPREL_SIZE);
+ return 0;
+}
+
+const LinkArchDesc link_arch_x64 = {
+ .e_machine = EM_X86_64,
+ .default_musl_interp = "/lib/ld-musl-x86_64.so.1",
+
+ .elf_r_relative = ELF_R_X86_64_RELATIVE,
+ .elf_r_glob_dat = ELF_R_X86_64_GLOB_DAT,
+ .elf_r_jump_slot = ELF_R_X86_64_JUMP_SLOT,
+
+ .plt0_size = 32u,
+ .plt_entry_size = 16u,
+ .iplt_stub_size = 12u,
+
+ .emit_plt0 = x64_emit_plt0,
+ .emit_plt_entry = x64_emit_plt_entry,
+ .emit_iplt_stub = x64_emit_iplt_stub,
+};
diff --git a/src/link/link_dyn.c b/src/link/link_dyn.c
@@ -39,6 +39,7 @@
#include "core/util.h"
#include "core/vec.h"
#include "link/link.h"
+#include "link/link_arch.h"
#include "link/link_internal.h"
#include "obj/elf.h"
@@ -498,21 +499,32 @@ void layout_dyn(Linker* l, LinkImage* img) {
ImportLists imports;
ByteBuf dynstr;
u64 page;
+ const LinkArchDesc* arch;
if (!l->emit_pie) return;
+ arch = link_arch_desc_for(l->c);
+ if (!arch)
+ compiler_panic(img->c, no_loc(),
+ "link: layout_dyn: no arch descriptor for arch %u",
+ (u32)l->c->target.arch);
+
dyn = (LinkDynState*)h->alloc(h, sizeof(*dyn), _Alignof(LinkDynState));
if (!dyn) compiler_panic(img->c, no_loc(), "link: oom on dyn state");
memset(dyn, 0, sizeof(*dyn));
img->dyn = dyn;
img->pie = 1;
- /* PT_INTERP path. Default to musl's aarch64 loader when not set;
- * the only target this cut supports is aarch64-linux. */
+ /* PT_INTERP path. Default to the canonical musl loader matching the
+ * target arch (per-arch table in link_arch_<arch>.c) when the caller
+ * didn't set one. Drivers like cfree-cc always override via
+ * link_set_interp_path; this default is correctness for direct
+ * libcfree consumers. glibc users have to set their interp
+ * explicitly — we don't pick a default for them. */
dyn->interp_path =
l->interp_path
? l->interp_path
- : pool_intern_cstr(l->c->global, "/lib/ld-musl-aarch64.so.1");
+ : pool_intern_cstr(l->c->global, arch->default_musl_interp);
/* Step 1: enumerate imports + DT_NEEDED. */
collect_imports(l, img, h, &imports);
@@ -579,19 +591,19 @@ void layout_dyn(Linker* l, LinkImage* img) {
size_t namelen;
const char* interp_str = pool_str(l->c->global, dyn->interp_path, &namelen);
u64 interp_bytes = (u64)namelen + 1u;
- u64 dynsym_bytes = (u64)dyn->ndynsym * 24u;
+ u64 dynsym_bytes = (u64)dyn->ndynsym * ELF64_SYM_SIZE;
u64 dynstr_bytes = (u64)dyn->dynstr_len;
u64 gnuhash_bytes = (u64)dyn->gnu_hash_len;
/* rela.dyn / rela.plt sized for full capacity; emit only writes
* what's populated, but the section's file_size matches capacity
* so PT_LOAD/.rela.dyn shdr sh_size add up. Trailing zero records
* are harmless to the loader (R_AARCH64_NONE). */
- u64 rela_dyn_bytes = (u64)dyn->cap_rela_dyn * 24u;
- u64 rela_plt_bytes = (u64)dyn->nrela_plt * 24u;
+ u64 rela_dyn_bytes = (u64)dyn->cap_rela_dyn * ELF64_RELA_SIZE;
+ u64 rela_plt_bytes = (u64)dyn->nrela_plt * ELF64_RELA_SIZE;
u64 plt_bytes = (u64)(imports.nfuncs ? 32u + 16u * imports.nfuncs : 0u);
u64 gotplt_bytes = (u64)(imports.nfuncs ? 8u * (3u + imports.nfuncs) : 0u);
dyn->ndyn_entries = count_dynamic_entries(dyn);
- u64 dynamic_bytes = (u64)dyn->ndyn_entries * 16u;
+ u64 dynamic_bytes = (u64)dyn->ndyn_entries * ELF64_DYN_SIZE;
/* Step 5: place segments, page-aligned after the existing image
* span. New segments:
@@ -957,7 +969,7 @@ void layout_dyn(Linker* l, LinkImage* img) {
{
u32 si;
for (si = 0; si < dyn->ndynsym; ++si) {
- u8* p = ro_bytes + dynsym_off + (u64)si * 24u;
+ u8* p = ro_bytes + dynsym_off + (u64)si * ELF64_SYM_SIZE;
const DynSymRec* r = &dyn->dynsym[si];
wr_u32_le(p + 0, r->st_name);
p[4] = r->st_info;
@@ -1014,7 +1026,7 @@ void layout_dyn(Linker* l, LinkImage* img) {
}
r->r_addend = 0;
/* Serialize into segment bytes (will be re-serialized post-shift). */
- u8* p = ro_bytes + rela_plt_off + (u64)ki * 24u;
+ u8* p = ro_bytes + rela_plt_off + (u64)ki * ELF64_RELA_SIZE;
wr_u64_le(p + 0, r->r_offset);
wr_u64_le(p + 8, r->r_info);
wr_u64_le(p + 16, (u64)r->r_addend);
diff --git a/src/link/link_elf.c b/src/link/link_elf.c
@@ -826,7 +826,7 @@ void link_emit_elf(LinkImage* img, Writer* w) {
u32 i;
for (i = 0; i < dyn->nrela_dyn; ++i) {
const DynRela* rr = &dyn->rela_dyn[i];
- u8* rp = rd_bytes + (u64)i * 24u;
+ u8* rp = rd_bytes + (u64)i * ELF64_RELA_SIZE;
wr_u64_le(rp + 0, rr->r_offset);
wr_u64_le(rp + 8, rr->r_info);
wr_u64_le(rp + 16, (u64)rr->r_addend);
@@ -844,7 +844,7 @@ void link_emit_elf(LinkImage* img, Writer* w) {
u32 i;
for (i = 0; i < dyn->nrela_plt; ++i) {
const DynRela* rr = &dyn->rela_plt[i];
- u8* rp = rp_bytes + (u64)i * 24u;
+ u8* rp = rp_bytes + (u64)i * ELF64_RELA_SIZE;
wr_u64_le(rp + 0, rr->r_offset);
wr_u64_le(rp + 8, rr->r_info);
wr_u64_le(rp + 16, (u64)rr->r_addend);
diff --git a/src/link/link_jit.c b/src/link/link_jit.c
@@ -295,23 +295,9 @@ void* cfree_jit_lookup(CfreeJit* jit, const char* name) {
LinkSymId id;
const LinkSymbol* s;
if (!jit || !name) return NULL;
- /* C-symbol mangling: Mach-O on-disk names carry a leading `_` for
- * every C source-level symbol unconditionally (matching decl.c and
- * Apple cc). "test_main" → `_test_main`; `__init_array_start` →
- * `___init_array_start`. Mirrors link_intern_c_name. */
- if (jit->c->target.obj == CFREE_OBJ_MACHO) {
- size_t n = strlen(name);
- Heap* heap = (Heap*)jit->c->env->heap;
- char* buf = (char*)heap->alloc(heap, n + 2, 1);
- if (!buf) return NULL;
- buf[0] = '_';
- memcpy(buf + 1, name, n);
- buf[n + 1] = 0;
- sym = pool_intern(jit->c->global, buf, (u32)(n + 1));
- heap->free(heap, buf, n + 2);
- } else {
- sym = pool_intern_cstr(jit->c->global, name);
- }
+ /* C-symbol mangling lives in obj_format_c_mangle so JIT lookups by
+ * source-level name find the symbol regardless of target format. */
+ sym = obj_format_c_mangle(jit->c, name);
id = symhash_get(&jit->image->globals, sym);
if (id == LINK_SYM_NONE) return NULL;
s = LinkSyms_at(&jit->image->syms, id - 1);
diff --git a/src/link/link_layout.c b/src/link/link_layout.c
@@ -17,6 +17,7 @@
#include "core/util.h"
#include "core/vec.h"
#include "link/link.h"
+#include "link/link_arch.h"
#include "link/link_internal.h"
LinkImage* link_image_alloc(Compiler*); /* defined in link.c */
@@ -379,13 +380,9 @@ static void resolve_undefs(Linker* l, LinkImage* img) {
size_t namelen;
const char* nm = s->name ? pool_str(l->c->global, s->name, &namelen)
: (namelen = 0, "");
- /* On Mach-O the on-disk name carries a leading `_` C-mangle
- * byte; strip it for display so the diagnostic surface matches
- * the source-level symbol name across formats. */
- if (l->c->target.obj == CFREE_OBJ_MACHO && namelen >= 1 && nm[0] == '_') {
- ++nm;
- --namelen;
- }
+ /* Strip the format's C-mangle byte so the diagnostic shows the
+ * source-level name (matches decl.c's emit policy). */
+ obj_format_demangle_c(l->c, &nm, &namelen);
compiler_panic(l->c, no_loc(), "link: undefined reference to '%.*s'",
(int)namelen, nm);
}
@@ -1042,26 +1039,11 @@ static u64 eval_link_expr(Linker* l, LinkImage* img, u64 dot,
}
}
-/* Format-aware C-symbol mangling for linker-synthesized boundaries:
- * Mach-O prefixes every C identifier with `_` on disk (so an
- * `extern void (*__init_array_start[])(void);` compiles to a reference
- * to `___init_array_start`). ELF and others use the name verbatim. */
+/* Format-aware C-symbol mangling for linker-synthesized boundaries.
+ * Defers to obj_format_c_mangle so the boundary symbols match the
+ * mangling decl.c emits for the corresponding `extern` references. */
static Sym boundary_name(Linker* l, const char* name) {
- Compiler* c = l->c;
- if (c->target.obj == CFREE_OBJ_MACHO) {
- Heap* h = c->env->heap;
- size_t n = strlen(name);
- char* buf = (char*)h->alloc(h, n + 2u, 1);
- Sym s;
- if (!buf) return pool_intern_cstr(c->global, name);
- buf[0] = '_';
- memcpy(buf + 1, name, n);
- buf[n + 1] = 0;
- s = pool_intern(c->global, buf, (u32)(n + 1u));
- h->free(h, buf, n + 2u);
- return s;
- }
- return pool_intern_cstr(c->global, name);
+ return obj_format_c_mangle(l->c, name);
}
static void emit_boundary_sym(Linker* l, LinkImage* img, const char* name,
@@ -2283,6 +2265,11 @@ static void layout_iplt(Linker* l, LinkImage* img) {
Sym ifunc_init_name = 0;
Sym pairs_section_name;
Sym init_section_name;
+ const LinkArchDesc* arch = link_arch_desc_for(l->c);
+ if (!arch)
+ compiler_panic(img->c, no_loc(),
+ "link: layout_iplt: no arch descriptor for arch %u",
+ (u32)l->c->target.arch);
/* Pass A: count canonical IFUNC defs. resolve_undefs copies
* the def's kind into each cross-TU undef LinkSymbol of the
@@ -2309,7 +2296,7 @@ static void layout_iplt(Linker* l, LinkImage* img) {
base_vaddr = ALIGN_UP(base_vaddr, (u64)(page));
iplt_vaddr = base_vaddr;
- iplt_size = (u64)nifunc * 12u;
+ iplt_size = (u64)nifunc * (u64)arch->iplt_stub_size;
igot_vaddr = ALIGN_UP(iplt_vaddr + iplt_size, (u64)(page));
igot_size = (u64)nifunc * 8u;
pairs_vaddr = ALIGN_UP(igot_vaddr + igot_size, (u64)(page));
@@ -2546,58 +2533,16 @@ static void layout_iplt(Linker* l, LinkImage* img) {
img->iplt_pairs[2u * slot_idx + 0] = resolver_vaddr;
img->iplt_pairs[2u * slot_idx + 1] = slot_vaddr;
- stub_dst = iplt_bytes + (size_t)(slot_idx * 12u);
- switch (img->c->target.arch) {
- case CFREE_ARCH_ARM_64: {
- /* AArch64: ADRP x16, %page(slot) ; LDR x16, [x16, :lo12:slot]
- * ; BR x16. The two immediates are zeroed and patched at apply
- * time by the ADR_PREL_PG_HI21 / LDST64_ABS_LO12_NC relocs
- * emitted below. */
- wr_u32_le(stub_dst + 0, 0x90000010u); /* ADRP x16, #0 */
- wr_u32_le(stub_dst + 4, 0xf9400210u); /* LDR x16, [x16] */
- wr_u32_le(stub_dst + 8, 0xd61f0200u); /* BR x16 */
- break;
- }
- case CFREE_ARCH_RV64: {
- /* RV64: AUIPC t1, %hi ; LD t1, %lo(t1) ; JR t1. Both
- * displacements are PC-relative differences between the
- * stub and its slot — invariant under the segment-base
- * shift — so we encode them directly without relocs. */
- i64 disp = (i64)slot_vaddr - (i64)stub_vaddr;
- u32 hi20 = (u32)(((u64)(disp + 0x800)) >> 12) & 0xfffffu;
- u32 lo12 = (u32)((u64)disp & 0xfffu);
- u32 auipc = 0x00000317u | (hi20 << 12); /* auipc t1, hi */
- u32 ld_t1 = 0x00033303u | (lo12 << 20); /* ld t1, lo(t1) */
- u32 jr_t1 = 0x00030067u; /* jalr x0,t1,0 */
- wr_u32_le(stub_dst + 0, auipc);
- wr_u32_le(stub_dst + 4, ld_t1);
- wr_u32_le(stub_dst + 8, jr_t1);
- break;
- }
- case CFREE_ARCH_X86_64: {
- /* x86_64: `jmpq *slot(%rip)` (FF 25 disp32), 6 bytes, padded
- * to 12 with a 6-byte multi-byte NOP. disp32 is measured from
- * the end of the JMP (stub+6) to the slot. Like RV64, the
- * stub→slot displacement is invariant under image-base shift
- * so we encode it directly without a relocation. */
- i64 disp = (i64)slot_vaddr - (i64)(stub_vaddr + 6u);
- stub_dst[0] = 0xffu; /* opcode: JMP r/m64 */
- stub_dst[1] = 0x25u; /* ModRM: 00 100 101 — RIP+disp32 */
- wr_u32_le(stub_dst + 2, (u32)((u64)disp & 0xffffffffu));
- /* 6-byte multibyte NOP: 66 0F 1F 44 00 00 (NOPW 0(%rax,%rax,1)). */
- stub_dst[6] = 0x66u;
- stub_dst[7] = 0x0fu;
- stub_dst[8] = 0x1fu;
- stub_dst[9] = 0x44u;
- stub_dst[10] = 0x00u;
- stub_dst[11] = 0x00u;
- break;
- }
- default:
- compiler_panic(img->c, no_loc(),
- "link: ifunc iplt stub not implemented for arch %u",
- (unsigned)img->c->target.arch);
- }
+ /* Stub bytes and any apply-time relocs are arch-specific; the
+ * descriptor's emit_iplt_stub returns the relocs (offset / width /
+ * kind within the stub) and the caller fills in the section /
+ * vaddr fields below. Arches that can encode the stub→slot
+ * displacement inline (x64, rv64) report 0 relocs; aa64 reports 2
+ * (ADR_PREL_PG_HI21 + LDST64_ABS_LO12_NC). */
+ stub_dst = iplt_bytes + (size_t)slot_idx * (size_t)arch->iplt_stub_size;
+ LinkArchIPltReloc iplt_relocs[2];
+ u32 niplt_relocs =
+ arch->emit_iplt_stub(stub_dst, stub_vaddr, slot_vaddr, iplt_relocs);
/* Synthetic local symbol for the .igot.plt slot. */
memset(&slot_rec, 0, sizeof(slot_rec));
@@ -2625,35 +2570,27 @@ static void layout_iplt(Linker* l, LinkImage* img) {
resolver_rec.size = 0;
resolver_id = append_symbol(img, &resolver_rec);
- if (img->c->target.arch == CFREE_ARCH_ARM_64) {
- /* Reloc on the ADRP at stub+0. RV64's stub is fully encoded
- * inline above and needs no apply-time fixups. */
- memset(&rrec, 0, sizeof(rrec));
- rrec.input_id = LINK_INPUT_NONE;
- rrec.section_id = OBJ_SEC_NONE;
- rrec.link_section_id = iplt_sec->id;
- rrec.offset = (u32)(slot_idx * 12u);
- rrec.width = 4;
- rrec.write_vaddr = stub_vaddr;
- rrec.write_file_offset = stub_vaddr;
- rrec.kind = R_AARCH64_ADR_PREL_PG_HI21;
- rrec.target = slot_id;
- rrec.addend = 0;
- *append_reloc_slot(img) = rrec;
-
- /* Reloc on the LDR at stub+4. */
- memset(&rrec, 0, sizeof(rrec));
- rrec.input_id = LINK_INPUT_NONE;
- rrec.section_id = OBJ_SEC_NONE;
- rrec.link_section_id = iplt_sec->id;
- rrec.offset = (u32)(slot_idx * 12u + 4u);
- rrec.width = 4;
- rrec.write_vaddr = stub_vaddr + 4u;
- rrec.write_file_offset = stub_vaddr + 4u;
- rrec.kind = R_AARCH64_LDST64_ABS_LO12_NC;
- rrec.target = slot_id;
- rrec.addend = 0;
- *append_reloc_slot(img) = rrec;
+ /* Apply-time fixups for arches that can't encode the stub→slot
+ * displacement inline. The arch reported (offset_in_stub, width,
+ * kind) for each; everything else (section, target, vaddrs) is
+ * the linker's job. */
+ {
+ u32 ri;
+ for (ri = 0; ri < niplt_relocs; ++ri) {
+ memset(&rrec, 0, sizeof(rrec));
+ rrec.input_id = LINK_INPUT_NONE;
+ rrec.section_id = OBJ_SEC_NONE;
+ rrec.link_section_id = iplt_sec->id;
+ rrec.offset = (u32)(slot_idx * arch->iplt_stub_size) +
+ iplt_relocs[ri].offset_in_stub;
+ rrec.width = iplt_relocs[ri].width;
+ rrec.write_vaddr = stub_vaddr + iplt_relocs[ri].offset_in_stub;
+ rrec.write_file_offset = rrec.write_vaddr;
+ rrec.kind = iplt_relocs[ri].kind;
+ rrec.target = slot_id;
+ rrec.addend = 0;
+ *append_reloc_slot(img) = rrec;
+ }
}
/* .iplt.pairs[i].resolver = &resolver (R_ABS64) */
@@ -2690,9 +2627,9 @@ static void layout_iplt(Linker* l, LinkImage* img) {
* hidden behind the stub. */
s->kind = SK_FUNC;
s->section_id = iplt_sec->id;
- s->value = (u64)(slot_idx * 12u);
+ s->value = (u64)slot_idx * (u64)arch->iplt_stub_size;
s->vaddr = stub_vaddr;
- s->size = 12;
+ s->size = arch->iplt_stub_size;
++slot_idx;
}
diff --git a/src/link/link_macho.c b/src/link/link_macho.c
@@ -39,6 +39,7 @@
#include <string.h>
+#include "arch/aa64_isa.h"
#include "core/bytes.h"
#include "core/heap.h"
#include "core/pool.h"
@@ -296,6 +297,10 @@ static LinkSymbol* sym_at(LinkImage* img, LinkSymId id) {
static void write_u32(u8* p, u32 v) { wr_u32_le(p, v); }
+/* Mach-O __stubs scratch register: x16 (matches the AArch64 PLT ABI's
+ * IP0 scratch convention used for both ADRP base and BR target). */
+#define MZ_STUB_SCRATCH_X16 16u
+
/* Encode a 12-byte arm64 stub:
* ADRP x16, __got_slot@PAGE
* LDR x16, [x16, #__got_slot@PAGEOFF]
@@ -309,12 +314,10 @@ static void encode_stub(u8* out, u64 stub_vaddr, u64 got_slot_vaddr) {
u32 immhi = (u32)((imm21 >> 2) & 0x7ffffu);
u32 lo12 = (u32)(got_slot_vaddr & 0xfffu);
u32 imm12_ldr = (lo12 >> 3) & 0xfffu; /* slot 8-byte aligned */
- /* ADRP x16, ... */
- write_u32(out + 0, 0x90000010u | (immlo << 29) | (immhi << 5));
- /* LDR x16, [x16, #imm] */
- write_u32(out + 4, 0xF9400210u | (imm12_ldr << 10));
- /* BR x16 */
- write_u32(out + 8, 0xD61F0200u);
+ write_u32(out + 0, aa64_adrp(MZ_STUB_SCRATCH_X16, immlo, immhi));
+ write_u32(out + 4, aa64_ldr64_uimm12(MZ_STUB_SCRATCH_X16,
+ MZ_STUB_SCRATCH_X16, imm12_ldr));
+ write_u32(out + 8, aa64_br(MZ_STUB_SCRATCH_X16));
}
/* ---- pass: collect imports ---- */
diff --git a/src/obj/elf.h b/src/obj/elf.h
@@ -52,12 +52,16 @@
#define EM_AARCH64 0xB7
#define EM_RISCV 0xF3
-/* ---- header sizes (also literal e_*size fields) ---- */
+/* ---- header sizes (also literal e_*size fields) ----
+ * On-disk sizes of the ELF64 records the linker emits. Wire-format
+ * constants (per spec); never compute via sizeof on a host struct
+ * since alignment / padding may diverge. */
#define ELF64_EHDR_SIZE 64
#define ELF64_SHDR_SIZE 64
#define ELF64_PHDR_SIZE 56
-#define ELF64_SYM_SIZE 24
-#define ELF64_RELA_SIZE 24
+#define ELF64_SYM_SIZE 24u
+#define ELF64_RELA_SIZE 24u
+#define ELF64_DYN_SIZE 16u
/* ---- special section indices ---- */
#define SHN_UNDEF 0u
diff --git a/src/obj/obj.h b/src/obj/obj.h
@@ -425,6 +425,35 @@ Sym obj_secname_tbss(Compiler*);
* patches the direct ADRP/ADD bytes in place. */
int obj_format_extern_via_got(const Compiler*);
+/* True when `sym` must be reached via the GOT at the current site: the
+ * format binds extern data through indirection
+ * (obj_format_extern_via_got) AND the symbol is undefined in this
+ * object (section_id == OBJ_SEC_NONE). Pure format/symbol policy with
+ * no per-arch behavior — shared by every backend that emits GOT loads. */
+int obj_symbol_extern_via_got(const Compiler*, ObjBuilder*, ObjSymId);
+
+/* Apply the active object format's C-symbol mangling to `name` (a
+ * NUL-terminated C string) and return the result interned in
+ * `c->global`. Mach-O prepends a single `_`; ELF / COFF / Wasm intern
+ * verbatim. Mirrors the on-disk policy that decl.c / cc.c emit, so
+ * link-time and JIT-time lookups by source-level name find the symbol
+ * regardless of target. Mach-O temp buffer is allocated from
+ * `c->env->heap`. */
+Sym obj_format_c_mangle(Compiler*, const char* name);
+
+/* Inverse of obj_format_c_mangle for diagnostic display: if `*name`
+ * carries the active format's leading C-mangle byte, advance the
+ * pointer past it and decrement `*len`. No-op for formats with no
+ * prefix. Lets diagnostics print the source-level symbol name across
+ * targets. */
+void obj_format_demangle_c(const Compiler*, const char** name, size_t* len);
+
+/* Default entry symbol name for a freshly created Linker on the active
+ * object format: `_main` for Mach-O (LC_MAIN names main, dyld owns
+ * startup), `_start` for ELF / COFF / Wasm (set by crt1.o). Returned
+ * as a NUL-terminated literal; the caller interns. */
+const char* obj_format_default_entry_name(const Compiler*);
+
/* ---- file format emitters ---- */
void emit_elf(Compiler*, ObjBuilder*, Writer*);
void emit_coff(Compiler*, ObjBuilder*, Writer*);
diff --git a/src/obj/obj_secnames.c b/src/obj/obj_secnames.c
@@ -18,7 +18,11 @@
* panics in the same way and is filled in later. */
#include "obj/obj.h"
+
+#include <string.h>
+
#include "core/core.h"
+#include "core/heap.h"
#include "core/pool.h"
static Sym secname_panic_unimpl(Compiler* c, const char* which) {
@@ -107,3 +111,63 @@ int obj_format_extern_via_got(const Compiler* c) {
return 1;
return 0;
}
+
+int obj_symbol_extern_via_got(const Compiler* c, ObjBuilder* obj,
+                              ObjSymId sym) {
+  /* Only format-indirected symbols with no section (undefined) use the GOT. */
+  const ObjSym* rec =
+      obj_format_extern_via_got(c) ? obj_symbol_get(obj, sym) : 0;
+  return rec && rec->section_id == OBJ_SEC_NONE;
+}
+
+/* Intern `name` spelled the way the active object format stores C
+ * symbols. Mach-O puts one extra `_` in front of every C source-level
+ * symbol on disk (as Apple cc and decl.c do): "main" → `_main`,
+ * "_start" → `__start`, "__init_array_start" → `___init_array_start`.
+ * ELF / COFF / Wasm names are interned unchanged. The Mach-O scratch
+ * buffer comes from `c->env->heap` (as in boundary_name, cfree_jit_lookup
+ * and link_intern_c_name) and is freed on return; NULL args yield 0. */
+Sym obj_format_c_mangle(Compiler* c, const char* name) {
+  SrcLoc nowhere = {0, 0, 0};
+  size_t name_len, scratch_len;
+  char* scratch;
+  Heap* heap;
+  Sym interned;
+  if (!name || !c) return 0;
+  if (c->target.obj != CFREE_OBJ_MACHO)
+    return pool_intern_cstr(c->global, name);
+  name_len = strlen(name);
+  scratch_len = name_len + 2u; /* '_' + name + NUL */
+  heap = (Heap*)c->env->heap;
+  scratch = (char*)heap->alloc(heap, scratch_len, 1);
+  if (!scratch)
+    compiler_panic(c, nowhere, "obj_format_c_mangle: oom prefixing '%s'", name);
+  scratch[0] = '_';
+  memcpy(&scratch[1], name, name_len + 1u); /* brings the NUL along */
+  interned = pool_intern(c->global, scratch, (u32)(name_len + 1u));
+  heap->free(heap, scratch, scratch_len);
+  return interned;
+}
+
+/* Diagnostic-side counterpart of obj_format_c_mangle. On Mach-O, when
+ * `*name` is non-empty and begins with `_`, step `*name` past that byte
+ * and shrink `*len` by one so panic text shows the source-level name.
+ * Every other format, and any NULL argument, leaves both untouched. */
+void obj_format_demangle_c(const Compiler* c, const char** name,
+                           size_t* len) {
+  if (!c || !name || !len || !*name) return;
+  if (c->target.obj != CFREE_OBJ_MACHO) return; /* no prefix to strip */
+  if (*len == 0u || **name != '_') return;
+  *name += 1;
+  *len -= 1u;
+}
+
+/* Name of the entry symbol a newly created Linker starts with for the
+ * active object format: `_main` on Mach-O, since LC_MAIN names main
+ * directly and dyld owns C runtime startup; `_start` — the historical
+ * crt1.o entry — on ELF / COFF / Wasm and when `c` is NULL. The result
+ * is a NUL-terminated literal that the caller is expected to intern. */
+const char* obj_format_default_entry_name(const Compiler* c) {
+  const int is_macho = c && c->target.obj == CFREE_OBJ_MACHO;
+  return is_macho ? "_main" : "_start";
+}
diff --git a/test/libc/run.sh b/test/libc/run.sh
@@ -279,9 +279,12 @@ configure_cell() {
# Podman image per (arch, libc). Pinning the arch-specific repo
# (arm64v8/, amd64/, riscv64/) avoids the manifest-lookup detour
# that --platform triggers on hosts whose podman cache is mixed.
+    # Tags match the versions the sysroot extract.sh / Containerfile
+    # pulls, so each image is already in the local cache and
+    # --pull=never on `podman run` (below) skips a docker.io round-trip.
case "$libc:$arch" in
- musl:aarch64) PODMAN_IMAGE="docker.io/arm64v8/alpine:latest" ;;
- musl:x64) PODMAN_IMAGE="docker.io/amd64/alpine:latest" ;;
+ musl:aarch64) PODMAN_IMAGE="docker.io/arm64v8/alpine:3.20.10" ;;
+ musl:x64) PODMAN_IMAGE="docker.io/amd64/alpine:3.20.10" ;;
musl:rv64) PODMAN_IMAGE="docker.io/riscv64/alpine:edge" ;;
glibc:aarch64) PODMAN_IMAGE="docker.io/arm64v8/debian:bookworm-slim" ;;
glibc:x64) PODMAN_IMAGE="docker.io/amd64/debian:bookworm-slim" ;;
@@ -349,7 +352,11 @@ exec_one() {
local dir base
dir="$(cd "$(dirname "$exe")" && pwd)"
base="$(basename "$exe")"
- podman run --rm --platform "$PODMAN_PLATFORM" --net=none \
+ # --pull=never skips a docker.io manifest lookup that otherwise
+ # adds ~30 s per invocation even when the image is cached
+ # locally (per-cell images are pre-pulled by extract.sh).
+ podman run --rm --pull=never --platform "$PODMAN_PLATFORM" \
+ --net=none \
-v "$dir":/work:Z -w /work \
"$PODMAN_IMAGE" "./$base" \
>"$out" 2>"$err"