kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit f2a928e2f8901aec26bed7d2a6465e505109c280
parent 4fb9c5b2564b3755aea2b0f468b93988c849dd32
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun, 10 May 2026 10:41:02 -0700

arch/abi/obj: MULTIARCH Phase 2 — multi-arch seams + x64 stubs

Splits the AArch64-specific construction sites into per-arch TUs behind
arch-agnostic dispatchers, so adding a real x86_64 backend in phase 3 is
purely additive (new files only). aarch64 output is unchanged.

- arch/cgtarget.c owns the public cgtarget_new and switches on
  c->target.arch; aarch64.c::cgtarget_new renamed to aa64_cgtarget_new
  in arch/aa64.h. arch/x64.{h,c} adds the x64 CGTarget skeleton with a
  full vtable wired up to "x64: <method> not implemented" stubs.

- TargetABI carries an ABIVtable* (compute_func_info, va_list_type)
  selected by abi_init. Selection is intended to be keyed on
  (target.arch, target.os); at this phase it switches on target.arch
  only. The AAPCS64 classifier moves out of abi.c into
  abi/abi_aapcs64.c; abi/abi_sysv_x64.c exposes sysv_x64_vtable, which
  classifies every non-void value as ABI_ARG_INDIRECT and builds the
  __va_list_tag struct (correct, slow, unblocks bring-up).

- RelocKind grows R_X64_PC8/PLT32/GOTPCREL/{GLOB_DAT,JUMP_SLOT,RELATIVE,
  COPY}. obj/elf_reloc_x86_64.c mirrors elf_reloc_aarch64.c. emit_elf
  and link_emit_elf (renamed from link_emit_elf_aarch64) replace their
  AArch64-only panic with a switch picking e_machine + reloc translator
  from c->target.arch.

Diffstat:
M src/abi/abi.c | 211 ++++++++++++++++++-------------------------------------------------------------
A src/abi/abi_aapcs64.c | 147 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/abi/abi_internal.h | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/abi/abi_sysv_x64.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/api/pipeline.c | 14 ++++++++++++++
A src/arch/aa64.h | 8 ++++++++
M src/arch/aarch64.c | 18 +-----------------
A src/arch/cgtarget.c | 33 +++++++++++++++++++++++++++++++++
M src/arch/mc.c | 12 ++++++++----
A src/arch/x64.c | 387 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/arch/x64.h | 8 ++++++++
M src/link/link.c | 2 +-
M src/link/link_dyn.c | 4 ++--
M src/link/link_elf.c | 25 ++++++++++++++++++++-----
M src/link/link_internal.h | 5 +++--
M src/link/link_layout.c | 2 +-
M src/link/link_reloc.c | 2 +-
M src/obj/elf.h | 33 ++++++++++++++++++++++++++++++---
M src/obj/elf_emit.c | 40 +++++++++++++++++++++++++++-------------
A src/obj/elf_reloc_x86_64.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/obj/obj.h | 10 ++++++++++
21 files changed, 964 insertions(+), 214 deletions(-)

diff --git a/src/abi/abi.c b/src/abi/abi.c @@ -1,46 +1,31 @@ -/* TargetABI for AArch64 SysV (AAPCS64). +/* TargetABI dispatch and shared C-standard layout. * - * Single authority for target-dependent C layout and calling convention - * decisions. Type stays structural and ABI-neutral; sizes, alignments, - * record layouts, and argument/return classification are derived here - * from Compiler.target. + * The single authority for target-dependent C layout and calling + * convention decisions. Type stays structural and ABI-neutral; sizes, + * alignments, record layouts, and argument/return classification are + * derived here from Compiler.target. * - * v1 implements only AArch64 SysV at the level needed for the cg test - * harness (Group A scalars + Group C arithmetic + the lowering surface - * for Group B). Other arches will land alongside their backends. */ + * Per-ABI bits (function classification, __va_list shape) live in + * abi_aapcs64.c, abi_sysv_x64.c, ... abi_init switches on + * (target.arch, target.os) and installs the right vtable. The C- + * standard-driven scalar profile and record layout stay here so all + * ABIs share one impl. */ #include "abi/abi.h" #include <cfree.h> #include <string.h> +#include "abi/abi_internal.h" #include "core/arena.h" #include "core/core.h" #include "core/pool.h" -typedef struct FuncInfoCacheEntry FuncInfoCacheEntry; -struct FuncInfoCacheEntry { - const Type* fn; - ABIFuncInfo* info; - FuncInfoCacheEntry* next; -}; - -typedef struct RecordLayoutCacheEntry RecordLayoutCacheEntry; -struct RecordLayoutCacheEntry { - const Type* ty; - ABIRecordLayout* layout; - RecordLayoutCacheEntry* next; -}; - -struct TargetABI { - Compiler* c; - /* Per-TU cached lookups. */ - FuncInfoCacheEntry* fn_cache; - RecordLayoutCacheEntry* rec_cache; - const Type* va_list_cache; -}; - -/* ---- scalar profile ---- */ +/* ---- scalar profile ---- + * + * Shared by all currently supported ABIs (LP64 on Linux for both + * aarch64 and x86_64). 
When a Windows-x64 (LLP64) or 32-bit ABI lands, + * promote prim_info into the vtable. */ static ABITypeInfo prim_info(TargetABI* a, TypeKind k) { ABITypeInfo r = {0, 0, ABI_SC_INT, 0, 0, 0}; @@ -166,7 +151,7 @@ ABITypeInfo abi_type_info(TargetABI* a, const Type* t) { return abi_type_info( a, t->enm.base ? t->enm.base : type_prim(a->c->global, TY_INT)); case TY_FUNC: - /* sizeof(function) is undefined in C; AAPCS uses 1 for arithmetic. */ + /* sizeof(function) is undefined in C; use 1 for arithmetic. */ r.size = 1; r.align = 1; return r; @@ -175,12 +160,20 @@ ABITypeInfo abi_type_info(TargetABI* a, const Type* t) { } } +ABITypeInfo abi_internal_type_info(TargetABI* a, const Type* t) { + return abi_type_info(a, t); +} + u32 abi_sizeof(TargetABI* a, const Type* t) { return abi_type_info(a, t).size; } u32 abi_alignof(TargetABI* a, const Type* t) { return abi_type_info(a, t).align; } -/* ---- record layout (struct/union) ---- */ +/* ---- record layout (struct/union) ---- + * + * Shared by all currently supported ABIs: storage-unit-based layout with + * natural alignment, no bitfield packing extensions. When a Windows-x64 + * (MSVC bitfield rules) ABI lands, promote this into the vtable. 
*/ static ABIRecordLayout* compute_record_layout(TargetABI* a, const Type* t) { ABIRecordLayout* L = arena_new(a->c->tu, ABIRecordLayout); @@ -243,125 +236,14 @@ const ABIRecordLayout* abi_record_layout(TargetABI* a, const Type* t) { return L; } -/* ---- function classification (AArch64 SysV / AAPCS64) ---- - * - * v1 covers the cases the cg test harness exercises: - * void -> IGNORE - * integer ≤ 8B -> DIRECT, one INT part in a register - * integer 16B -> DIRECT, two INT parts (X0+X1) - * pointer -> DIRECT, one INT part in a register - * float/double -> DIRECT, one FP part in a register - * small struct -> DIRECT, INT parts (HFA/HVA refinement: TODO) - * large struct -> INDIRECT (sret for return; passed by reference) - * Variadics, HFA classification, and split GPR+stack tail still - * land with the parser. */ - -static void classify_scalar(TargetABI* a, const Type* t, ABIArgInfo* out) { - ABITypeInfo ti = abi_type_info(a, t); - out->kind = ABI_ARG_DIRECT; - out->flags = ABI_AF_NONE; - out->indirect_align = 0; - - ABIArgPart* parts = arena_new(a->c->tu, ABIArgPart); - memset(parts, 0, sizeof *parts); - parts->cls = (ti.scalar_kind == ABI_SC_FLOAT) ? ABI_CLASS_FP : ABI_CLASS_INT; - parts->loc = ABI_LOC_REG; - parts->size = ti.size; - parts->align = ti.align; - parts->src_offset = 0; - - out->parts = parts; - out->nparts = 1; -} - -static void classify_void(ABIArgInfo* out) { - memset(out, 0, sizeof *out); - out->kind = ABI_ARG_IGNORE; -} - -static void classify_aggregate(TargetABI* a, const Type* t, ABIArgInfo* out, - int is_return) { - ABITypeInfo ti = abi_type_info(a, t); - if (ti.size == 0) { - classify_void(out); - return; - } - /* AAPCS64: aggregates ≤ 16 bytes pass in up to 2 GPRs (or HFA in FP regs; - * v1 ignores HFA). Larger aggregates pass by reference (caller copy for - * args, sret pointer for return). 
*/ - if (ti.size <= 16) { - u32 nparts = (ti.size + 7) / 8; - ABIArgPart* parts = arena_array(a->c->tu, ABIArgPart, nparts); - memset(parts, 0, sizeof(ABIArgPart) * nparts); - u32 off = 0; - for (u32 i = 0; i < nparts; ++i) { - u32 chunk = (ti.size - off > 8) ? 8 : (ti.size - off); - parts[i].cls = ABI_CLASS_INT; - parts[i].loc = ABI_LOC_REG; - parts[i].size = chunk; - parts[i].align = 8; - parts[i].src_offset = off; - off += chunk; - } - out->kind = ABI_ARG_DIRECT; - out->flags = ABI_AF_NONE; - out->parts = parts; - out->nparts = (u16)nparts; - out->indirect_align = 0; - } else { - out->kind = ABI_ARG_INDIRECT; - out->flags = is_return ? ABI_AF_SRET : ABI_AF_BYVAL; - out->indirect_align = ti.align; - out->parts = NULL; - out->nparts = 0; - } -} - -static void classify_one(TargetABI* a, const Type* t, ABIArgInfo* out, - int is_return) { - if (!t || t->kind == TY_VOID) { - classify_void(out); - return; - } - switch (t->kind) { - case TY_STRUCT: - case TY_UNION: - classify_aggregate(a, t, out, is_return); - return; - default: - classify_scalar(a, t, out); - return; - } -} - -static ABIFuncInfo* compute_func_info(TargetABI* a, const Type* fn) { - ABIFuncInfo* info = arena_new(a->c->tu, ABIFuncInfo); - memset(info, 0, sizeof *info); - - classify_one(a, fn->fn.ret, &info->ret, /*is_return=*/1); - info->has_sret = (info->ret.kind == ABI_ARG_INDIRECT) ? 
1 : 0; - info->variadic = fn->fn.variadic; - - info->nparams = fn->fn.nparams; - if (fn->fn.nparams) { - ABIArgInfo* arr = arena_array(a->c->tu, ABIArgInfo, fn->fn.nparams); - memset(arr, 0, sizeof(ABIArgInfo) * fn->fn.nparams); - for (u16 i = 0; i < fn->fn.nparams; ++i) { - classify_one(a, fn->fn.params[i], &arr[i], /*is_return=*/0); - } - info->params = arr; - } else { - info->params = NULL; - } - return info; -} +/* ---- function classification (vtabled) ---- */ const ABIFuncInfo* abi_func_info(TargetABI* a, const Type* fn_type) { if (!fn_type || fn_type->kind != TY_FUNC) return NULL; for (FuncInfoCacheEntry* e = a->fn_cache; e; e = e->next) { if (e->fn == fn_type) return e->info; } - ABIFuncInfo* info = compute_func_info(a, fn_type); + ABIFuncInfo* info = a->vt->compute_func_info(a, fn_type); if (!info) return NULL; FuncInfoCacheEntry* e = arena_new(a->c->tu, FuncInfoCacheEntry); e->fn = fn_type; @@ -392,35 +274,33 @@ const Type* abi_intptr_type(TargetABI* a, Pool* p) { const Type* abi_uintptr_type(TargetABI* a, Pool* p) { return size_or_uintptr(a, p); } + const Type* abi_va_list_type(TargetABI* a, Pool* p) { - /* AAPCS64 __va_list: 3 pointers (__stack, __gr_top, __vr_top) followed - * by 2 ints (__gr_offs, __vr_offs). Total 32 bytes, 8-aligned. 
*/ if (a->va_list_cache) return a->va_list_cache; - const Type* vp = type_ptr(p, type_void(p)); - const Type* it = type_prim(p, TY_INT); - Sym name = pool_intern_cstr(p, "__va_list"); - SrcLoc nl = {0, 0, 0}; - TagId tg = type_tag_new(p, TAG_STRUCT, name, nl); - TypeRecordBuilder* b = type_record_begin(p, TY_STRUCT, tg, name); - type_record_field( - b, (Field){.name = pool_intern_cstr(p, "__stack"), .type = vp}); - type_record_field( - b, (Field){.name = pool_intern_cstr(p, "__gr_top"), .type = vp}); - type_record_field( - b, (Field){.name = pool_intern_cstr(p, "__vr_top"), .type = vp}); - type_record_field( - b, (Field){.name = pool_intern_cstr(p, "__gr_offs"), .type = it}); - type_record_field( - b, (Field){.name = pool_intern_cstr(p, "__vr_offs"), .type = it}); - a->va_list_cache = type_record_end(p, b); + a->va_list_cache = a->vt->va_list_type(a, p); return a->va_list_cache; } /* ---- lifecycle ---- */ +static const ABIVtable* select_vtable(Compiler* c) { + switch (c->target.arch) { + case CFREE_ARCH_ARM_64: + return &aapcs64_vtable; + case CFREE_ARCH_X86_64: + return &sysv_x64_vtable; + default: { + SrcLoc loc = {0, 0, 0}; + compiler_panic(c, loc, "abi_init: unsupported target arch %d", + (int)c->target.arch); + } + } +} + void abi_init(TargetABI* a, Compiler* c) { memset(a, 0, sizeof *a); a->c = c; + a->vt = select_vtable(c); } void abi_fini(TargetABI* a) { @@ -429,6 +309,7 @@ void abi_fini(TargetABI* a) { a->fn_cache = NULL; a->rec_cache = NULL; a->va_list_cache = NULL; + a->vt = NULL; a->c = NULL; } diff --git a/src/abi/abi_aapcs64.c b/src/abi/abi_aapcs64.c @@ -0,0 +1,147 @@ +/* AAPCS64 (AArch64 SysV) ABI dispatch. 
+ * + * v1 covers the cases the cg test harness exercises: + * void -> IGNORE + * integer ≤ 8B -> DIRECT, one INT part in a register + * integer 16B -> DIRECT, two INT parts (X0+X1) + * pointer -> DIRECT, one INT part in a register + * float/double -> DIRECT, one FP part in a register + * small struct -> DIRECT, INT parts (HFA/HVA refinement: TODO) + * large struct -> INDIRECT (sret for return; passed by reference) + * Variadics, HFA classification, and split GPR+stack tail still + * land with the parser. */ + +#include <string.h> + +#include "abi/abi_internal.h" +#include "core/arena.h" +#include "core/core.h" +#include "core/pool.h" + +static void classify_scalar(TargetABI* a, const Type* t, ABIArgInfo* out) { + ABITypeInfo ti = abi_internal_type_info(a, t); + out->kind = ABI_ARG_DIRECT; + out->flags = ABI_AF_NONE; + out->indirect_align = 0; + + ABIArgPart* parts = arena_new(a->c->tu, ABIArgPart); + memset(parts, 0, sizeof *parts); + parts->cls = (ti.scalar_kind == ABI_SC_FLOAT) ? ABI_CLASS_FP : ABI_CLASS_INT; + parts->loc = ABI_LOC_REG; + parts->size = ti.size; + parts->align = ti.align; + parts->src_offset = 0; + + out->parts = parts; + out->nparts = 1; +} + +static void classify_void(ABIArgInfo* out) { + memset(out, 0, sizeof *out); + out->kind = ABI_ARG_IGNORE; +} + +static void classify_aggregate(TargetABI* a, const Type* t, ABIArgInfo* out, + int is_return) { + ABITypeInfo ti = abi_internal_type_info(a, t); + if (ti.size == 0) { + classify_void(out); + return; + } + /* AAPCS64: aggregates ≤ 16 bytes pass in up to 2 GPRs (or HFA in FP regs; + * v1 ignores HFA). Larger aggregates pass by reference (caller copy for + * args, sret pointer for return). */ + if (ti.size <= 16) { + u32 nparts = (ti.size + 7) / 8; + ABIArgPart* parts = arena_array(a->c->tu, ABIArgPart, nparts); + memset(parts, 0, sizeof(ABIArgPart) * nparts); + u32 off = 0; + for (u32 i = 0; i < nparts; ++i) { + u32 chunk = (ti.size - off > 8) ? 
8 : (ti.size - off); + parts[i].cls = ABI_CLASS_INT; + parts[i].loc = ABI_LOC_REG; + parts[i].size = chunk; + parts[i].align = 8; + parts[i].src_offset = off; + off += chunk; + } + out->kind = ABI_ARG_DIRECT; + out->flags = ABI_AF_NONE; + out->parts = parts; + out->nparts = (u16)nparts; + out->indirect_align = 0; + } else { + out->kind = ABI_ARG_INDIRECT; + out->flags = is_return ? ABI_AF_SRET : ABI_AF_BYVAL; + out->indirect_align = ti.align; + out->parts = NULL; + out->nparts = 0; + } +} + +static void classify_one(TargetABI* a, const Type* t, ABIArgInfo* out, + int is_return) { + if (!t || t->kind == TY_VOID) { + classify_void(out); + return; + } + switch (t->kind) { + case TY_STRUCT: + case TY_UNION: + classify_aggregate(a, t, out, is_return); + return; + default: + classify_scalar(a, t, out); + return; + } +} + +static ABIFuncInfo* aapcs64_compute_func_info(TargetABI* a, const Type* fn) { + ABIFuncInfo* info = arena_new(a->c->tu, ABIFuncInfo); + memset(info, 0, sizeof *info); + + classify_one(a, fn->fn.ret, &info->ret, /*is_return=*/1); + info->has_sret = (info->ret.kind == ABI_ARG_INDIRECT) ? 1 : 0; + info->variadic = fn->fn.variadic; + + info->nparams = fn->fn.nparams; + if (fn->fn.nparams) { + ABIArgInfo* arr = arena_array(a->c->tu, ABIArgInfo, fn->fn.nparams); + memset(arr, 0, sizeof(ABIArgInfo) * fn->fn.nparams); + for (u16 i = 0; i < fn->fn.nparams; ++i) { + classify_one(a, fn->fn.params[i], &arr[i], /*is_return=*/0); + } + info->params = arr; + } else { + info->params = NULL; + } + return info; +} + +static const Type* aapcs64_va_list_type(TargetABI* a, Pool* p) { + /* AAPCS64 __va_list: 3 pointers (__stack, __gr_top, __vr_top) followed + * by 2 ints (__gr_offs, __vr_offs). Total 32 bytes, 8-aligned. 
*/ + (void)a; + const Type* vp = type_ptr(p, type_void(p)); + const Type* it = type_prim(p, TY_INT); + Sym name = pool_intern_cstr(p, "__va_list"); + SrcLoc nl = {0, 0, 0}; + TagId tg = type_tag_new(p, TAG_STRUCT, name, nl); + TypeRecordBuilder* b = type_record_begin(p, TY_STRUCT, tg, name); + type_record_field( + b, (Field){.name = pool_intern_cstr(p, "__stack"), .type = vp}); + type_record_field( + b, (Field){.name = pool_intern_cstr(p, "__gr_top"), .type = vp}); + type_record_field( + b, (Field){.name = pool_intern_cstr(p, "__vr_top"), .type = vp}); + type_record_field( + b, (Field){.name = pool_intern_cstr(p, "__gr_offs"), .type = it}); + type_record_field( + b, (Field){.name = pool_intern_cstr(p, "__vr_offs"), .type = it}); + return type_record_end(p, b); +} + +const ABIVtable aapcs64_vtable = { + .compute_func_info = aapcs64_compute_func_info, + .va_list_type = aapcs64_va_list_type, +}; diff --git a/src/abi/abi_internal.h b/src/abi/abi_internal.h @@ -0,0 +1,54 @@ +#ifndef CFREE_ABI_INTERNAL_H +#define CFREE_ABI_INTERNAL_H + +#include "abi/abi.h" + +/* Internal: per-ABI dispatch table. + * + * Selected by abi_init based on (target.arch, target.os). The pieces here + * are the parts that vary by ABI; the C-standard-driven scalar profile and + * record layout live in abi.c and are shared. */ + +typedef struct ABIVtable { + /* Compute the ABIFuncInfo for a function type. The cache wrapper in + * abi.c calls this once per Type and memoizes the result. */ + ABIFuncInfo* (*compute_func_info)(TargetABI*, const Type* fn); + /* Build the per-ABI __va_list type. The wrapper in abi.c memoizes. */ + const Type* (*va_list_type)(TargetABI*, Pool*); +} ABIVtable; + +/* Per-ABI vtables exposed by their TUs. */ +extern const ABIVtable aapcs64_vtable; +extern const ABIVtable sysv_x64_vtable; + +/* Shared TargetABI internals. The struct definition is here so each ABI + * TU can reach into the per-TU caches via TargetABI*. 
abi.c owns the + * cache plumbing; the per-ABI TUs only allocate ABIFuncInfo / record + * builders out of c->tu. */ +typedef struct FuncInfoCacheEntry FuncInfoCacheEntry; +typedef struct RecordLayoutCacheEntry RecordLayoutCacheEntry; + +struct FuncInfoCacheEntry { + const Type* fn; + ABIFuncInfo* info; + FuncInfoCacheEntry* next; +}; + +struct RecordLayoutCacheEntry { + const Type* ty; + ABIRecordLayout* layout; + RecordLayoutCacheEntry* next; +}; + +struct TargetABI { + Compiler* c; + const ABIVtable* vt; + FuncInfoCacheEntry* fn_cache; + RecordLayoutCacheEntry* rec_cache; + const Type* va_list_cache; +}; + +/* Shared helpers exposed to per-ABI TUs. */ +ABITypeInfo abi_internal_type_info(TargetABI*, const Type*); + +#endif diff --git a/src/abi/abi_sysv_x64.c b/src/abi/abi_sysv_x64.c @@ -0,0 +1,79 @@ +/* SysV AMD64 ABI — phase-2 stub. + * + * Initial classifier returns ABI_ARG_INDIRECT for everything: correct + * (every value passes through memory), slow, but unblocks bring-up of + * the x64 codegen path. Phase 3 replaces this with the real eight-byte + * INTEGER/SSE classifier (see doc/MULTIARCH.md §4 phase 3 step 2). */ + +#include <string.h> + +#include "abi/abi_internal.h" +#include "core/arena.h" +#include "core/core.h" +#include "core/pool.h" + +static void classify_indirect(TargetABI* a, const Type* t, ABIArgInfo* out, + int is_return) { + if (!t || t->kind == TY_VOID) { + memset(out, 0, sizeof *out); + out->kind = ABI_ARG_IGNORE; + return; + } + ABITypeInfo ti = abi_internal_type_info(a, t); + out->kind = ABI_ARG_INDIRECT; + out->flags = is_return ? ABI_AF_SRET : ABI_AF_BYVAL; + out->indirect_align = ti.align ? ti.align : 8; + out->parts = NULL; + out->nparts = 0; +} + +static ABIFuncInfo* sysv_x64_compute_func_info(TargetABI* a, const Type* fn) { + ABIFuncInfo* info = arena_new(a->c->tu, ABIFuncInfo); + memset(info, 0, sizeof *info); + + classify_indirect(a, fn->fn.ret, &info->ret, /*is_return=*/1); + info->has_sret = (info->ret.kind == ABI_ARG_INDIRECT) ? 
1 : 0; + info->variadic = fn->fn.variadic; + + info->nparams = fn->fn.nparams; + if (fn->fn.nparams) { + ABIArgInfo* arr = arena_array(a->c->tu, ABIArgInfo, fn->fn.nparams); + memset(arr, 0, sizeof(ABIArgInfo) * fn->fn.nparams); + for (u16 i = 0; i < fn->fn.nparams; ++i) { + classify_indirect(a, fn->fn.params[i], &arr[i], /*is_return=*/0); + } + info->params = arr; + } else { + info->params = NULL; + } + return info; +} + +static const Type* sysv_x64_va_list_type(TargetABI* a, Pool* p) { + /* SysV AMD64 __va_list_tag: { unsigned gp_offset; unsigned fp_offset; + * void* overflow_arg_area; void* reg_save_area; }. 24 bytes, 8-aligned. + * The va_list type is an array of one __va_list_tag element so taking + * its address yields a pointer, matching the macro semantics. */ + (void)a; + const Type* vp = type_ptr(p, type_void(p)); + const Type* uit = type_prim(p, TY_UINT); + Sym name = pool_intern_cstr(p, "__va_list_tag"); + SrcLoc nl = {0, 0, 0}; + TagId tg = type_tag_new(p, TAG_STRUCT, name, nl); + TypeRecordBuilder* b = type_record_begin(p, TY_STRUCT, tg, name); + type_record_field( + b, (Field){.name = pool_intern_cstr(p, "gp_offset"), .type = uit}); + type_record_field( + b, (Field){.name = pool_intern_cstr(p, "fp_offset"), .type = uit}); + type_record_field( + b, + (Field){.name = pool_intern_cstr(p, "overflow_arg_area"), .type = vp}); + type_record_field( + b, (Field){.name = pool_intern_cstr(p, "reg_save_area"), .type = vp}); + return type_record_end(p, b); +} + +const ABIVtable sysv_x64_vtable = { + .compute_func_info = sysv_x64_compute_func_info, + .va_list_type = sysv_x64_va_list_type, +}; diff --git a/src/api/pipeline.c b/src/api/pipeline.c @@ -1040,6 +1040,20 @@ static const char* reloc_kind_name(u16 kind) { return "R_AARCH64_RELATIVE"; case R_AARCH64_COPY: return "R_AARCH64_COPY"; + case R_X64_PC8: + return "R_X86_64_PC8"; + case R_X64_PLT32: + return "R_X86_64_PLT32"; + case R_X64_GOTPCREL: + return "R_X86_64_GOTPCREL"; + case R_X64_GLOB_DAT: + return 
"R_X86_64_GLOB_DAT"; + case R_X64_JUMP_SLOT: + return "R_X86_64_JUMP_SLOT"; + case R_X64_RELATIVE: + return "R_X86_64_RELATIVE"; + case R_X64_COPY: + return "R_X86_64_COPY"; case R_RV_HI20: return "R_RISCV_HI20"; case R_RV_LO12_I: diff --git a/src/arch/aa64.h b/src/arch/aa64.h @@ -0,0 +1,8 @@ +#ifndef CFREE_ARCH_AA64_H +#define CFREE_ARCH_AA64_H + +#include "arch/arch.h" + +CGTarget* aa64_cgtarget_new(Compiler*, ObjBuilder*, MCEmitter*); + +#endif diff --git a/src/arch/aarch64.c b/src/arch/aarch64.c @@ -2995,14 +2995,7 @@ static void aa_destroy(CGTarget* t) { (void)t; /* arena-backed */ } static void cgt_cleanup(void* arg) { cgtarget_free((CGTarget*)arg); } -CGTarget* cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) { - if (c->target.arch != CFREE_ARCH_ARM_64) { - SrcLoc loc = {0, 0, 0}; - compiler_panic(c, loc, - "cgtarget_new: only AArch64 implemented in v1 (got arch %d)", - (int)c->target.arch); - } - +CGTarget* aa64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) { AAImpl* a = arena_new(c->tu, AAImpl); memset(a, 0, sizeof *a); @@ -3081,12 +3074,3 @@ CGTarget* cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) { compiler_defer(c, cgt_cleanup, t); return t; } - -void cgtarget_finalize(CGTarget* t) { - if (t && t->finalize) t->finalize(t); -} - -void cgtarget_free(CGTarget* t) { - if (!t) return; - /* Arena-backed; nothing to free. */ -} diff --git a/src/arch/cgtarget.c b/src/arch/cgtarget.c @@ -0,0 +1,33 @@ +/* Public CGTarget constructor — dispatches by Compiler.target.arch. + * + * Per-arch constructors live in their own files (aa64.c, x64.c). The + * lifecycle helpers (cgtarget_finalize, cgtarget_free) are arch-agnostic + * shims over the vtable. 
*/ + +#include "arch/aa64.h" +#include "arch/arch.h" +#include "arch/x64.h" + +CGTarget* cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) { + switch (c->target.arch) { + case CFREE_ARCH_ARM_64: + return aa64_cgtarget_new(c, o, m); + case CFREE_ARCH_X86_64: + return x64_cgtarget_new(c, o, m); + default: { + SrcLoc loc = {0, 0, 0}; + compiler_panic(c, loc, + "cgtarget_new: unsupported target arch %d", + (int)c->target.arch); + } + } +} + +void cgtarget_finalize(CGTarget* t) { + if (t && t->finalize) t->finalize(t); +} + +void cgtarget_free(CGTarget* t) { + if (!t) return; + /* Arena-backed; nothing to free. */ +} diff --git a/src/arch/mc.c b/src/arch/mc.c @@ -5,10 +5,14 @@ * relocations / source-location stamps. Encoding is the caller's job — * MCEmitter writes whatever bytes it's handed. * - * This implementation is target-agnostic: every supported arch shares - * one MCEmitter; arch-specific differences live in CGTarget. Per-arch - * MCEmitter subclasses can layer on later if encoding cache or - * peephole-merging need shared state with the emitter. + * One MCEmitter serves every supported arch — arch-specific differences + * live in CGTarget. The fixup encoder (apply_fixup) is the union of all + * known arches' label-ref reloc encodings rather than a generic library: + * each new arch adds cases to that switch. The cost of an arch enum + * here is one switch case; promoting fixup application to a per-arch + * vtable would be premature. Per-arch MCEmitter subclasses can layer on + * later if encoding cache or peephole-merging need shared state with + * the emitter. * * MCLabel handling: ids are 1-based (0 = MC_LABEL_NONE). Each label * carries either a placement (sec_id, offset) or a list of pending diff --git a/src/arch/x64.c b/src/arch/x64.c @@ -0,0 +1,387 @@ +/* x86_64 CGTarget skeleton. + * + * Phase-2 placeholder: the vtable is wired up but every method panics. + * This proves the cgtarget_new dispatch reaches an x64-shaped target. 
+ * Phase 3 fills in real codegen — see doc/MULTIARCH.md §4. */ + +#include <string.h> + +#include "arch/arch.h" +#include "arch/x64.h" +#include "core/arena.h" + +typedef struct XImpl { + CGTarget base; + SrcLoc loc; +} XImpl; + +static SrcLoc xx_loc(void) { return (SrcLoc){0, 0, 0}; } + +_Noreturn static void xx_panic(CGTarget* t, const char* what) { + compiler_panic(t->c, xx_loc(), "x64: %s not implemented", what); +} + +static void xx_func_begin(CGTarget* t, const CGFuncDesc* d) { + (void)d; + xx_panic(t, "func_begin"); +} +static void xx_func_end(CGTarget* t) { xx_panic(t, "func_end"); } + +static Reg xx_alloc_reg(CGTarget* t, RegClass cls, const Type* ty) { + (void)cls; + (void)ty; + xx_panic(t, "alloc_reg"); +} +static void xx_free_reg(CGTarget* t, Reg r) { + (void)r; + xx_panic(t, "free_reg"); +} +static FrameSlot xx_frame_slot(CGTarget* t, const FrameSlotDesc* d) { + (void)d; + xx_panic(t, "frame_slot"); +} +static void xx_param(CGTarget* t, const CGParamDesc* d) { + (void)d; + xx_panic(t, "param"); +} +static const Reg* xx_clobbers(CGTarget* t, RegClass cls, u32* nregs) { + (void)cls; + (void)nregs; + xx_panic(t, "clobbers"); +} +static void xx_spill_reg(CGTarget* t, Operand a, FrameSlot s, MemAccess m) { + (void)a; + (void)s; + (void)m; + xx_panic(t, "spill_reg"); +} +static void xx_reload_reg(CGTarget* t, Operand a, FrameSlot s, MemAccess m) { + (void)a; + (void)s; + (void)m; + xx_panic(t, "reload_reg"); +} + +static Label xx_label_new(CGTarget* t) { xx_panic(t, "label_new"); } +static void xx_label_place(CGTarget* t, Label l) { + (void)l; + xx_panic(t, "label_place"); +} +static void xx_jump(CGTarget* t, Label l) { + (void)l; + xx_panic(t, "jump"); +} +static void xx_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b, + Label l) { + (void)op; + (void)a; + (void)b; + (void)l; + xx_panic(t, "cmp_branch"); +} + +static CGScope xx_scope_begin(CGTarget* t, const CGScopeDesc* d) { + (void)d; + xx_panic(t, "scope_begin"); +} +static void 
xx_scope_else(CGTarget* t, CGScope s) { + (void)s; + xx_panic(t, "scope_else"); +} +static void xx_scope_end(CGTarget* t, CGScope s) { + (void)s; + xx_panic(t, "scope_end"); +} +static void xx_break_to(CGTarget* t, CGScope s) { + (void)s; + xx_panic(t, "break_to"); +} +static void xx_continue_to(CGTarget* t, CGScope s) { + (void)s; + xx_panic(t, "continue_to"); +} + +static void xx_load_imm(CGTarget* t, Operand d, i64 i) { + (void)d; + (void)i; + xx_panic(t, "load_imm"); +} +static void xx_load_const(CGTarget* t, Operand d, ConstBytes b) { + (void)d; + (void)b; + xx_panic(t, "load_const"); +} +static void xx_copy(CGTarget* t, Operand d, Operand s) { + (void)d; + (void)s; + xx_panic(t, "copy"); +} +static void xx_load(CGTarget* t, Operand d, Operand a, MemAccess m) { + (void)d; + (void)a; + (void)m; + xx_panic(t, "load"); +} +static void xx_store(CGTarget* t, Operand a, Operand s, MemAccess m) { + (void)a; + (void)s; + (void)m; + xx_panic(t, "store"); +} +static void xx_addr_of(CGTarget* t, Operand d, Operand l) { + (void)d; + (void)l; + xx_panic(t, "addr_of"); +} +static void xx_tls_addr_of(CGTarget* t, Operand d, ObjSymId s, i64 a) { + (void)d; + (void)s; + (void)a; + xx_panic(t, "tls_addr_of"); +} +static void xx_copy_bytes(CGTarget* t, Operand da, Operand sa, + AggregateAccess g) { + (void)da; + (void)sa; + (void)g; + xx_panic(t, "copy_bytes"); +} +static void xx_set_bytes(CGTarget* t, Operand da, Operand bv, + AggregateAccess g) { + (void)da; + (void)bv; + (void)g; + xx_panic(t, "set_bytes"); +} +static void xx_bitfield_load(CGTarget* t, Operand d, Operand ra, + BitFieldAccess b) { + (void)d; + (void)ra; + (void)b; + xx_panic(t, "bitfield_load"); +} +static void xx_bitfield_store(CGTarget* t, Operand ra, Operand s, + BitFieldAccess b) { + (void)ra; + (void)s; + (void)b; + xx_panic(t, "bitfield_store"); +} + +static void xx_binop(CGTarget* t, BinOp op, Operand d, Operand a, Operand b) { + (void)op; + (void)d; + (void)a; + (void)b; + xx_panic(t, "binop"); +} 
+static void xx_unop(CGTarget* t, UnOp op, Operand d, Operand a) { + (void)op; + (void)d; + (void)a; + xx_panic(t, "unop"); +} +static void xx_cmp(CGTarget* t, CmpOp op, Operand d, Operand a, Operand b) { + (void)op; + (void)d; + (void)a; + (void)b; + xx_panic(t, "cmp"); +} +static void xx_convert(CGTarget* t, ConvKind k, Operand d, Operand s) { + (void)k; + (void)d; + (void)s; + xx_panic(t, "convert"); +} + +static void xx_call(CGTarget* t, const CGCallDesc* d) { + (void)d; + xx_panic(t, "call"); +} +static void xx_ret(CGTarget* t, const CGABIValue* v) { + (void)v; + xx_panic(t, "ret"); +} + +static void xx_alloca_(CGTarget* t, Operand d, Operand s, u32 a) { + (void)d; + (void)s; + (void)a; + xx_panic(t, "alloca"); +} +static void xx_va_start_(CGTarget* t, Operand a) { + (void)a; + xx_panic(t, "va_start"); +} +static void xx_va_arg_(CGTarget* t, Operand d, Operand a, const Type* ty) { + (void)d; + (void)a; + (void)ty; + xx_panic(t, "va_arg"); +} +static void xx_va_end_(CGTarget* t, Operand a) { + (void)a; + xx_panic(t, "va_end"); +} +static void xx_va_copy_(CGTarget* t, Operand d, Operand s) { + (void)d; + (void)s; + xx_panic(t, "va_copy"); +} + +static void xx_atomic_load(CGTarget* t, Operand d, Operand a, MemAccess m, + MemOrder o) { + (void)d; + (void)a; + (void)m; + (void)o; + xx_panic(t, "atomic_load"); +} +static void xx_atomic_store(CGTarget* t, Operand a, Operand s, MemAccess m, + MemOrder o) { + (void)a; + (void)s; + (void)m; + (void)o; + xx_panic(t, "atomic_store"); +} +static void xx_atomic_rmw(CGTarget* t, AtomicOp op, Operand d, Operand a, + Operand v, MemAccess m, MemOrder o) { + (void)op; + (void)d; + (void)a; + (void)v; + (void)m; + (void)o; + xx_panic(t, "atomic_rmw"); +} +static void xx_atomic_cas(CGTarget* t, Operand p, Operand ok, Operand a, + Operand e, Operand des, MemAccess m, MemOrder so, + MemOrder fo) { + (void)p; + (void)ok; + (void)a; + (void)e; + (void)des; + (void)m; + (void)so; + (void)fo; + xx_panic(t, "atomic_cas"); +} +static void 
xx_fence(CGTarget* t, MemOrder o) { + (void)o; + xx_panic(t, "fence"); +} + +static void xx_intrinsic(CGTarget* t, IntrinKind k, Operand* d, u32 nd, + const Operand* a, u32 na) { + (void)k; + (void)d; + (void)nd; + (void)a; + (void)na; + xx_panic(t, "intrinsic"); +} +static void xx_asm_block(CGTarget* t, const char* tmpl, + const AsmConstraint* outs, u32 no, Operand* oo, + const AsmConstraint* ins, u32 ni, const Operand* io, + const Sym* clobs, u32 nc) { + (void)tmpl; + (void)outs; + (void)no; + (void)oo; + (void)ins; + (void)ni; + (void)io; + (void)clobs; + (void)nc; + xx_panic(t, "asm_block"); +} + +static void xx_set_loc(CGTarget* t, SrcLoc l) { + ((XImpl*)t)->loc = l; + if (t->mc) t->mc->set_loc(t->mc, l); +} + +static void xx_finalize(CGTarget* t) { (void)t; } +static void xx_destroy(CGTarget* t) { (void)t; } + +static void cgt_cleanup(void* arg) { cgtarget_free((CGTarget*)arg); } + +CGTarget* x64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) { + XImpl* x = arena_new(c->tu, XImpl); + memset(x, 0, sizeof *x); + + CGTarget* t = &x->base; + t->c = c; + t->obj = o; + t->mc = m; + + t->func_begin = xx_func_begin; + t->func_end = xx_func_end; + + t->alloc_reg = xx_alloc_reg; + t->free_reg = xx_free_reg; + t->frame_slot = xx_frame_slot; + t->param = xx_param; + t->clobbers = xx_clobbers; + t->spill_reg = xx_spill_reg; + t->reload_reg = xx_reload_reg; + + t->label_new = xx_label_new; + t->label_place = xx_label_place; + t->jump = xx_jump; + t->cmp_branch = xx_cmp_branch; + + t->scope_begin = xx_scope_begin; + t->scope_else = xx_scope_else; + t->scope_end = xx_scope_end; + t->break_to = xx_break_to; + t->continue_to = xx_continue_to; + + t->load_imm = xx_load_imm; + t->load_const = xx_load_const; + t->copy = xx_copy; + t->load = xx_load; + t->store = xx_store; + t->addr_of = xx_addr_of; + t->tls_addr_of = xx_tls_addr_of; + t->copy_bytes = xx_copy_bytes; + t->set_bytes = xx_set_bytes; + t->bitfield_load = xx_bitfield_load; + t->bitfield_store = 
xx_bitfield_store; + + t->binop = xx_binop; + t->unop = xx_unop; + t->cmp = xx_cmp; + t->convert = xx_convert; + + t->call = xx_call; + t->ret = xx_ret; + + t->alloca_ = xx_alloca_; + t->va_start_ = xx_va_start_; + t->va_arg_ = xx_va_arg_; + t->va_end_ = xx_va_end_; + t->va_copy_ = xx_va_copy_; + + t->setjmp_ = NULL; + t->longjmp_ = NULL; + + t->atomic_load = xx_atomic_load; + t->atomic_store = xx_atomic_store; + t->atomic_rmw = xx_atomic_rmw; + t->atomic_cas = xx_atomic_cas; + t->fence = xx_fence; + + t->intrinsic = xx_intrinsic; + t->asm_block = xx_asm_block; + + t->set_loc = xx_set_loc; + t->finalize = xx_finalize; + t->destroy = xx_destroy; + + compiler_defer(c, cgt_cleanup, t); + return t; +} diff --git a/src/arch/x64.h b/src/arch/x64.h @@ -0,0 +1,8 @@ +#ifndef CFREE_ARCH_X64_H +#define CFREE_ARCH_X64_H + +#include "arch/arch.h" + +CGTarget* x64_cgtarget_new(Compiler*, ObjBuilder*, MCEmitter*); + +#endif diff --git a/src/link/link.c b/src/link/link.c @@ -423,7 +423,7 @@ void link_emit_image_writer(LinkImage* img, Writer* w) { if (!img || !w) return; switch (img->c->target.obj) { case CFREE_OBJ_ELF: - link_emit_elf_aarch64(img, w); + link_emit_elf(img, w); return; default: compiler_panic(img->c, no_loc(), diff --git a/src/link/link_dyn.c b/src/link/link_dyn.c @@ -798,7 +798,7 @@ void layout_dyn(Linker* l, LinkImage* img) { compiler_panic(img->c, no_loc(), "link: oom on rw dyn segment"); /* Zero-initialize. .got.plt[0] (&.dynamic) is filled later, after * shift_image_addresses has bumped dyn->dynamic_vaddr. .dynamic - * body is built post-shift in link_emit_elf_aarch64. Loader + * body is built post-shift in link_emit_elf. Loader * patches all .got.plt slots from .rela.plt before user code * under DF_1_NOW. */ memset(img->segment_bytes[rw_seg_idx], 0, (size_t)rw_seg_size); @@ -944,7 +944,7 @@ void layout_dyn(Linker* l, LinkImage* img) { * RELATIVE for PIE internal abs fixups) are emitted by * apply_all_relocs as it walks every relocation. 
layout_dyn * leaves .rela.dyn empty here; the bytes are written post-shift in - * link_emit_elf_aarch64. */ + * link_emit_elf. */ /* .got.plt prelude: for BIND_NOW we leave the body zero — the * loader patches every slot from .rela.plt before user code. Some diff --git a/src/link/link_elf.c b/src/link/link_elf.c @@ -1,6 +1,10 @@ -/* link_emit_elf_aarch64: write a static ET_EXEC ELF64 image to the +/* link_emit_elf: write a static ET_EXEC ELF64 image to the * caller-provided Writer. * + * 64-bit little-endian only. The per-arch ELF reloc-type tables in + * obj/elf_reloc_<arch>.c handle RelocKind <-> ELF translation; this + * file picks e_machine from Compiler.target.arch. + * * File layout (in write order): * * [headers PT_LOAD, PF_R, mapped at IMAGE_BASE] @@ -568,12 +572,23 @@ static u64 sec_flags_to_shf(u32 flags) { return r; } -void link_emit_elf_aarch64(LinkImage* img, Writer* w) { +void link_emit_elf(LinkImage* img, Writer* w) { Heap* heap = img->heap; Compiler* c = img->c; - if (c->target.arch != CFREE_ARCH_ARM_64) - compiler_panic(c, no_loc(), "link_emit_elf: only AArch64 is implemented"); + u32 e_machine; + switch (c->target.arch) { + case CFREE_ARCH_ARM_64: + e_machine = EM_AARCH64; + break; + case CFREE_ARCH_X86_64: + e_machine = EM_X86_64; + break; + default: + compiler_panic(c, no_loc(), + "link_emit_elf: unsupported target arch %u", + (u32)c->target.arch); + } if (img->entry_sym == LINK_SYM_NONE) compiler_panic(c, no_loc(), "link_emit_elf: no resolved entry symbol"); /* IFUNC trampolines: layout_iplt builds the .iplt stubs + .igot.plt @@ -1109,7 +1124,7 @@ void link_emit_elf_aarch64(LinkImage* img, Writer* w) { ehdr.e_ident[6] = EV_CURRENT; ehdr.e_ident[7] = ELFOSABI_NONE; ehdr.e_type = pie ? 
ET_DYN : ET_EXEC; - ehdr.e_machine = EM_AARCH64; + ehdr.e_machine = (u16)e_machine; ehdr.e_version = EV_CURRENT; ehdr.e_entry = img_base + LinkSyms_at(&img->syms, img->entry_sym - 1)->vaddr; ehdr.e_phoff = sizeof(Ehdr64); diff --git a/src/link/link_internal.h b/src/link/link_internal.h @@ -306,7 +306,8 @@ struct LinkImage { void link_reloc_apply(Compiler*, RelocKind, u8* P_bytes, u64 S, i64 A, u64 P); /* Public link_emit_image_writer dispatches by Compiler.target.obj. The - * ELF AArch64 implementation lives in link_elf.c. */ -void link_emit_elf_aarch64(LinkImage*, Writer*); + * ELF implementation lives in link_elf.c and dispatches internally on + * Compiler.target.arch for e_machine and reloc translation. */ +void link_emit_elf(LinkImage*, Writer*); #endif diff --git a/src/link/link_layout.c b/src/link/link_layout.c @@ -1,7 +1,7 @@ /* link_resolve: builds a fresh LinkImage from the Linker's inputs. * * Image-relative discipline: every vaddr / file_offset on the produced - * image treats the image as based at 0. Consumers (link_emit_elf_aarch64, + * image treats the image as based at 0. Consumers (link_emit_elf, * cfree_jit_from_image) add their own runtime base before patching * relocations or writing PT_LOAD headers. Segment byte buffers hold raw * input section bytes — no relocations are applied here, in line with diff --git a/src/link/link_reloc.c b/src/link/link_reloc.c @@ -2,7 +2,7 @@ * * Pure function: takes the resolved final addresses (S, P) and the * addend (A), and patches `width` bytes at the relocation site. - * Callers (link_emit_elf_aarch64, cfree_jit_from_image) compute the + * Callers (link_emit_elf, cfree_jit_from_image) compute the * runtime base offset themselves; this routine sees only final values. * * Encoding references: ARM ARMv8-A "ELF for the ARM 64-bit Architecture diff --git a/src/obj/elf.h b/src/obj/elf.h @@ -5,9 +5,9 @@ * (obj/obj.h, link/link.h); the ELF spelling of those abstractions only * exists inside libcfree. 
* - * Scope: 64-bit little-endian only. AArch64 today; the per-arch reloc - * mapping in elf_aarch64_reloc_{to,from} is the place to extend when - * x86_64/RISC-V/etc. land. */ + * Scope: 64-bit little-endian only. The per-arch reloc mapping is split + * across elf_reloc_<arch>.c (one TU per arch); emit_elf and the linker + * dispatch to the right table by Compiler.target.arch. */ #ifndef CFREE_OBJ_ELF_H #define CFREE_OBJ_ELF_H @@ -236,6 +236,33 @@ u32 elf_aarch64_reloc_to(u32 kind /* RelocKind */); u32 elf_aarch64_reloc_from(u32 elf_type); +/* ---- x86_64 ELF reloc types ---- + * + * Subset matching the cfree-canonical RelocKind R_X64_* entries. The + * full SysV-x86_64 ABI table has more entries (TLS, GOT variants, ...) + * — only the ones the codegen and linker actually need today are + * represented here. */ +#define ELF_R_X86_64_NONE 0 +#define ELF_R_X86_64_64 1 +#define ELF_R_X86_64_PC32 2 +#define ELF_R_X86_64_GOT32 3 +#define ELF_R_X86_64_PLT32 4 +#define ELF_R_X86_64_COPY 5 +#define ELF_R_X86_64_GLOB_DAT 6 +#define ELF_R_X86_64_JUMP_SLOT 7 +#define ELF_R_X86_64_RELATIVE 8 +#define ELF_R_X86_64_GOTPCREL 9 +#define ELF_R_X86_64_32 10 +#define ELF_R_X86_64_32S 11 +#define ELF_R_X86_64_16 12 +#define ELF_R_X86_64_PC16 13 +#define ELF_R_X86_64_8 14 +#define ELF_R_X86_64_PC8 15 +#define ELF_R_X86_64_PC64 24 + +u32 elf_x86_64_reloc_to(u32 kind /* RelocKind */); +u32 elf_x86_64_reloc_from(u32 elf_type); + /* ---- little-endian byte writers/readers (Writer-based) ---- * Reads use rd_u*_le from core/bytes.h directly; only writes need the * Writer-bridging wrappers below. */ diff --git a/src/obj/elf_emit.c b/src/obj/elf_emit.c @@ -8,15 +8,17 @@ * 2. build .symtab + .strtab content (locals first — STT_SECTION * synthesized for every input section, then ordinary locals, then * globals/weaks); - * 3. build .rela.* content using the AArch64 reloc map; + * 3. build .rela.* content using the per-arch reloc map (selected + * by Compiler.target.arch); * 4. 
build .shstrtab; * 5. assign file offsets sequentially, respecting per-section * addralign; * 6. write Ehdr, then each section's bytes (seeking to its sh_offset), * then the section header table. * - * AArch64 little-endian only. Other archs / endianness panic at entry — - * the per-arch reloc table is the place to extend, not this file. + * 64-bit little-endian only. Per-arch reloc tables (elf_reloc_<arch>.c) + * supply the RelocKind -> ELF type mapping; e_machine is selected from + * Compiler.target.arch. Big-endian / 32-bit ELF panic at entry. * * See doc/DESIGN.md §5.5 for the round-trip invariant: read_elf of this * output must produce an ObjBuilder shape-equivalent to the input, @@ -243,13 +245,24 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) { Heap* h = (Heap*)c->env->heap; /* ---- target validation ------------------------------------------ */ - if (c->target.arch != CFREE_ARCH_ARM_64) { - compiler_panic(c, no_loc(), - "emit_elf: only AArch64 is implemented (target arch=%u)", - (u32)c->target.arch); + u32 e_machine; + u32 (*reloc_to)(u32); + switch (c->target.arch) { + case CFREE_ARCH_ARM_64: + e_machine = EM_AARCH64; + reloc_to = elf_aarch64_reloc_to; + break; + case CFREE_ARCH_X86_64: + e_machine = EM_X86_64; + reloc_to = elf_x86_64_reloc_to; + break; + default: + compiler_panic(c, no_loc(), + "emit_elf: unsupported target arch %u", + (u32)c->target.arch); } if (c->target.big_endian) { - compiler_panic(c, no_loc(), "emit_elf: big-endian AArch64 not supported"); + compiler_panic(c, no_loc(), "emit_elf: big-endian ELF not supported"); } if (c->target.ptr_size != 8) { compiler_panic(c, no_loc(), "emit_elf: ptr_size %u (expected 8)", @@ -483,11 +496,12 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) { for (u32 i = 0; i < total_relocs; ++i) { const Reloc* r = obj_reloc_at(ob, i); if (r->section_id != si) continue; - u32 etype = elf_aarch64_reloc_to(r->kind); - if (etype == ELF_R_AARCH64_NONE && r->kind != R_NONE) { + u32 etype = 
reloc_to(r->kind); + if (etype == ELF_R_AARCH64_NONE /* == ELF_R_X86_64_NONE == 0 */ && + r->kind != R_NONE) { compiler_panic(c, no_loc(), - "emit_elf: unsupported relocation kind %u for AArch64", - (u32)r->kind); + "emit_elf: unsupported relocation kind %u for arch %u", + (u32)r->kind, (u32)c->target.arch); } u32 sym_elf_idx; if (r->sym == OBJ_SYM_NONE) { @@ -672,7 +686,7 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) { cfree_writer_seek(w, 0); cfree_writer_write(w, ident, EI_NIDENT); elf_wr_u16(w, ET_REL); - elf_wr_u16(w, EM_AARCH64); + elf_wr_u16(w, (u16)e_machine); elf_wr_u32(w, EV_CURRENT); elf_wr_u64(w, 0); /* e_entry */ elf_wr_u64(w, 0); /* e_phoff */ diff --git a/src/obj/elf_reloc_x86_64.c b/src/obj/elf_reloc_x86_64.c @@ -0,0 +1,84 @@ +/* RelocKind <-> x86_64 ELF reloc-type mapping. + * + * Mirror of elf_reloc_aarch64.c for the x86_64 SysV ABI. The arch- + * agnostic R_ABS / R_PC / R_REL RelocKind entries fan out to the + * native x86_64 codes; the x86_64-only encodings (R_X64_PC8, PLT32, + * GOTPCREL, dynamic-only entries) live in the lower band. + * + * elf_x86_64_reloc_to returns ELF_R_X86_64_NONE for an unsupported kind and + * elf_x86_64_reloc_from returns (u32)-1; callers panic with a diagnostic. 
*/ + +#include "obj/elf.h" + +u32 elf_x86_64_reloc_to(u32 kind /* RelocKind */) { + switch (kind) { + case R_NONE: + return ELF_R_X86_64_NONE; + case R_ABS64: + return ELF_R_X86_64_64; + case R_ABS32: + return ELF_R_X86_64_32; + case R_PC32: + return ELF_R_X86_64_PC32; + case R_PC64: + return ELF_R_X86_64_PC64; + case R_REL32: + return ELF_R_X86_64_PC32; + case R_REL64: + return ELF_R_X86_64_PC64; + case R_X64_PC8: + return ELF_R_X86_64_PC8; + case R_PLT32: + case R_X64_PLT32: + return ELF_R_X86_64_PLT32; + case R_GOT32: + return ELF_R_X86_64_GOT32; + case R_X64_GOTPCREL: + return ELF_R_X86_64_GOTPCREL; + case R_X64_GLOB_DAT: + return ELF_R_X86_64_GLOB_DAT; + case R_X64_JUMP_SLOT: + return ELF_R_X86_64_JUMP_SLOT; + case R_X64_RELATIVE: + return ELF_R_X86_64_RELATIVE; + case R_X64_COPY: + return ELF_R_X86_64_COPY; + default: + return ELF_R_X86_64_NONE; + } +} + +u32 elf_x86_64_reloc_from(u32 elf_type) { + switch (elf_type) { + case ELF_R_X86_64_NONE: + return R_NONE; + case ELF_R_X86_64_64: + return R_ABS64; + case ELF_R_X86_64_32: + return R_ABS32; + case ELF_R_X86_64_32S: + return R_ABS32; + case ELF_R_X86_64_PC32: + return R_PC32; + case ELF_R_X86_64_PC64: + return R_PC64; + case ELF_R_X86_64_PC8: + return R_X64_PC8; + case ELF_R_X86_64_PLT32: + return R_X64_PLT32; + case ELF_R_X86_64_GOT32: + return R_GOT32; + case ELF_R_X86_64_GOTPCREL: + return R_X64_GOTPCREL; + case ELF_R_X86_64_GLOB_DAT: + return R_X64_GLOB_DAT; + case ELF_R_X86_64_JUMP_SLOT: + return R_X64_JUMP_SLOT; + case ELF_R_X86_64_RELATIVE: + return R_X64_RELATIVE; + case ELF_R_X86_64_COPY: + return R_X64_COPY; + default: + return (u32)-1; /* sentinel */ + } +} diff --git a/src/obj/obj.h b/src/obj/obj.h @@ -147,6 +147,16 @@ typedef enum RelocKind { R_AARCH64_JUMP_SLOT, R_AARCH64_RELATIVE, R_AARCH64_COPY, + /* x86_64 reloc kinds. Most map directly to the existing R_ABS and + * R_PC entries; the few here are the x86_64-only encodings (8-bit + * displacements, GOT/PLT, dynamic linker-only entries). 
*/ + R_X64_PC8, + R_X64_PLT32, + R_X64_GOTPCREL, + R_X64_GLOB_DAT, + R_X64_JUMP_SLOT, + R_X64_RELATIVE, + R_X64_COPY, R_RV_HI20, R_RV_LO12_I, R_RV_LO12_S,