arch/abi/obj: MULTIARCH Phase 2 — multi-arch seams + x64 stubs - kit

commit f2a928e2f8901aec26bed7d2a6465e505109c280
parent 4fb9c5b2564b3755aea2b0f468b93988c849dd32
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun, 10 May 2026 10:41:02 -0700

arch/abi/obj: MULTIARCH Phase 2 — multi-arch seams + x64 stubs

Splits the AArch64-specific construction sites into per-arch TUs behind
arch-agnostic dispatchers, so adding a real x86_64 backend in phase 3 is
purely additive (new files only). aarch64 output is unchanged.

- arch/cgtarget.c owns the public cgtarget_new and switches on
  c->target.arch; aarch64.c::cgtarget_new renamed to aa64_cgtarget_new
  in arch/aa64.h. arch/x64.{h,c} adds the x64 CGTarget skeleton with a
  full vtable wired up to "x64: <method> not implemented" stubs.

- TargetABI carries an ABIVtable* (compute_func_info, va_list_type)
  selected by abi_init from target.arch (target.os is not consulted
  yet). The AAPCS64 classifier moves out of abi.c into
  abi/abi_aapcs64.c; abi/abi_sysv_x64.c exposes sysv_x64_vtable with
  ABI_ARG_INDIRECT for every non-void value plus the __va_list_tag
  struct (correct, slow, unblocks bring-up).

- RelocKind grows R_X64_PC8/PLT32/GOTPCREL/{GLOB_DAT,JUMP_SLOT,RELATIVE,
  COPY}. obj/elf_reloc_x86_64.c mirrors elf_reloc_aarch64.c. emit_elf
  and link_emit_elf (renamed from link_emit_elf_aarch64) replace their
  AArch64-only panic with a switch picking e_machine + reloc translator
  from c->target.arch.

Diffstat:
M src/abi/abi.c  | 211 ++++++++++++++++++-------------------------------------------------------------
A src/abi/abi_aapcs64.c  | 147 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/abi/abi_internal.h  | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/abi/abi_sysv_x64.c  | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/api/pipeline.c  | 14 ++++++++++++++
A src/arch/aa64.h  | 8 ++++++++
M src/arch/aarch64.c  | 18 +-----------------
A src/arch/cgtarget.c  | 33 +++++++++++++++++++++++++++++++++
M src/arch/mc.c  | 12 ++++++++----
A src/arch/x64.c  | 387 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/arch/x64.h  | 8 ++++++++
M src/link/link.c  | 2 +-
M src/link/link_dyn.c  | 4 ++--
M src/link/link_elf.c  | 25 ++++++++++++++++++++-----
M src/link/link_internal.h  | 5 +++--
M src/link/link_layout.c  | 2 +-
M src/link/link_reloc.c  | 2 +-
M src/obj/elf.h  | 33 ++++++++++++++++++++++++++++++---
M src/obj/elf_emit.c  | 40 +++++++++++++++++++++++++++-------------
A src/obj/elf_reloc_x86_64.c  | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/obj/obj.h  | 10 ++++++++++

21 files changed, 964 insertions(+), 214 deletions(-)
diff --git a/src/abi/abi.c b/src/abi/abi.c
@@ -1,46 +1,31 @@
-/* TargetABI for AArch64 SysV (AAPCS64).
+/* TargetABI dispatch and shared C-standard layout.
  *
- * Single authority for target-dependent C layout and calling convention
- * decisions. Type stays structural and ABI-neutral; sizes, alignments,
- * record layouts, and argument/return classification are derived here
- * from Compiler.target.
+ * The single authority for target-dependent C layout and calling
+ * convention decisions. Type stays structural and ABI-neutral; sizes,
+ * alignments, record layouts, and argument/return classification are
+ * derived here from Compiler.target.
  *
- * v1 implements only AArch64 SysV at the level needed for the cg test
- * harness (Group A scalars + Group C arithmetic + the lowering surface
- * for Group B). Other arches will land alongside their backends. */
+ * Per-ABI bits (function classification, __va_list shape) live in
+ * abi_aapcs64.c, abi_sysv_x64.c, ... abi_init currently selects the
+ * vtable from target.arch alone (target.os is not consulted yet).
+ * The C-standard-driven scalar profile and record layout stay here
+ * so all ABIs share one impl. */
 
 #include "abi/abi.h"
 
 #include <cfree.h>
 #include <string.h>
 
+#include "abi/abi_internal.h"
 #include "core/arena.h"
 #include "core/core.h"
 #include "core/pool.h"
 
-typedef struct FuncInfoCacheEntry FuncInfoCacheEntry;
-struct FuncInfoCacheEntry {
-  const Type* fn;
-  ABIFuncInfo* info;
-  FuncInfoCacheEntry* next;
-};
-
-typedef struct RecordLayoutCacheEntry RecordLayoutCacheEntry;
-struct RecordLayoutCacheEntry {
-  const Type* ty;
-  ABIRecordLayout* layout;
-  RecordLayoutCacheEntry* next;
-};
-
-struct TargetABI {
-  Compiler* c;
-  /* Per-TU cached lookups. */
-  FuncInfoCacheEntry* fn_cache;
-  RecordLayoutCacheEntry* rec_cache;
-  const Type* va_list_cache;
-};
-
-/* ---- scalar profile ---- */
+/* ---- scalar profile ----
+ *
+ * Shared by all currently supported ABIs (LP64 on Linux for both
+ * aarch64 and x86_64). When a Windows-x64 (LLP64) or 32-bit ABI lands,
+ * promote prim_info into the vtable. */
 
 static ABITypeInfo prim_info(TargetABI* a, TypeKind k) {
   ABITypeInfo r = {0, 0, ABI_SC_INT, 0, 0, 0};
@@ -166,7 +151,7 @@ ABITypeInfo abi_type_info(TargetABI* a, const Type* t) {
       return abi_type_info(
           a, t->enm.base ? t->enm.base : type_prim(a->c->global, TY_INT));
     case TY_FUNC:
-      /* sizeof(function) is undefined in C; AAPCS uses 1 for arithmetic. */
+      /* sizeof(function) is undefined in C; use 1 for arithmetic. */
       r.size = 1;
       r.align = 1;
       return r;
@@ -175,12 +160,20 @@ ABITypeInfo abi_type_info(TargetABI* a, const Type* t) {
   }
 }
 
+ABITypeInfo abi_internal_type_info(TargetABI* a, const Type* t) {
+  return abi_type_info(a, t);
+}
+
 u32 abi_sizeof(TargetABI* a, const Type* t) { return abi_type_info(a, t).size; }
 u32 abi_alignof(TargetABI* a, const Type* t) {
   return abi_type_info(a, t).align;
 }
 
-/* ---- record layout (struct/union) ---- */
+/* ---- record layout (struct/union) ----
+ *
+ * Shared by all currently supported ABIs: storage-unit-based layout with
+ * natural alignment, no bitfield packing extensions. When a Windows-x64
+ * (MSVC bitfield rules) ABI lands, promote this into the vtable. */
 
 static ABIRecordLayout* compute_record_layout(TargetABI* a, const Type* t) {
   ABIRecordLayout* L = arena_new(a->c->tu, ABIRecordLayout);
@@ -243,125 +236,14 @@ const ABIRecordLayout* abi_record_layout(TargetABI* a, const Type* t) {
   return L;
 }
 
-/* ---- function classification (AArch64 SysV / AAPCS64) ----
- *
- * v1 covers the cases the cg test harness exercises:
- *   void          -> IGNORE
- *   integer ≤ 8B  -> DIRECT, one INT part in a register
- *   integer 16B   -> DIRECT, two INT parts (X0+X1)
- *   pointer       -> DIRECT, one INT part in a register
- *   float/double  -> DIRECT, one FP part in a register
- *   small struct  -> DIRECT, INT parts (HFA/HVA refinement: TODO)
- *   large struct  -> INDIRECT (sret for return; passed by reference)
- * Variadics, HFA classification, and split GPR+stack tail still
- * land with the parser. */
-
-static void classify_scalar(TargetABI* a, const Type* t, ABIArgInfo* out) {
-  ABITypeInfo ti = abi_type_info(a, t);
-  out->kind = ABI_ARG_DIRECT;
-  out->flags = ABI_AF_NONE;
-  out->indirect_align = 0;
-
-  ABIArgPart* parts = arena_new(a->c->tu, ABIArgPart);
-  memset(parts, 0, sizeof *parts);
-  parts->cls = (ti.scalar_kind == ABI_SC_FLOAT) ? ABI_CLASS_FP : ABI_CLASS_INT;
-  parts->loc = ABI_LOC_REG;
-  parts->size = ti.size;
-  parts->align = ti.align;
-  parts->src_offset = 0;
-
-  out->parts = parts;
-  out->nparts = 1;
-}
-
-static void classify_void(ABIArgInfo* out) {
-  memset(out, 0, sizeof *out);
-  out->kind = ABI_ARG_IGNORE;
-}
-
-static void classify_aggregate(TargetABI* a, const Type* t, ABIArgInfo* out,
-                               int is_return) {
-  ABITypeInfo ti = abi_type_info(a, t);
-  if (ti.size == 0) {
-    classify_void(out);
-    return;
-  }
-  /* AAPCS64: aggregates ≤ 16 bytes pass in up to 2 GPRs (or HFA in FP regs;
-   * v1 ignores HFA). Larger aggregates pass by reference (caller copy for
-   * args, sret pointer for return). */
-  if (ti.size <= 16) {
-    u32 nparts = (ti.size + 7) / 8;
-    ABIArgPart* parts = arena_array(a->c->tu, ABIArgPart, nparts);
-    memset(parts, 0, sizeof(ABIArgPart) * nparts);
-    u32 off = 0;
-    for (u32 i = 0; i < nparts; ++i) {
-      u32 chunk = (ti.size - off > 8) ? 8 : (ti.size - off);
-      parts[i].cls = ABI_CLASS_INT;
-      parts[i].loc = ABI_LOC_REG;
-      parts[i].size = chunk;
-      parts[i].align = 8;
-      parts[i].src_offset = off;
-      off += chunk;
-    }
-    out->kind = ABI_ARG_DIRECT;
-    out->flags = ABI_AF_NONE;
-    out->parts = parts;
-    out->nparts = (u16)nparts;
-    out->indirect_align = 0;
-  } else {
-    out->kind = ABI_ARG_INDIRECT;
-    out->flags = is_return ? ABI_AF_SRET : ABI_AF_BYVAL;
-    out->indirect_align = ti.align;
-    out->parts = NULL;
-    out->nparts = 0;
-  }
-}
-
-static void classify_one(TargetABI* a, const Type* t, ABIArgInfo* out,
-                         int is_return) {
-  if (!t || t->kind == TY_VOID) {
-    classify_void(out);
-    return;
-  }
-  switch (t->kind) {
-    case TY_STRUCT:
-    case TY_UNION:
-      classify_aggregate(a, t, out, is_return);
-      return;
-    default:
-      classify_scalar(a, t, out);
-      return;
-  }
-}
-
-static ABIFuncInfo* compute_func_info(TargetABI* a, const Type* fn) {
-  ABIFuncInfo* info = arena_new(a->c->tu, ABIFuncInfo);
-  memset(info, 0, sizeof *info);
-
-  classify_one(a, fn->fn.ret, &info->ret, /*is_return=*/1);
-  info->has_sret = (info->ret.kind == ABI_ARG_INDIRECT) ? 1 : 0;
-  info->variadic = fn->fn.variadic;
-
-  info->nparams = fn->fn.nparams;
-  if (fn->fn.nparams) {
-    ABIArgInfo* arr = arena_array(a->c->tu, ABIArgInfo, fn->fn.nparams);
-    memset(arr, 0, sizeof(ABIArgInfo) * fn->fn.nparams);
-    for (u16 i = 0; i < fn->fn.nparams; ++i) {
-      classify_one(a, fn->fn.params[i], &arr[i], /*is_return=*/0);
-    }
-    info->params = arr;
-  } else {
-    info->params = NULL;
-  }
-  return info;
-}
+/* ---- function classification (vtabled) ---- */
 
 const ABIFuncInfo* abi_func_info(TargetABI* a, const Type* fn_type) {
   if (!fn_type || fn_type->kind != TY_FUNC) return NULL;
   for (FuncInfoCacheEntry* e = a->fn_cache; e; e = e->next) {
     if (e->fn == fn_type) return e->info;
   }
-  ABIFuncInfo* info = compute_func_info(a, fn_type);
+  ABIFuncInfo* info = a->vt->compute_func_info(a, fn_type);
   if (!info) return NULL;
   FuncInfoCacheEntry* e = arena_new(a->c->tu, FuncInfoCacheEntry);
   e->fn = fn_type;
@@ -392,35 +274,33 @@ const Type* abi_intptr_type(TargetABI* a, Pool* p) {
 const Type* abi_uintptr_type(TargetABI* a, Pool* p) {
   return size_or_uintptr(a, p);
 }
+
 const Type* abi_va_list_type(TargetABI* a, Pool* p) {
-  /* AAPCS64 __va_list: 3 pointers (__stack, __gr_top, __vr_top) followed
-   * by 2 ints (__gr_offs, __vr_offs). Total 32 bytes, 8-aligned. */
   if (a->va_list_cache) return a->va_list_cache;
-  const Type* vp = type_ptr(p, type_void(p));
-  const Type* it = type_prim(p, TY_INT);
-  Sym name = pool_intern_cstr(p, "__va_list");
-  SrcLoc nl = {0, 0, 0};
-  TagId tg = type_tag_new(p, TAG_STRUCT, name, nl);
-  TypeRecordBuilder* b = type_record_begin(p, TY_STRUCT, tg, name);
-  type_record_field(
-      b, (Field){.name = pool_intern_cstr(p, "__stack"), .type = vp});
-  type_record_field(
-      b, (Field){.name = pool_intern_cstr(p, "__gr_top"), .type = vp});
-  type_record_field(
-      b, (Field){.name = pool_intern_cstr(p, "__vr_top"), .type = vp});
-  type_record_field(
-      b, (Field){.name = pool_intern_cstr(p, "__gr_offs"), .type = it});
-  type_record_field(
-      b, (Field){.name = pool_intern_cstr(p, "__vr_offs"), .type = it});
-  a->va_list_cache = type_record_end(p, b);
+  a->va_list_cache = a->vt->va_list_type(a, p);
   return a->va_list_cache;
 }
 
 /* ---- lifecycle ---- */
 
+static const ABIVtable* select_vtable(Compiler* c) {
+  switch (c->target.arch) {
+    case CFREE_ARCH_ARM_64:
+      return &aapcs64_vtable;
+    case CFREE_ARCH_X86_64:
+      return &sysv_x64_vtable;
+    default: {
+      SrcLoc loc = {0, 0, 0};
+      compiler_panic(c, loc, "abi_init: unsupported target arch %d",
+                     (int)c->target.arch);
+    }
+  }
+}
+
 void abi_init(TargetABI* a, Compiler* c) {
   memset(a, 0, sizeof *a);
   a->c = c;
+  a->vt = select_vtable(c);
 }
 
 void abi_fini(TargetABI* a) {
@@ -429,6 +309,7 @@ void abi_fini(TargetABI* a) {
   a->fn_cache = NULL;
   a->rec_cache = NULL;
   a->va_list_cache = NULL;
+  a->vt = NULL;
   a->c = NULL;
 }
 
diff --git a/src/abi/abi_aapcs64.c b/src/abi/abi_aapcs64.c
@@ -0,0 +1,147 @@
+/* AAPCS64 (AArch64 SysV) ABI dispatch.
+ *
+ * v1 covers the cases the cg test harness exercises:
+ *   void          -> IGNORE
+ *   integer ≤ 8B  -> DIRECT, one INT part in a register
+ *   integer 16B   -> DIRECT, two INT parts (X0+X1)
+ *   pointer       -> DIRECT, one INT part in a register
+ *   float/double  -> DIRECT, one FP part in a register
+ *   small struct  -> DIRECT, INT parts (HFA/HVA refinement: TODO)
+ *   large struct  -> INDIRECT (sret for return; passed by reference)
+ * Variadics, HFA classification, and split GPR+stack tail still
+ * land with the parser. */
+
+#include <string.h>
+
+#include "abi/abi_internal.h"
+#include "core/arena.h"
+#include "core/core.h"
+#include "core/pool.h"
+
+static void classify_scalar(TargetABI* a, const Type* t, ABIArgInfo* out) {
+  ABITypeInfo ti = abi_internal_type_info(a, t);
+  out->kind = ABI_ARG_DIRECT;
+  out->flags = ABI_AF_NONE;
+  out->indirect_align = 0;
+
+  ABIArgPart* parts = arena_new(a->c->tu, ABIArgPart);
+  memset(parts, 0, sizeof *parts);
+  parts->cls = (ti.scalar_kind == ABI_SC_FLOAT) ? ABI_CLASS_FP : ABI_CLASS_INT;
+  parts->loc = ABI_LOC_REG;
+  parts->size = ti.size;
+  parts->align = ti.align;
+  parts->src_offset = 0;
+
+  out->parts = parts;
+  out->nparts = 1;
+}
+
+static void classify_void(ABIArgInfo* out) {
+  memset(out, 0, sizeof *out);
+  out->kind = ABI_ARG_IGNORE;
+}
+
+static void classify_aggregate(TargetABI* a, const Type* t, ABIArgInfo* out,
+                               int is_return) {
+  ABITypeInfo ti = abi_internal_type_info(a, t);
+  if (ti.size == 0) {
+    classify_void(out);
+    return;
+  }
+  /* AAPCS64: aggregates ≤ 16 bytes pass in up to 2 GPRs (or HFA in FP regs;
+   * v1 ignores HFA). Larger aggregates pass by reference (caller copy for
+   * args, sret pointer for return). */
+  if (ti.size <= 16) {
+    u32 nparts = (ti.size + 7) / 8;
+    ABIArgPart* parts = arena_array(a->c->tu, ABIArgPart, nparts);
+    memset(parts, 0, sizeof(ABIArgPart) * nparts);
+    u32 off = 0;
+    for (u32 i = 0; i < nparts; ++i) {
+      u32 chunk = (ti.size - off > 8) ? 8 : (ti.size - off);
+      parts[i].cls = ABI_CLASS_INT;
+      parts[i].loc = ABI_LOC_REG;
+      parts[i].size = chunk;
+      parts[i].align = 8;
+      parts[i].src_offset = off;
+      off += chunk;
+    }
+    out->kind = ABI_ARG_DIRECT;
+    out->flags = ABI_AF_NONE;
+    out->parts = parts;
+    out->nparts = (u16)nparts;
+    out->indirect_align = 0;
+  } else {
+    out->kind = ABI_ARG_INDIRECT;
+    out->flags = is_return ? ABI_AF_SRET : ABI_AF_BYVAL;
+    out->indirect_align = ti.align;
+    out->parts = NULL;
+    out->nparts = 0;
+  }
+}
+
+static void classify_one(TargetABI* a, const Type* t, ABIArgInfo* out,
+                         int is_return) {
+  if (!t || t->kind == TY_VOID) {
+    classify_void(out);
+    return;
+  }
+  switch (t->kind) {
+    case TY_STRUCT:
+    case TY_UNION:
+      classify_aggregate(a, t, out, is_return);
+      return;
+    default:
+      classify_scalar(a, t, out);
+      return;
+  }
+}
+
+static ABIFuncInfo* aapcs64_compute_func_info(TargetABI* a, const Type* fn) {
+  ABIFuncInfo* info = arena_new(a->c->tu, ABIFuncInfo);
+  memset(info, 0, sizeof *info);
+
+  classify_one(a, fn->fn.ret, &info->ret, /*is_return=*/1);
+  info->has_sret = (info->ret.kind == ABI_ARG_INDIRECT) ? 1 : 0;
+  info->variadic = fn->fn.variadic;
+
+  info->nparams = fn->fn.nparams;
+  if (fn->fn.nparams) {
+    ABIArgInfo* arr = arena_array(a->c->tu, ABIArgInfo, fn->fn.nparams);
+    memset(arr, 0, sizeof(ABIArgInfo) * fn->fn.nparams);
+    for (u16 i = 0; i < fn->fn.nparams; ++i) {
+      classify_one(a, fn->fn.params[i], &arr[i], /*is_return=*/0);
+    }
+    info->params = arr;
+  } else {
+    info->params = NULL;
+  }
+  return info;
+}
+
+static const Type* aapcs64_va_list_type(TargetABI* a, Pool* p) {
+  /* AAPCS64 __va_list: 3 pointers (__stack, __gr_top, __vr_top) followed
+   * by 2 ints (__gr_offs, __vr_offs). Total 32 bytes, 8-aligned. */
+  (void)a;
+  const Type* vp = type_ptr(p, type_void(p));
+  const Type* it = type_prim(p, TY_INT);
+  Sym name = pool_intern_cstr(p, "__va_list");
+  SrcLoc nl = {0, 0, 0};
+  TagId tg = type_tag_new(p, TAG_STRUCT, name, nl);
+  TypeRecordBuilder* b = type_record_begin(p, TY_STRUCT, tg, name);
+  type_record_field(
+      b, (Field){.name = pool_intern_cstr(p, "__stack"), .type = vp});
+  type_record_field(
+      b, (Field){.name = pool_intern_cstr(p, "__gr_top"), .type = vp});
+  type_record_field(
+      b, (Field){.name = pool_intern_cstr(p, "__vr_top"), .type = vp});
+  type_record_field(
+      b, (Field){.name = pool_intern_cstr(p, "__gr_offs"), .type = it});
+  type_record_field(
+      b, (Field){.name = pool_intern_cstr(p, "__vr_offs"), .type = it});
+  return type_record_end(p, b);
+}
+
+const ABIVtable aapcs64_vtable = {
+    .compute_func_info = aapcs64_compute_func_info,
+    .va_list_type = aapcs64_va_list_type,
+};
diff --git a/src/abi/abi_internal.h b/src/abi/abi_internal.h
@@ -0,0 +1,54 @@
+#ifndef CFREE_ABI_INTERNAL_H
+#define CFREE_ABI_INTERNAL_H
+
+#include "abi/abi.h"
+
+/* Internal: per-ABI dispatch table.
+ *
+ * Selected by abi_init from target.arch (target.os is not consulted yet).
+ * The pieces here are the parts that vary by ABI; the C-standard-driven
+ * scalar profile and record layout live in abi.c and are shared. */
+
+typedef struct ABIVtable {
+  /* Compute the ABIFuncInfo for a function type. The cache wrapper in
+   * abi.c calls this once per Type and memoizes the result. */
+  ABIFuncInfo* (*compute_func_info)(TargetABI*, const Type* fn);
+  /* Build the per-ABI __va_list type. The wrapper in abi.c memoizes. */
+  const Type* (*va_list_type)(TargetABI*, Pool*);
+} ABIVtable;
+
+/* Per-ABI vtables exposed by their TUs. */
+extern const ABIVtable aapcs64_vtable;
+extern const ABIVtable sysv_x64_vtable;
+
+/* Shared TargetABI internals. The struct definition is here so each ABI
+ * TU can reach into the per-TU caches via TargetABI*. abi.c owns the
+ * cache plumbing; the per-ABI TUs only allocate ABIFuncInfo / record
+ * builders out of c->tu. */
+typedef struct FuncInfoCacheEntry FuncInfoCacheEntry;
+typedef struct RecordLayoutCacheEntry RecordLayoutCacheEntry;
+
+struct FuncInfoCacheEntry {
+  const Type* fn;
+  ABIFuncInfo* info;
+  FuncInfoCacheEntry* next;
+};
+
+struct RecordLayoutCacheEntry {
+  const Type* ty;
+  ABIRecordLayout* layout;
+  RecordLayoutCacheEntry* next;
+};
+
+struct TargetABI {
+  Compiler* c;
+  const ABIVtable* vt;
+  FuncInfoCacheEntry* fn_cache;
+  RecordLayoutCacheEntry* rec_cache;
+  const Type* va_list_cache;
+};
+
+/* Shared helpers exposed to per-ABI TUs. */
+ABITypeInfo abi_internal_type_info(TargetABI*, const Type*);
+
+#endif
diff --git a/src/abi/abi_sysv_x64.c b/src/abi/abi_sysv_x64.c
@@ -0,0 +1,79 @@
+/* SysV AMD64 ABI — phase-2 stub.
+ *
+ * Initial classifier returns ABI_ARG_INDIRECT for everything: correct
+ * (every value passes through memory), slow, but unblocks bring-up of
+ * the x64 codegen path. Phase 3 replaces this with the real eight-byte
+ * INTEGER/SSE classifier (see doc/MULTIARCH.md §4 phase 3 step 2). */
+
+#include <string.h>
+
+#include "abi/abi_internal.h"
+#include "core/arena.h"
+#include "core/core.h"
+#include "core/pool.h"
+
+static void classify_indirect(TargetABI* a, const Type* t, ABIArgInfo* out,
+                              int is_return) {
+  if (!t || t->kind == TY_VOID) {
+    memset(out, 0, sizeof *out);
+    out->kind = ABI_ARG_IGNORE;
+    return;
+  }
+  ABITypeInfo ti = abi_internal_type_info(a, t);
+  out->kind = ABI_ARG_INDIRECT;
+  out->flags = is_return ? ABI_AF_SRET : ABI_AF_BYVAL;
+  out->indirect_align = ti.align ? ti.align : 8;
+  out->parts = NULL;
+  out->nparts = 0;
+}
+
+static ABIFuncInfo* sysv_x64_compute_func_info(TargetABI* a, const Type* fn) {
+  ABIFuncInfo* info = arena_new(a->c->tu, ABIFuncInfo);
+  memset(info, 0, sizeof *info);
+
+  classify_indirect(a, fn->fn.ret, &info->ret, /*is_return=*/1);
+  info->has_sret = (info->ret.kind == ABI_ARG_INDIRECT) ? 1 : 0;
+  info->variadic = fn->fn.variadic;
+
+  info->nparams = fn->fn.nparams;
+  if (fn->fn.nparams) {
+    ABIArgInfo* arr = arena_array(a->c->tu, ABIArgInfo, fn->fn.nparams);
+    memset(arr, 0, sizeof(ABIArgInfo) * fn->fn.nparams);
+    for (u16 i = 0; i < fn->fn.nparams; ++i) {
+      classify_indirect(a, fn->fn.params[i], &arr[i], /*is_return=*/0);
+    }
+    info->params = arr;
+  } else {
+    info->params = NULL;
+  }
+  return info;
+}
+
+static const Type* sysv_x64_va_list_type(TargetABI* a, Pool* p) {
+  /* SysV AMD64 __va_list_tag: { unsigned gp_offset; unsigned fp_offset;
+   * void* overflow_arg_area; void* reg_save_area; }. 24 bytes, 8-aligned.
+   * NOTE: the psABI va_list is __va_list_tag[1]; this stub returns the
+   * bare struct, so the array-of-one wrapping is still TODO. */
+  (void)a;
+  const Type* vp = type_ptr(p, type_void(p));
+  const Type* uit = type_prim(p, TY_UINT);
+  Sym name = pool_intern_cstr(p, "__va_list_tag");
+  SrcLoc nl = {0, 0, 0};
+  TagId tg = type_tag_new(p, TAG_STRUCT, name, nl);
+  TypeRecordBuilder* b = type_record_begin(p, TY_STRUCT, tg, name);
+  type_record_field(
+      b, (Field){.name = pool_intern_cstr(p, "gp_offset"), .type = uit});
+  type_record_field(
+      b, (Field){.name = pool_intern_cstr(p, "fp_offset"), .type = uit});
+  type_record_field(
+      b,
+      (Field){.name = pool_intern_cstr(p, "overflow_arg_area"), .type = vp});
+  type_record_field(
+      b, (Field){.name = pool_intern_cstr(p, "reg_save_area"), .type = vp});
+  return type_record_end(p, b);
+}
+
+const ABIVtable sysv_x64_vtable = {
+    .compute_func_info = sysv_x64_compute_func_info,
+    .va_list_type = sysv_x64_va_list_type,
+};
diff --git a/src/api/pipeline.c b/src/api/pipeline.c
@@ -1040,6 +1040,20 @@ static const char* reloc_kind_name(u16 kind) {
       return "R_AARCH64_RELATIVE";
     case R_AARCH64_COPY:
       return "R_AARCH64_COPY";
+    case R_X64_PC8:
+      return "R_X86_64_PC8";
+    case R_X64_PLT32:
+      return "R_X86_64_PLT32";
+    case R_X64_GOTPCREL:
+      return "R_X86_64_GOTPCREL";
+    case R_X64_GLOB_DAT:
+      return "R_X86_64_GLOB_DAT";
+    case R_X64_JUMP_SLOT:
+      return "R_X86_64_JUMP_SLOT";
+    case R_X64_RELATIVE:
+      return "R_X86_64_RELATIVE";
+    case R_X64_COPY:
+      return "R_X86_64_COPY";
     case R_RV_HI20:
       return "R_RISCV_HI20";
     case R_RV_LO12_I:
diff --git a/src/arch/aa64.h b/src/arch/aa64.h
@@ -0,0 +1,8 @@
+#ifndef CFREE_ARCH_AA64_H
+#define CFREE_ARCH_AA64_H
+
+#include "arch/arch.h"
+
+CGTarget* aa64_cgtarget_new(Compiler*, ObjBuilder*, MCEmitter*);
+
+#endif
diff --git a/src/arch/aarch64.c b/src/arch/aarch64.c
@@ -2995,14 +2995,7 @@ static void aa_destroy(CGTarget* t) { (void)t; /* arena-backed */ }
 
 static void cgt_cleanup(void* arg) { cgtarget_free((CGTarget*)arg); }
 
-CGTarget* cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) {
-  if (c->target.arch != CFREE_ARCH_ARM_64) {
-    SrcLoc loc = {0, 0, 0};
-    compiler_panic(c, loc,
-                   "cgtarget_new: only AArch64 implemented in v1 (got arch %d)",
-                   (int)c->target.arch);
-  }
-
+CGTarget* aa64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) {
   AAImpl* a = arena_new(c->tu, AAImpl);
   memset(a, 0, sizeof *a);
 
@@ -3081,12 +3074,3 @@ CGTarget* cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) {
   compiler_defer(c, cgt_cleanup, t);
   return t;
 }
-
-void cgtarget_finalize(CGTarget* t) {
-  if (t && t->finalize) t->finalize(t);
-}
-
-void cgtarget_free(CGTarget* t) {
-  if (!t) return;
-  /* Arena-backed; nothing to free. */
-}
diff --git a/src/arch/cgtarget.c b/src/arch/cgtarget.c
@@ -0,0 +1,33 @@
+/* Public CGTarget constructor — dispatches by Compiler.target.arch.
+ *
+ * Per-arch constructors live in their own files (aarch64.c, x64.c). The
+ * lifecycle helpers (cgtarget_finalize, cgtarget_free) are arch-agnostic
+ * shims over the vtable. */
+
+#include "arch/aa64.h"
+#include "arch/arch.h"
+#include "arch/x64.h"
+
+CGTarget* cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) {
+  switch (c->target.arch) {
+    case CFREE_ARCH_ARM_64:
+      return aa64_cgtarget_new(c, o, m);
+    case CFREE_ARCH_X86_64:
+      return x64_cgtarget_new(c, o, m);
+    default: {
+      SrcLoc loc = {0, 0, 0};
+      compiler_panic(c, loc,
+                     "cgtarget_new: unsupported target arch %d",
+                     (int)c->target.arch);
+    }
+  }
+}
+
+void cgtarget_finalize(CGTarget* t) {
+  if (t && t->finalize) t->finalize(t);
+}
+
+void cgtarget_free(CGTarget* t) {
+  if (!t) return;
+  /* Arena-backed; nothing to free. */
+}
diff --git a/src/arch/mc.c b/src/arch/mc.c
@@ -5,10 +5,14 @@
  * relocations / source-location stamps. Encoding is the caller's job —
  * MCEmitter writes whatever bytes it's handed.
  *
- * This implementation is target-agnostic: every supported arch shares
- * one MCEmitter; arch-specific differences live in CGTarget. Per-arch
- * MCEmitter subclasses can layer on later if encoding cache or
- * peephole-merging need shared state with the emitter.
+ * One MCEmitter serves every supported arch — arch-specific differences
+ * live in CGTarget. The fixup encoder (apply_fixup) is the union of all
+ * known arches' label-ref reloc encodings rather than a generic library:
+ * each new arch adds cases to that switch. The cost of an arch enum
+ * here is one switch case; promoting fixup application to a per-arch
+ * vtable would be premature. Per-arch MCEmitter subclasses can layer on
+ * later if encoding cache or peephole-merging need shared state with
+ * the emitter.
  *
  * MCLabel handling: ids are 1-based (0 = MC_LABEL_NONE). Each label
  * carries either a placement (sec_id, offset) or a list of pending
diff --git a/src/arch/x64.c b/src/arch/x64.c
@@ -0,0 +1,387 @@
+/* x86_64 CGTarget skeleton.
+ *
+ * Phase-2 placeholder: codegen methods panic; set_loc/finalize/destroy
+ * do not, and setjmp_/longjmp_ stay NULL. Proves cgtarget_new reaches
+ * an x64-shaped target. Phase 3 adds real codegen: doc/MULTIARCH.md §4. */
+
+#include <string.h>
+
+#include "arch/arch.h"
+#include "arch/x64.h"
+#include "core/arena.h"
+
+typedef struct XImpl {
+  CGTarget base;
+  SrcLoc loc;
+} XImpl;
+
+static SrcLoc xx_loc(void) { return (SrcLoc){0, 0, 0}; }
+
+_Noreturn static void xx_panic(CGTarget* t, const char* what) {
+  compiler_panic(t->c, xx_loc(), "x64: %s not implemented", what);
+}
+
+static void xx_func_begin(CGTarget* t, const CGFuncDesc* d) {
+  (void)d;
+  xx_panic(t, "func_begin");
+}
+static void xx_func_end(CGTarget* t) { xx_panic(t, "func_end"); }
+
+static Reg xx_alloc_reg(CGTarget* t, RegClass cls, const Type* ty) {
+  (void)cls;
+  (void)ty;
+  xx_panic(t, "alloc_reg");
+}
+static void xx_free_reg(CGTarget* t, Reg r) {
+  (void)r;
+  xx_panic(t, "free_reg");
+}
+static FrameSlot xx_frame_slot(CGTarget* t, const FrameSlotDesc* d) {
+  (void)d;
+  xx_panic(t, "frame_slot");
+}
+static void xx_param(CGTarget* t, const CGParamDesc* d) {
+  (void)d;
+  xx_panic(t, "param");
+}
+static const Reg* xx_clobbers(CGTarget* t, RegClass cls, u32* nregs) {
+  (void)cls;
+  (void)nregs;
+  xx_panic(t, "clobbers");
+}
+static void xx_spill_reg(CGTarget* t, Operand a, FrameSlot s, MemAccess m) {
+  (void)a;
+  (void)s;
+  (void)m;
+  xx_panic(t, "spill_reg");
+}
+static void xx_reload_reg(CGTarget* t, Operand a, FrameSlot s, MemAccess m) {
+  (void)a;
+  (void)s;
+  (void)m;
+  xx_panic(t, "reload_reg");
+}
+
+static Label xx_label_new(CGTarget* t) { xx_panic(t, "label_new"); }
+static void xx_label_place(CGTarget* t, Label l) {
+  (void)l;
+  xx_panic(t, "label_place");
+}
+static void xx_jump(CGTarget* t, Label l) {
+  (void)l;
+  xx_panic(t, "jump");
+}
+static void xx_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b,
+                          Label l) {
+  (void)op;
+  (void)a;
+  (void)b;
+  (void)l;
+  xx_panic(t, "cmp_branch");
+}
+
+static CGScope xx_scope_begin(CGTarget* t, const CGScopeDesc* d) {
+  (void)d;
+  xx_panic(t, "scope_begin");
+}
+static void xx_scope_else(CGTarget* t, CGScope s) {
+  (void)s;
+  xx_panic(t, "scope_else");
+}
+static void xx_scope_end(CGTarget* t, CGScope s) {
+  (void)s;
+  xx_panic(t, "scope_end");
+}
+static void xx_break_to(CGTarget* t, CGScope s) {
+  (void)s;
+  xx_panic(t, "break_to");
+}
+static void xx_continue_to(CGTarget* t, CGScope s) {
+  (void)s;
+  xx_panic(t, "continue_to");
+}
+
+static void xx_load_imm(CGTarget* t, Operand d, i64 i) {
+  (void)d;
+  (void)i;
+  xx_panic(t, "load_imm");
+}
+static void xx_load_const(CGTarget* t, Operand d, ConstBytes b) {
+  (void)d;
+  (void)b;
+  xx_panic(t, "load_const");
+}
+static void xx_copy(CGTarget* t, Operand d, Operand s) {
+  (void)d;
+  (void)s;
+  xx_panic(t, "copy");
+}
+static void xx_load(CGTarget* t, Operand d, Operand a, MemAccess m) {
+  (void)d;
+  (void)a;
+  (void)m;
+  xx_panic(t, "load");
+}
+static void xx_store(CGTarget* t, Operand a, Operand s, MemAccess m) {
+  (void)a;
+  (void)s;
+  (void)m;
+  xx_panic(t, "store");
+}
+static void xx_addr_of(CGTarget* t, Operand d, Operand l) {
+  (void)d;
+  (void)l;
+  xx_panic(t, "addr_of");
+}
+static void xx_tls_addr_of(CGTarget* t, Operand d, ObjSymId s, i64 a) {
+  (void)d;
+  (void)s;
+  (void)a;
+  xx_panic(t, "tls_addr_of");
+}
+static void xx_copy_bytes(CGTarget* t, Operand da, Operand sa,
+                          AggregateAccess g) {
+  (void)da;
+  (void)sa;
+  (void)g;
+  xx_panic(t, "copy_bytes");
+}
+static void xx_set_bytes(CGTarget* t, Operand da, Operand bv,
+                         AggregateAccess g) {
+  (void)da;
+  (void)bv;
+  (void)g;
+  xx_panic(t, "set_bytes");
+}
+static void xx_bitfield_load(CGTarget* t, Operand d, Operand ra,
+                             BitFieldAccess b) {
+  (void)d;
+  (void)ra;
+  (void)b;
+  xx_panic(t, "bitfield_load");
+}
+static void xx_bitfield_store(CGTarget* t, Operand ra, Operand s,
+                              BitFieldAccess b) {
+  (void)ra;
+  (void)s;
+  (void)b;
+  xx_panic(t, "bitfield_store");
+}
+
+static void xx_binop(CGTarget* t, BinOp op, Operand d, Operand a, Operand b) {
+  (void)op;
+  (void)d;
+  (void)a;
+  (void)b;
+  xx_panic(t, "binop");
+}
+static void xx_unop(CGTarget* t, UnOp op, Operand d, Operand a) {
+  (void)op;
+  (void)d;
+  (void)a;
+  xx_panic(t, "unop");
+}
+static void xx_cmp(CGTarget* t, CmpOp op, Operand d, Operand a, Operand b) {
+  (void)op;
+  (void)d;
+  (void)a;
+  (void)b;
+  xx_panic(t, "cmp");
+}
+static void xx_convert(CGTarget* t, ConvKind k, Operand d, Operand s) {
+  (void)k;
+  (void)d;
+  (void)s;
+  xx_panic(t, "convert");
+}
+
+static void xx_call(CGTarget* t, const CGCallDesc* d) {
+  (void)d;
+  xx_panic(t, "call");
+}
+static void xx_ret(CGTarget* t, const CGABIValue* v) {
+  (void)v;
+  xx_panic(t, "ret");
+}
+
+static void xx_alloca_(CGTarget* t, Operand d, Operand s, u32 a) {
+  (void)d;
+  (void)s;
+  (void)a;
+  xx_panic(t, "alloca");
+}
+static void xx_va_start_(CGTarget* t, Operand a) {
+  (void)a;
+  xx_panic(t, "va_start");
+}
+static void xx_va_arg_(CGTarget* t, Operand d, Operand a, const Type* ty) {
+  (void)d;
+  (void)a;
+  (void)ty;
+  xx_panic(t, "va_arg");
+}
+static void xx_va_end_(CGTarget* t, Operand a) {
+  (void)a;
+  xx_panic(t, "va_end");
+}
+static void xx_va_copy_(CGTarget* t, Operand d, Operand s) {
+  (void)d;
+  (void)s;
+  xx_panic(t, "va_copy");
+}
+
+static void xx_atomic_load(CGTarget* t, Operand d, Operand a, MemAccess m,
+                           MemOrder o) {
+  (void)d;
+  (void)a;
+  (void)m;
+  (void)o;
+  xx_panic(t, "atomic_load");
+}
+static void xx_atomic_store(CGTarget* t, Operand a, Operand s, MemAccess m,
+                            MemOrder o) {
+  (void)a;
+  (void)s;
+  (void)m;
+  (void)o;
+  xx_panic(t, "atomic_store");
+}
+static void xx_atomic_rmw(CGTarget* t, AtomicOp op, Operand d, Operand a,
+                          Operand v, MemAccess m, MemOrder o) {
+  (void)op;
+  (void)d;
+  (void)a;
+  (void)v;
+  (void)m;
+  (void)o;
+  xx_panic(t, "atomic_rmw");
+}
+static void xx_atomic_cas(CGTarget* t, Operand p, Operand ok, Operand a,
+                          Operand e, Operand des, MemAccess m, MemOrder so,
+                          MemOrder fo) {
+  (void)p;
+  (void)ok;
+  (void)a;
+  (void)e;
+  (void)des;
+  (void)m;
+  (void)so;
+  (void)fo;
+  xx_panic(t, "atomic_cas");
+}
+static void xx_fence(CGTarget* t, MemOrder o) {
+  (void)o;
+  xx_panic(t, "fence");
+}
+
+static void xx_intrinsic(CGTarget* t, IntrinKind k, Operand* d, u32 nd,
+                         const Operand* a, u32 na) {
+  (void)k;
+  (void)d;
+  (void)nd;
+  (void)a;
+  (void)na;
+  xx_panic(t, "intrinsic");
+}
+static void xx_asm_block(CGTarget* t, const char* tmpl,
+                         const AsmConstraint* outs, u32 no, Operand* oo,
+                         const AsmConstraint* ins, u32 ni, const Operand* io,
+                         const Sym* clobs, u32 nc) {
+  (void)tmpl;
+  (void)outs;
+  (void)no;
+  (void)oo;
+  (void)ins;
+  (void)ni;
+  (void)io;
+  (void)clobs;
+  (void)nc;
+  xx_panic(t, "asm_block");
+}
+
+static void xx_set_loc(CGTarget* t, SrcLoc l) {
+  ((XImpl*)t)->loc = l;
+  if (t->mc) t->mc->set_loc(t->mc, l);
+}
+
+static void xx_finalize(CGTarget* t) { (void)t; }
+static void xx_destroy(CGTarget* t) { (void)t; }
+
+static void cgt_cleanup(void* arg) { cgtarget_free((CGTarget*)arg); }
+
+CGTarget* x64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) {
+  XImpl* x = arena_new(c->tu, XImpl);
+  memset(x, 0, sizeof *x);
+
+  CGTarget* t = &x->base;
+  t->c = c;
+  t->obj = o;
+  t->mc = m;
+
+  t->func_begin = xx_func_begin;
+  t->func_end = xx_func_end;
+
+  t->alloc_reg = xx_alloc_reg;
+  t->free_reg = xx_free_reg;
+  t->frame_slot = xx_frame_slot;
+  t->param = xx_param;
+  t->clobbers = xx_clobbers;
+  t->spill_reg = xx_spill_reg;
+  t->reload_reg = xx_reload_reg;
+
+  t->label_new = xx_label_new;
+  t->label_place = xx_label_place;
+  t->jump = xx_jump;
+  t->cmp_branch = xx_cmp_branch;
+
+  t->scope_begin = xx_scope_begin;
+  t->scope_else = xx_scope_else;
+  t->scope_end = xx_scope_end;
+  t->break_to = xx_break_to;
+  t->continue_to = xx_continue_to;
+
+  t->load_imm = xx_load_imm;
+  t->load_const = xx_load_const;
+  t->copy = xx_copy;
+  t->load = xx_load;
+  t->store = xx_store;
+  t->addr_of = xx_addr_of;
+  t->tls_addr_of = xx_tls_addr_of;
+  t->copy_bytes = xx_copy_bytes;
+  t->set_bytes = xx_set_bytes;
+  t->bitfield_load = xx_bitfield_load;
+  t->bitfield_store = xx_bitfield_store;
+
+  t->binop = xx_binop;
+  t->unop = xx_unop;
+  t->cmp = xx_cmp;
+  t->convert = xx_convert;
+
+  t->call = xx_call;
+  t->ret = xx_ret;
+
+  t->alloca_ = xx_alloca_;
+  t->va_start_ = xx_va_start_;
+  t->va_arg_ = xx_va_arg_;
+  t->va_end_ = xx_va_end_;
+  t->va_copy_ = xx_va_copy_;
+
+  t->setjmp_ = NULL;
+  t->longjmp_ = NULL;
+
+  t->atomic_load = xx_atomic_load;
+  t->atomic_store = xx_atomic_store;
+  t->atomic_rmw = xx_atomic_rmw;
+  t->atomic_cas = xx_atomic_cas;
+  t->fence = xx_fence;
+
+  t->intrinsic = xx_intrinsic;
+  t->asm_block = xx_asm_block;
+
+  t->set_loc = xx_set_loc;
+  t->finalize = xx_finalize;
+  t->destroy = xx_destroy;
+
+  compiler_defer(c, cgt_cleanup, t);
+  return t;
+}
diff --git a/src/arch/x64.h b/src/arch/x64.h
@@ -0,0 +1,8 @@
+#ifndef CFREE_ARCH_X64_H
+#define CFREE_ARCH_X64_H
+
+#include "arch/arch.h"
+
+CGTarget* x64_cgtarget_new(Compiler*, ObjBuilder*, MCEmitter*); /* x86_64 CGTarget constructor; returns the new target */
+
+#endif
diff --git a/src/link/link.c b/src/link/link.c
@@ -423,7 +423,7 @@ void link_emit_image_writer(LinkImage* img, Writer* w) {
   if (!img || !w) return;
   switch (img->c->target.obj) {
     case CFREE_OBJ_ELF:
-      link_emit_elf_aarch64(img, w);
+      link_emit_elf(img, w);
       return;
     default:
       compiler_panic(img->c, no_loc(),
diff --git a/src/link/link_dyn.c b/src/link/link_dyn.c
@@ -798,7 +798,7 @@ void layout_dyn(Linker* l, LinkImage* img) {
       compiler_panic(img->c, no_loc(), "link: oom on rw dyn segment");
     /* Zero-initialize. .got.plt[0] (&.dynamic) is filled later, after
      * shift_image_addresses has bumped dyn->dynamic_vaddr. .dynamic
-     * body is built post-shift in link_emit_elf_aarch64. Loader
+     * body is built post-shift in link_emit_elf. Loader
      * patches all .got.plt slots from .rela.plt before user code
      * under DF_1_NOW. */
     memset(img->segment_bytes[rw_seg_idx], 0, (size_t)rw_seg_size);
@@ -944,7 +944,7 @@ void layout_dyn(Linker* l, LinkImage* img) {
    * RELATIVE for PIE internal abs fixups) are emitted by
    * apply_all_relocs as it walks every relocation.  layout_dyn
    * leaves .rela.dyn empty here; the bytes are written post-shift in
-   * link_emit_elf_aarch64. */
+   * link_emit_elf. */
 
   /* .got.plt prelude: for BIND_NOW we leave the body zero — the
    * loader patches every slot from .rela.plt before user code. Some
diff --git a/src/link/link_elf.c b/src/link/link_elf.c
@@ -1,6 +1,10 @@
-/* link_emit_elf_aarch64: write a static ET_EXEC ELF64 image to the
+/* link_emit_elf: write a static ET_EXEC ELF64 image to the
  * caller-provided Writer.
  *
+ * 64-bit little-endian only. The per-arch ELF reloc-type tables in
+ * obj/elf_reloc_<arch>.c handle RelocKind <-> ELF translation; this
+ * file picks e_machine from Compiler.target.arch.
+ *
  * File layout (in write order):
  *
  *   [headers PT_LOAD, PF_R, mapped at IMAGE_BASE]
@@ -568,12 +572,23 @@ static u64 sec_flags_to_shf(u32 flags) {
   return r;
 }
 
-void link_emit_elf_aarch64(LinkImage* img, Writer* w) {
+void link_emit_elf(LinkImage* img, Writer* w) {
   Heap* heap = img->heap;
   Compiler* c = img->c;
 
-  if (c->target.arch != CFREE_ARCH_ARM_64)
-    compiler_panic(c, no_loc(), "link_emit_elf: only AArch64 is implemented");
+  u32 e_machine;
+  switch (c->target.arch) {
+    case CFREE_ARCH_ARM_64:
+      e_machine = EM_AARCH64;
+      break;
+    case CFREE_ARCH_X86_64:
+      e_machine = EM_X86_64;
+      break;
+    default:
+      compiler_panic(c, no_loc(),
+                     "link_emit_elf: unsupported target arch %u",
+                     (u32)c->target.arch);
+  }
   if (img->entry_sym == LINK_SYM_NONE)
     compiler_panic(c, no_loc(), "link_emit_elf: no resolved entry symbol");
   /* IFUNC trampolines: layout_iplt builds the .iplt stubs + .igot.plt
@@ -1109,7 +1124,7 @@ void link_emit_elf_aarch64(LinkImage* img, Writer* w) {
   ehdr.e_ident[6] = EV_CURRENT;
   ehdr.e_ident[7] = ELFOSABI_NONE;
   ehdr.e_type = pie ? ET_DYN : ET_EXEC;
-  ehdr.e_machine = EM_AARCH64;
+  ehdr.e_machine = (u16)e_machine;
   ehdr.e_version = EV_CURRENT;
   ehdr.e_entry = img_base + LinkSyms_at(&img->syms, img->entry_sym - 1)->vaddr;
   ehdr.e_phoff = sizeof(Ehdr64);
diff --git a/src/link/link_internal.h b/src/link/link_internal.h
@@ -306,7 +306,8 @@ struct LinkImage {
 void link_reloc_apply(Compiler*, RelocKind, u8* P_bytes, u64 S, i64 A, u64 P);
 
 /* Public link_emit_image_writer dispatches by Compiler.target.obj. The
- * ELF AArch64 implementation lives in link_elf.c. */
-void link_emit_elf_aarch64(LinkImage*, Writer*);
+ * ELF implementation lives in link_elf.c and dispatches internally on
+ * Compiler.target.arch for e_machine and reloc translation. */
+void link_emit_elf(LinkImage*, Writer*);
 
 #endif
diff --git a/src/link/link_layout.c b/src/link/link_layout.c
@@ -1,7 +1,7 @@
 /* link_resolve: builds a fresh LinkImage from the Linker's inputs.
  *
  * Image-relative discipline: every vaddr / file_offset on the produced
- * image treats the image as based at 0. Consumers (link_emit_elf_aarch64,
+ * image treats the image as based at 0. Consumers (link_emit_elf,
  * cfree_jit_from_image) add their own runtime base before patching
  * relocations or writing PT_LOAD headers. Segment byte buffers hold raw
  * input section bytes — no relocations are applied here, in line with
diff --git a/src/link/link_reloc.c b/src/link/link_reloc.c
@@ -2,7 +2,7 @@
  *
  * Pure function: takes the resolved final addresses (S, P) and the
  * addend (A), and patches `width` bytes at the relocation site.
- * Callers (link_emit_elf_aarch64, cfree_jit_from_image) compute the
+ * Callers (link_emit_elf, cfree_jit_from_image) compute the
  * runtime base offset themselves; this routine sees only final values.
  *
  * Encoding references: ARM ARMv8-A "ELF for the ARM 64-bit Architecture
diff --git a/src/obj/elf.h b/src/obj/elf.h
@@ -5,9 +5,9 @@
  * (obj/obj.h, link/link.h); the ELF spelling of those abstractions only
  * exists inside libcfree.
  *
- * Scope: 64-bit little-endian only. AArch64 today; the per-arch reloc
- * mapping in elf_aarch64_reloc_{to,from} is the place to extend when
- * x86_64/RISC-V/etc. land. */
+ * Scope: 64-bit little-endian only. The per-arch reloc mapping is split
+ * across elf_reloc_<arch>.c (one TU per arch); emit_elf and the linker
+ * dispatch to the right table by Compiler.target.arch. */
 
 #ifndef CFREE_OBJ_ELF_H
 #define CFREE_OBJ_ELF_H
@@ -236,6 +236,33 @@
 u32 elf_aarch64_reloc_to(u32 kind /* RelocKind */);
 u32 elf_aarch64_reloc_from(u32 elf_type);
 
+/* ---- x86_64 ELF reloc types ----
+ *
+ * Subset matching the cfree-canonical RelocKind R_X64_* entries. The
+ * full SysV-x86_64 ABI table has more entries (TLS, GOT variants, ...)
+ * — only the ones the codegen and linker actually need today are
+ * represented here. */
+#define ELF_R_X86_64_NONE 0
+#define ELF_R_X86_64_64 1
+#define ELF_R_X86_64_PC32 2
+#define ELF_R_X86_64_GOT32 3
+#define ELF_R_X86_64_PLT32 4
+#define ELF_R_X86_64_COPY 5
+#define ELF_R_X86_64_GLOB_DAT 6
+#define ELF_R_X86_64_JUMP_SLOT 7
+#define ELF_R_X86_64_RELATIVE 8
+#define ELF_R_X86_64_GOTPCREL 9
+#define ELF_R_X86_64_32 10
+#define ELF_R_X86_64_32S 11
+#define ELF_R_X86_64_16 12
+#define ELF_R_X86_64_PC16 13
+#define ELF_R_X86_64_8 14
+#define ELF_R_X86_64_PC8 15
+#define ELF_R_X86_64_PC64 24
+
+u32 elf_x86_64_reloc_to(u32 kind /* RelocKind */);
+u32 elf_x86_64_reloc_from(u32 elf_type);
+
 /* ---- little-endian byte writers/readers (Writer-based) ----
  * Reads use rd_u*_le from core/bytes.h directly; only writes need the
  * Writer-bridging wrappers below. */
diff --git a/src/obj/elf_emit.c b/src/obj/elf_emit.c
@@ -8,15 +8,17 @@
  *   2. build .symtab + .strtab content (locals first — STT_SECTION
  *      synthesized for every input section, then ordinary locals, then
  *      globals/weaks);
- *   3. build .rela.* content using the AArch64 reloc map;
+ *   3. build .rela.* content using the per-arch reloc map (selected
+ *      by Compiler.target.arch);
  *   4. build .shstrtab;
  *   5. assign file offsets sequentially, respecting per-section
  *      addralign;
  *   6. write Ehdr, then each section's bytes (seeking to its sh_offset),
  *      then the section header table.
  *
- * AArch64 little-endian only. Other archs / endianness panic at entry —
- * the per-arch reloc table is the place to extend, not this file.
+ * 64-bit little-endian only. Per-arch reloc tables (elf_reloc_<arch>.c)
+ * supply the RelocKind -> ELF type mapping; e_machine is selected from
+ * Compiler.target.arch. Big-endian / 32-bit ELF panic at entry.
  *
  * See doc/DESIGN.md §5.5 for the round-trip invariant: read_elf of this
  * output must produce an ObjBuilder shape-equivalent to the input,
@@ -243,13 +245,24 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) {
   Heap* h = (Heap*)c->env->heap;
 
   /* ---- target validation ------------------------------------------ */
-  if (c->target.arch != CFREE_ARCH_ARM_64) {
-    compiler_panic(c, no_loc(),
-                   "emit_elf: only AArch64 is implemented (target arch=%u)",
-                   (u32)c->target.arch);
+  u32 e_machine;
+  u32 (*reloc_to)(u32);
+  switch (c->target.arch) {
+    case CFREE_ARCH_ARM_64:
+      e_machine = EM_AARCH64;
+      reloc_to = elf_aarch64_reloc_to;
+      break;
+    case CFREE_ARCH_X86_64:
+      e_machine = EM_X86_64;
+      reloc_to = elf_x86_64_reloc_to;
+      break;
+    default:
+      compiler_panic(c, no_loc(),
+                     "emit_elf: unsupported target arch %u",
+                     (u32)c->target.arch);
   }
   if (c->target.big_endian) {
-    compiler_panic(c, no_loc(), "emit_elf: big-endian AArch64 not supported");
+    compiler_panic(c, no_loc(), "emit_elf: big-endian ELF not supported");
   }
   if (c->target.ptr_size != 8) {
     compiler_panic(c, no_loc(), "emit_elf: ptr_size %u (expected 8)",
@@ -483,11 +496,12 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) {
     for (u32 i = 0; i < total_relocs; ++i) {
       const Reloc* r = obj_reloc_at(ob, i);
       if (r->section_id != si) continue;
-      u32 etype = elf_aarch64_reloc_to(r->kind);
-      if (etype == ELF_R_AARCH64_NONE && r->kind != R_NONE) {
+      u32 etype = reloc_to(r->kind);
+      if (etype == ELF_R_AARCH64_NONE /* == ELF_R_X86_64_NONE == 0 */ &&
+          r->kind != R_NONE) {
         compiler_panic(c, no_loc(),
-                       "emit_elf: unsupported relocation kind %u for AArch64",
-                       (u32)r->kind);
+                       "emit_elf: unsupported relocation kind %u for arch %u",
+                       (u32)r->kind, (u32)c->target.arch);
       }
       u32 sym_elf_idx;
       if (r->sym == OBJ_SYM_NONE) {
@@ -672,7 +686,7 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) {
   cfree_writer_seek(w, 0);
   cfree_writer_write(w, ident, EI_NIDENT);
   elf_wr_u16(w, ET_REL);
-  elf_wr_u16(w, EM_AARCH64);
+  elf_wr_u16(w, (u16)e_machine);
   elf_wr_u32(w, EV_CURRENT);
   elf_wr_u64(w, 0);                 /* e_entry */
   elf_wr_u64(w, 0);                 /* e_phoff */
diff --git a/src/obj/elf_reloc_x86_64.c b/src/obj/elf_reloc_x86_64.c
@@ -0,0 +1,84 @@
+/* RelocKind <-> x86_64 ELF reloc-type mapping.
+ *
+ * Mirror of elf_reloc_aarch64.c for the x86_64 SysV ABI. The arch-
+ * agnostic R_ABS / R_PC / R_REL RelocKind entries fan out to the
+ * native x86_64 codes; the x86_64-only encodings (R_X64_PC8, PLT32,
+ * GOTPCREL, dynamic-only entries) live in the lower band.
+ *
+ * An unsupported kind maps to ELF_R_X86_64_NONE (emit side) and an unknown ELF
+ * type to (u32)-1 (read side); either signals the caller to panic with a diagnostic. */
+
+#include "obj/elf.h"
+
+u32 elf_x86_64_reloc_to(u32 kind /* RelocKind */) { /* RelocKind -> ELF_R_X86_64_* code */
+  switch (kind) {
+    case R_NONE:
+      return ELF_R_X86_64_NONE;
+    case R_ABS64:
+      return ELF_R_X86_64_64;
+    case R_ABS32:
+      return ELF_R_X86_64_32;
+    case R_PC32:
+      return ELF_R_X86_64_PC32;
+    case R_PC64:
+      return ELF_R_X86_64_PC64;
+    case R_REL32: /* same ELF code as R_PC32 */
+      return ELF_R_X86_64_PC32;
+    case R_REL64: /* same ELF code as R_PC64 */
+      return ELF_R_X86_64_PC64;
+    case R_X64_PC8:
+      return ELF_R_X86_64_PC8;
+    case R_PLT32: /* generic PLT kind shares one ELF code with R_X64_PLT32 */
+    case R_X64_PLT32:
+      return ELF_R_X86_64_PLT32;
+    case R_GOT32:
+      return ELF_R_X86_64_GOT32;
+    case R_X64_GOTPCREL:
+      return ELF_R_X86_64_GOTPCREL;
+    case R_X64_GLOB_DAT:
+      return ELF_R_X86_64_GLOB_DAT;
+    case R_X64_JUMP_SLOT:
+      return ELF_R_X86_64_JUMP_SLOT;
+    case R_X64_RELATIVE:
+      return ELF_R_X86_64_RELATIVE;
+    case R_X64_COPY:
+      return ELF_R_X86_64_COPY;
+    default: /* no x86_64 mapping; emit_elf panics on NONE when kind != R_NONE */
+      return ELF_R_X86_64_NONE;
+  }
+}
+
+u32 elf_x86_64_reloc_from(u32 elf_type) { /* ELF_R_X86_64_* code -> RelocKind, or (u32)-1 */
+  switch (elf_type) {
+    case ELF_R_X86_64_NONE:
+      return R_NONE;
+    case ELF_R_X86_64_64:
+      return R_ABS64;
+    case ELF_R_X86_64_32:
+      return R_ABS32;
+    case ELF_R_X86_64_32S: /* NOTE(review): folded into R_ABS32, so re-emit yields ELF_R_X86_64_32, not 32S -- confirm intended */
+      return R_ABS32;
+    case ELF_R_X86_64_PC32: /* always R_PC32; R_REL32 is never produced in this direction */
+      return R_PC32;
+    case ELF_R_X86_64_PC64: /* always R_PC64; R_REL64 is never produced in this direction */
+      return R_PC64;
+    case ELF_R_X86_64_PC8:
+      return R_X64_PC8;
+    case ELF_R_X86_64_PLT32: /* always the x64-specific kind, never the generic R_PLT32 */
+      return R_X64_PLT32;
+    case ELF_R_X86_64_GOT32:
+      return R_GOT32;
+    case ELF_R_X86_64_GOTPCREL:
+      return R_X64_GOTPCREL;
+    case ELF_R_X86_64_GLOB_DAT:
+      return R_X64_GLOB_DAT;
+    case ELF_R_X86_64_JUMP_SLOT:
+      return R_X64_JUMP_SLOT;
+    case ELF_R_X86_64_RELATIVE:
+      return R_X64_RELATIVE;
+    case ELF_R_X86_64_COPY:
+      return R_X64_COPY;
+    default: /* includes ELF_R_X86_64_16 and ELF_R_X86_64_PC16, which have no case above */
+      return (u32)-1; /* sentinel: no RelocKind corresponds to this ELF type */
+  }
+}
diff --git a/src/obj/obj.h b/src/obj/obj.h
@@ -147,6 +147,16 @@ typedef enum RelocKind {
   R_AARCH64_JUMP_SLOT,
   R_AARCH64_RELATIVE,
   R_AARCH64_COPY,
+  /* x86_64 reloc kinds. Most x86_64 relocations are expressed with the
+   * generic R_ABS* / R_PC* entries; the kinds below are the x86_64-specific
+   * encodings (8-bit displacements, GOT/PLT, dynamic-linker-only entries). */
+  R_X64_PC8,
+  R_X64_PLT32,
+  R_X64_GOTPCREL,
+  R_X64_GLOB_DAT,
+  R_X64_JUMP_SLOT,
+  R_X64_RELATIVE,
+  R_X64_COPY,
   R_RV_HI20,
   R_RV_LO12_I,
   R_RV_LO12_S,

	kit kit
	git clone https://git.ryansepassi.com/git/kit.git
	Log \| Files \| Refs \| README

M	src/abi/abi.c	\|	211	++++++++++++++++++-------------------------------------------------------------
A	src/abi/abi_aapcs64.c	\|	147	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	src/abi/abi_internal.h	\|	54	++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	src/abi/abi_sysv_x64.c	\|	79	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	src/api/pipeline.c	\|	14	++++++++++++++
A	src/arch/aa64.h	\|	8	++++++++
M	src/arch/aarch64.c	\|	18	+-----------------
A	src/arch/cgtarget.c	\|	33	+++++++++++++++++++++++++++++++++
M	src/arch/mc.c	\|	12	++++++++----
A	src/arch/x64.c	\|	387	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	src/arch/x64.h	\|	8	++++++++
M	src/link/link.c	\|	2	+-
M	src/link/link_dyn.c	\|	4	++--
M	src/link/link_elf.c	\|	25	++++++++++++++++++++-----
M	src/link/link_internal.h	\|	5	+++--
M	src/link/link_layout.c	\|	2	+-
M	src/link/link_reloc.c	\|	2	+-
M	src/obj/elf.h	\|	33	++++++++++++++++++++++++++++++---
M	src/obj/elf_emit.c	\|	40	+++++++++++++++++++++++++++-------------
A	src/obj/elf_reloc_x86_64.c	\|	84	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	src/obj/obj.h	\|	10	++++++++++