kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 7e395f14c533274c31be05b48f775225c0d64d37
parent 557ef189d7571446bc3e5130a23aadeed4825e0d
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 14 May 2026 12:36:40 -0700

Route object relocation formats through arch descriptors

Diffstat:
Mdoc/arch-registration-plan.md | 2+-
Msrc/arch/aarch64/arch.c | 20++++++++++++++++++++
Msrc/arch/arch.h | 20++++++++++++++++++++
Msrc/arch/registry.c | 34+++++++++++++++++++++++++---------
Msrc/arch/rv64/arch.c | 10++++++++++
Msrc/arch/x64/arch.c | 10++++++++++
Msrc/obj/elf_emit.c | 35+++++++++--------------------------
Msrc/obj/elf_read.c | 25++++++++-----------------
Msrc/obj/macho_emit.c | 24++++++++++++------------
Msrc/obj/macho_read.c | 15++++++++-------
10 files changed, 123 insertions(+), 72 deletions(-)

diff --git a/doc/arch-registration-plan.md b/doc/arch-registration-plan.md @@ -5,7 +5,7 @@ - [x] Introduce one internal arch descriptor and registry lookup. - [x] Route existing arch dispatchers through that descriptor without changing behavior. - [x] Move ABI selection behind the arch descriptor. -- [ ] Move object-format relocation translators behind the arch descriptor. +- [x] Move object-format relocation translators behind the arch descriptor. - [ ] Move linker-only arch constants and stub emitters fully behind the descriptor. - [ ] Move assembler/disassembler/register helpers behind arch-owned implementation files. - [ ] Make `MCEmitter` delegate label fixup encoding to the arch descriptor. diff --git a/src/arch/aarch64/arch.c b/src/arch/aarch64/arch.c @@ -5,6 +5,8 @@ #include "arch/aa64_disasm.h" #include "arch/aa64_regs.h" #include "link/link_arch.h" +#include "obj/elf.h" +#include "obj/macho.h" static const ABIVtable* aa64_abi_vtable(Compiler* c, CfreeOSKind os) { (void)c; @@ -21,6 +23,22 @@ static int aa64_register_at_public(uint32_t idx, CfreeArchReg* out) { return aa64_register_iter_get(idx, &out->dwarf_idx, &out->name); } +static const ArchElfOps aa64_elf_ops = { + .e_machine = EM_AARCH64, + .e_flags = 0, + .reloc_to = elf_aarch64_reloc_to, + .reloc_from = elf_aarch64_reloc_from, +}; + +static const ArchMachoOps aa64_macho_ops = { + .cputype = CPU_TYPE_ARM64, + .cpusubtype = CPU_SUBTYPE_ARM64_ALL, + .reloc_to = macho_aarch64_reloc_to, + .reloc_pcrel = macho_aarch64_reloc_pcrel, + .reloc_length = macho_aarch64_reloc_length, + .reloc_from = macho_aarch64_reloc_from, +}; + const ArchImpl arch_impl_aa64 = { .kind = CFREE_ARCH_ARM_64, .name = "aa64", @@ -28,6 +46,8 @@ const ArchImpl arch_impl_aa64 = { .cgtarget_new = aa64_cgtarget_new, .disasm_new = aa64_disasm_new, .link = &link_arch_aa64, + .elf = &aa64_elf_ops, + .macho = &aa64_macho_ops, .register_name = aa64_register_name, .register_index = aa64_register_index, .register_count = aa64_register_iter_size, diff --git a/src/arch/arch.h b/src/arch/arch.h @@ -703,6 +703,22 @@ struct ArchDisasm { typedef struct LinkArchDesc LinkArchDesc; typedef struct ABIVtable ABIVtable; +typedef struct ArchElfOps { + u32 e_machine; + u32 e_flags; + u32 (*reloc_to)(u32 kind); + u32 (*reloc_from)(u32 wire_type); +} ArchElfOps; + +typedef struct ArchMachoOps { + u32 cputype; + u32 cpusubtype; + u32 (*reloc_to)(u32 kind); + u32 (*reloc_pcrel)(u32 kind); + u32 (*reloc_length)(u32 kind); + u32 (*reloc_from)(u32 wire_type); +} ArchMachoOps; + typedef struct ArchImpl { CfreeArchKind kind; const char* name; @@ -712,6 +728,8 @@ typedef struct ArchImpl { ArchDisasm* (*disasm_new)(Compiler*); const LinkArchDesc* link; + const ArchElfOps* elf; + const ArchMachoOps* macho; const char* (*register_name)(uint32_t dwarf_idx); int (*register_index)(const char* name, uint32_t* idx_out); @@ -721,6 +739,8 @@ typedef struct ArchImpl { const ArchImpl* arch_lookup(CfreeArchKind); const ArchImpl* arch_for_compiler(const Compiler*); +const ArchImpl* arch_lookup_elf_machine(u32 e_machine); +const ArchImpl* arch_lookup_macho_cputype(u32 cputype); ArchDisasm* arch_disasm_new(Compiler*); u32 arch_disasm_decode(ArchDisasm*, const u8* bytes, size_t len, u64 vaddr, diff --git a/src/arch/registry.c b/src/arch/registry.c @@ -4,20 +4,36 @@ extern const ArchImpl arch_impl_aa64; extern const ArchImpl arch_impl_rv64; extern const ArchImpl arch_impl_x64; +static const ArchImpl* const arch_impls[] = { + &arch_impl_aa64, + &arch_impl_x64, + &arch_impl_rv64, +}; + const ArchImpl* arch_lookup(CfreeArchKind arch) { - switch (arch) { - case CFREE_ARCH_ARM_64: - return &arch_impl_aa64; - case CFREE_ARCH_X86_64: - return &arch_impl_x64; - case CFREE_ARCH_RV64: - return &arch_impl_rv64; - default: - return NULL; + for (u32 i = 0; i < (u32)(sizeof arch_impls / sizeof arch_impls[0]); ++i) { + if (arch_impls[i]->kind == arch) return arch_impls[i]; } + return NULL; } const ArchImpl* arch_for_compiler(const Compiler* c) { if (!c) return NULL; return arch_lookup(c->target.arch); } + +const ArchImpl* arch_lookup_elf_machine(u32 e_machine) { + for (u32 i = 0; i < (u32)(sizeof arch_impls / sizeof arch_impls[0]); ++i) { + const ArchImpl* impl = arch_impls[i]; + if (impl->elf && impl->elf->e_machine == e_machine) return impl; + } + return NULL; +} + +const ArchImpl* arch_lookup_macho_cputype(u32 cputype) { + for (u32 i = 0; i < (u32)(sizeof arch_impls / sizeof arch_impls[0]); ++i) { + const ArchImpl* impl = arch_impls[i]; + if (impl->macho && impl->macho->cputype == cputype) return impl; + } + return NULL; +} diff --git a/src/arch/rv64/arch.c b/src/arch/rv64/arch.c @@ -3,6 +3,7 @@ #include "abi/abi_internal.h" #include "arch/rv64.h" #include "link/link_arch.h" +#include "obj/elf.h" static const ABIVtable* rv64_abi_vtable(Compiler* c, CfreeOSKind os) { (void)c; @@ -10,6 +11,13 @@ static const ABIVtable* rv64_abi_vtable(Compiler* c, CfreeOSKind os) { return &rv64_vtable; } +static const ArchElfOps rv64_elf_ops = { + .e_machine = EM_RISCV, + .e_flags = EF_RISCV_RVC | EF_RISCV_FLOAT_ABI_DOUBLE, + .reloc_to = elf_riscv64_reloc_to, + .reloc_from = elf_riscv64_reloc_from, +}; + const ArchImpl arch_impl_rv64 = { .kind = CFREE_ARCH_RV64, .name = "rv64", @@ -17,6 +25,8 @@ const ArchImpl arch_impl_rv64 = { .cgtarget_new = rv64_cgtarget_new, .disasm_new = NULL, .link = &link_arch_rv64, + .elf = &rv64_elf_ops, + .macho = NULL, .register_name = NULL, .register_index = NULL, .register_count = NULL, diff --git a/src/arch/x64/arch.c b/src/arch/x64/arch.c @@ -3,6 +3,7 @@ #include "abi/abi_internal.h" #include "arch/x64.h" #include "link/link_arch.h" +#include "obj/elf.h" static const ABIVtable* x64_abi_vtable(Compiler* c, CfreeOSKind os) { (void)c; @@ -10,6 +11,13 @@ static const ABIVtable* x64_abi_vtable(Compiler* c, CfreeOSKind os) { return &sysv_x64_vtable; } +static const ArchElfOps x64_elf_ops = { + .e_machine = EM_X86_64, + .e_flags = 0, + .reloc_to = elf_x86_64_reloc_to, + .reloc_from = elf_x86_64_reloc_from, +}; + const ArchImpl arch_impl_x64 = { .kind = CFREE_ARCH_X86_64, .name = "x64", @@ -17,6 +25,8 @@ const ArchImpl arch_impl_x64 = { .cgtarget_new = x64_cgtarget_new, .disasm_new = NULL, .link = &link_arch_x64, + .elf = &x64_elf_ops, + .macho = NULL, .register_name = NULL, .register_index = NULL, .register_count = NULL, diff --git a/src/obj/elf_emit.c b/src/obj/elf_emit.c @@ -27,6 +27,7 @@ #include <string.h> +#include "arch/arch.h" #include "core/arena.h" #include "core/buf.h" #include "core/heap.h" @@ -245,26 +246,16 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) { Heap* h = (Heap*)c->env->heap; /* ---- target validation ------------------------------------------ */ + const ArchImpl* arch = arch_for_compiler(c); + const ArchElfOps* elf = arch ? arch->elf : NULL; u32 e_machine; u32 (*reloc_to)(u32); - switch (c->target.arch) { - case CFREE_ARCH_ARM_64: - e_machine = EM_AARCH64; - reloc_to = elf_aarch64_reloc_to; - break; - case CFREE_ARCH_X86_64: - e_machine = EM_X86_64; - reloc_to = elf_x86_64_reloc_to; - break; - case CFREE_ARCH_RV64: - e_machine = EM_RISCV; - reloc_to = elf_riscv64_reloc_to; - break; - default: - compiler_panic(c, no_loc(), - "emit_elf: unsupported target arch %u", - (u32)c->target.arch); + if (!elf || !elf->reloc_to) { + compiler_panic(c, no_loc(), "emit_elf: unsupported target arch %u", + (u32)c->target.arch); } + e_machine = elf->e_machine; + reloc_to = elf->reloc_to; if (c->target.big_endian) { compiler_panic(c, no_loc(), "emit_elf: big-endian ELF not supported"); } @@ -697,15 +688,7 @@ void emit_elf(Compiler* c, ObjBuilder* ob, Writer* w) { * else synthesize a sensible per-arch default. RV64 cfree targets the * Linux psABI's lp64d soft-relax convention (RVC + double-float ABI). */ u32 e_flags; - if (!obj_get_elf_e_flags(ob, &e_flags)) { - switch (e_machine) { - case EM_RISCV: - e_flags = EF_RISCV_RVC | EF_RISCV_FLOAT_ABI_DOUBLE; - break; - default: - e_flags = 0; - } - } + if (!obj_get_elf_e_flags(ob, &e_flags)) e_flags = elf->e_flags; cfree_writer_seek(w, 0); cfree_writer_write(w, ident, EI_NIDENT); diff --git a/src/obj/elf_read.c b/src/obj/elf_read.c @@ -10,6 +10,7 @@ #include <string.h> +#include "arch/arch.h" #include "core/heap.h" #include "core/pool.h" #include "obj/elf.h" @@ -222,21 +223,13 @@ ObjBuilder* read_elf(Compiler* c, const char* name, const u8* data, (u32)e_type); u16 e_machine = elf_rd_u16(data + 18); + const ArchImpl* arch = arch_lookup_elf_machine(e_machine); u32 (*reloc_from)(u32); - switch (e_machine) { - case EM_AARCH64: - reloc_from = elf_aarch64_reloc_from; - break; - case EM_X86_64: - reloc_from = elf_x86_64_reloc_from; - break; - case EM_RISCV: - reloc_from = elf_riscv64_reloc_from; - break; - default: - compiler_panic(c, no_loc(), - "read_elf: unsupported e_machine 0x%x", (u32)e_machine); + if (!arch || !arch->elf || !arch->elf->reloc_from) { + compiler_panic(c, no_loc(), "read_elf: unsupported e_machine 0x%x", + (u32)e_machine); } + reloc_from = arch->elf->reloc_from; u64 e_shoff = elf_rd_u64(data + 40); u32 e_flags = elf_rd_u32(data + 48); @@ -550,10 +543,8 @@ ObjBuilder* read_elf_dso(Compiler* c, const char* name, const u8* data, (u32)e_type); u16 e_machine = elf_rd_u16(data + 18); - if (e_machine != EM_AARCH64 && e_machine != EM_X86_64 && - e_machine != EM_RISCV) - compiler_panic(c, no_loc(), - "read_elf_dso: unsupported e_machine 0x%x", + if (!arch_lookup_elf_machine(e_machine)) + compiler_panic(c, no_loc(), "read_elf_dso: unsupported e_machine 0x%x", (u32)e_machine); u64 e_phoff = elf_rd_u64(data + 32); diff --git a/src/obj/macho_emit.c b/src/obj/macho_emit.c @@ -25,6 +25,7 @@ #include <string.h> +#include "arch/arch.h" #include "core/arena.h" #include "core/buf.h" #include "core/bytes.h" @@ -214,23 +215,22 @@ void emit_macho(Compiler* c, ObjBuilder* ob, Writer* w) { Heap* h = (Heap*)c->env->heap; /* ---- target validation ---------------------------------------- */ + const ArchImpl* arch = arch_for_compiler(c); + const ArchMachoOps* macho = arch ? arch->macho : NULL; u32 cputype, cpusubtype; u32 (*reloc_to)(u32); u32 (*reloc_pcrel)(u32); u32 (*reloc_length)(u32); - switch (c->target.arch) { - case CFREE_ARCH_ARM_64: - cputype = CPU_TYPE_ARM64; - cpusubtype = CPU_SUBTYPE_ARM64_ALL; - reloc_to = macho_aarch64_reloc_to; - reloc_pcrel = macho_aarch64_reloc_pcrel; - reloc_length = macho_aarch64_reloc_length; - break; - default: - compiler_panic(c, no_loc(), - "emit_macho: unsupported target arch %u (only arm64 today)", - (u32)c->target.arch); + if (!macho || !macho->reloc_to || !macho->reloc_pcrel || + !macho->reloc_length) { + compiler_panic(c, no_loc(), "emit_macho: unsupported target arch %u", + (u32)c->target.arch); } + cputype = macho->cputype; + cpusubtype = macho->cpusubtype; + reloc_to = macho->reloc_to; + reloc_pcrel = macho->reloc_pcrel; + reloc_length = macho->reloc_length; if (c->target.big_endian) { compiler_panic(c, no_loc(), "emit_macho: big-endian not supported"); } diff --git a/src/obj/macho_read.c b/src/obj/macho_read.c @@ -11,6 +11,7 @@ #include <string.h> +#include "arch/arch.h" #include "core/arena.h" #include "core/bytes.h" #include "core/heap.h" @@ -104,13 +105,14 @@ ObjBuilder* read_macho(Compiler* c, const char* name, const u8* data, compiler_panic(c, no_loc(), "read_macho: bad magic 0x%x", magic); u32 cputype = rd_u32_le(data + 4); + const ArchImpl* arch = arch_lookup_macho_cputype(cputype); + const ArchMachoOps* macho = arch ? arch->macho : NULL; u32 filetype = rd_u32_le(data + 12); u32 ncmds = rd_u32_le(data + 16); u32 sizeofcmds = rd_u32_le(data + 20); - if (cputype != CPU_TYPE_ARM64) - compiler_panic(c, no_loc(), - "read_macho: unsupported cputype 0x%x (only arm64 today)", + if (!macho || !macho->reloc_from) + compiler_panic(c, no_loc(), "read_macho: unsupported cputype 0x%x", cputype); if (filetype != MH_OBJECT) compiler_panic(c, no_loc(), @@ -339,7 +341,7 @@ ObjBuilder* read_macho(Compiler* c, const char* name, const u8* data, continue; } - u32 kind = macho_aarch64_reloc_from(r_type); + u32 kind = macho->reloc_from(r_type); if (kind == (u32)-1) compiler_panic(c, no_loc(), "read_macho: unsupported reloc type %u", r_type); @@ -446,9 +448,8 @@ ObjBuilder* read_macho_dso(Compiler* c, const char* name, const u8* data, u32 ncmds = rd_u32_le(data + 16); u32 sizeofcmds = rd_u32_le(data + 20); - if (cputype != CPU_TYPE_ARM64) - compiler_panic(c, no_loc(), - "read_macho_dso: unsupported cputype 0x%x (arm64 only)", + if (!arch_lookup_macho_cputype(cputype)) + compiler_panic(c, no_loc(), "read_macho_dso: unsupported cputype 0x%x", cputype); if (filetype != MH_DYLIB && filetype != MH_BUNDLE) compiler_panic(c, no_loc(),