commit 9f744cea2b46ba8b526001fd7c4a425e531732ea
parent 31020a257f10fb15cc5c70df86090ffc83a236d3
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 14 May 2026 13:03:28 -0700
Route arch helpers through descriptors
Diffstat:
9 files changed, 172 insertions(+), 106 deletions(-)
diff --git a/doc/arch-registration-plan.md b/doc/arch-registration-plan.md
@@ -7,8 +7,8 @@
- [x] Move ABI selection behind the arch descriptor.
- [x] Move object-format relocation translators behind the arch descriptor.
- [x] Move linker-only arch constants and stub emitters fully behind the descriptor.
-- [ ] Move assembler/disassembler/register helpers behind arch-owned implementation files.
-- [ ] Make `MCEmitter` delegate label fixup encoding to the arch descriptor.
+- [x] Move assembler/disassembler/register helpers behind arch-owned implementation files.
+- [x] Make `MCEmitter` delegate label fixup encoding to the arch descriptor.
- [ ] Consolidate files into `src/arch/{aa64,rv64,x64}/` with one exposed implementation object per arch.
- [ ] Teach the build to honor `CFREE_ARCHS` and compile only selected arch subtrees.
- [ ] Add targeted subset-build tests for `aa64`, `x64`, `rv64`, and mixed subsets.
diff --git a/src/arch/aa64_asm.c b/src/arch/aa64_asm.c
@@ -30,6 +30,7 @@
/* ---- public handle ---- */
struct AA64Asm {
+ ArchAsm base;
Compiler* c;
/* Inline-asm bound state (set by aa64_inline_bind, cleared otherwise).
@@ -48,15 +49,32 @@ struct AA64Asm {
u32 nclob;
};
+static void aa64_arch_asm_insn(ArchAsm* base, AsmDriver* d, Sym mnemonic);
+static void aa64_arch_asm_destroy(ArchAsm* base);
+
+/* Allocate an AA64Asm from c->tu via arena_new, zero it, and install the
+ * ArchAsm callbacks so the same handle can also be driven through `base`. */
AA64Asm* aa64_asm_open(Compiler* c) {
AA64Asm* a = arena_new(c->tu, AA64Asm);
memset(a, 0, sizeof *a);
+ /* Generic entry points; each one casts the ArchAsm* back to AA64Asm*. */
+ a->base.insn = aa64_arch_asm_insn;
+ a->base.destroy = aa64_arch_asm_destroy;
a->c = c;
return a;
}
+/* Counterpart to aa64_asm_open. Currently a no-op that ignores its argument;
+ * the handle was obtained from arena_new(c->tu, ...) and nothing is released
+ * here. */
void aa64_asm_close(AA64Asm* a) { (void)a; }
+/* ArchImpl.asm_new hook for aa64: open an AA64Asm and hand it back through
+ * its embedded ArchAsm header. */
+ArchAsm* aa64_arch_asm_new(Compiler* c) {
+  AA64Asm* self = aa64_asm_open(c);
+  return &self->base;
+}
+
+/* ArchAsm.insn thunk: recover the AA64Asm that embeds `base` and forward the
+ * mnemonic to aa64_asm_insn. The cast is only valid while `base` remains the
+ * first member of struct AA64Asm. */
+static void aa64_arch_asm_insn(ArchAsm* base, AsmDriver* d, Sym mnemonic) {
+  AA64Asm* self = (AA64Asm*)base;
+  aa64_asm_insn(self, d, mnemonic);
+}
+
+/* ArchAsm.destroy thunk: recover the AA64Asm that embeds `base` and pass it
+ * to aa64_asm_close. The cast is only valid while `base` remains the first
+ * member of struct AA64Asm. */
+static void aa64_arch_asm_destroy(ArchAsm* base) {
+  AA64Asm* self = (AA64Asm*)base;
+  aa64_asm_close(self);
+}
+
void aa64_inline_bind(AA64Asm* a,
const AsmConstraint* outs, u32 nout, Operand* out_ops,
const AsmConstraint* ins, u32 nin, const Operand* in_ops,
diff --git a/src/arch/aa64_asm.h b/src/arch/aa64_asm.h
@@ -16,6 +16,7 @@
#include "asm/asm_lex.h"
typedef struct AsmDriver AsmDriver;
+typedef struct ArchAsm ArchAsm;
typedef struct AA64Asm AA64Asm;
@@ -23,6 +24,7 @@ typedef struct AA64Asm AA64Asm;
* itself (which lives on the compiler's TU arena). */
AA64Asm* aa64_asm_open(Compiler* c);
void aa64_asm_close(AA64Asm*);
+/* Opens a handle exactly like aa64_asm_open but returns its generic ArchAsm
+ * view; this is the function aa64's ArchImpl installs as `asm_new`. */
+ArchAsm* aa64_arch_asm_new(Compiler* c);
/* Parse one mnemonic line. `mnemonic` is the first identifier on the
* line (or "b.cond" composite). The driver has already consumed the
diff --git a/src/arch/aarch64/arch.c b/src/arch/aarch64/arch.c
@@ -2,11 +2,14 @@
#include "abi/abi_internal.h"
#include "arch/aa64.h"
+#include "arch/aa64_asm.h"
#include "arch/aa64_disasm.h"
#include "arch/aa64_regs.h"
+#include "core/bytes.h"
#include "link/link_arch.h"
#include "obj/elf.h"
#include "obj/macho.h"
+#include "obj/obj.h"
static const ABIVtable* aa64_abi_vtable(Compiler* c, CfreeOSKind os) {
(void)c;
@@ -39,12 +42,49 @@ static const ArchMachoOps aa64_macho_ops = {
.reloc_from = macho_aarch64_reloc_from,
};
+/* Encode a resolved label displacement into an AArch64 branch instruction.
+ *
+ * Handles R_AARCH64_JUMP26 / R_AARCH64_CALL26 (imm26 in bits 0..25) and
+ * R_AARCH64_CONDBR19 (imm19 in bits 5..23); both fields hold the byte
+ * displacement divided by 4. The instruction word already present in the
+ * section is read, its immediate field replaced, and the word written back.
+ *
+ * Returns 0 on success and nonzero when the request cannot be encoded:
+ * NULL fx, width other than 4, an unhandled reloc kind, or a displacement
+ * that is misaligned or does not fit the signed immediate field. A section
+ * that cannot be looked up is skipped and reported as success. */
+static int aa64_apply_label_fixup(Compiler* c, const ArchLabelFixup* fx) {
+  const Section* s;
+  u8 cur[4];
+  u32 word;
+  u32 imm;
+  u32 field; /* immediate mask, before shifting into place */
+  u32 shift; /* bit position of the immediate's least significant bit */
+  i64 limit; /* byte displacement must lie in [-limit, limit) */
+
+  (void)c;
+  if (!fx || fx->width != 4) return 1;
+
+  /* Classify the kind before touching the section, so an unsupported kind
+   * is always reported even when the section lookup would have failed. */
+  switch (fx->kind) {
+    case R_AARCH64_JUMP26:
+    case R_AARCH64_CALL26:
+      field = 0x03ffffffu;
+      shift = 0;
+      limit = (i64)1 << 27;
+      break;
+    case R_AARCH64_CONDBR19:
+      field = 0x7ffffu;
+      shift = 5;
+      limit = (i64)1 << 20;
+      break;
+    default:
+      return 1;
+  }
+
+  /* Refuse displacements that the field would otherwise silently truncate. */
+  if (fx->disp % 4 != 0 || fx->disp < -limit || fx->disp >= limit) return 1;
+
+  s = obj_section_get(fx->obj, fx->sec_id);
+  if (!s) return 0;
+  buf_read(&s->bytes, fx->offset, cur, 4);
+  word = rd_u32_le(cur);
+
+  /* disp is a multiple of 4 here, so the division is exact. */
+  imm = (u32)(fx->disp / 4) & field;
+  word = (word & ~(field << shift)) | (imm << shift);
+
+  wr_u32_le(cur, word);
+  obj_patch(fx->obj, fx->sec_id, fx->offset, cur, 4);
+  return 0;
+}
+
const ArchImpl arch_impl_aa64 = {
.kind = CFREE_ARCH_ARM_64,
.name = "aa64",
.abi_vtable = aa64_abi_vtable,
.cgtarget_new = aa64_cgtarget_new,
+ .asm_new = aa64_arch_asm_new,
.disasm_new = aa64_disasm_new,
+ .apply_label_fixup = aa64_apply_label_fixup,
.link = &link_arch_aa64,
.elf = &aa64_elf_ops,
.macho = &aa64_macho_ops,
diff --git a/src/arch/arch.h b/src/arch/arch.h
@@ -396,6 +396,23 @@ typedef struct AsmConstraint {
typedef u32 MCLabel;
#define MC_LABEL_NONE 0u
+typedef struct AsmDriver AsmDriver;
+
+typedef struct ArchAsm ArchAsm;
+/* Generic per-arch assembler handle. An arch embeds this header in its own
+ * state (aa64 does so in struct AA64Asm) and fills in the callbacks;
+ * asm_parse reaches the arch only through them. */
+struct ArchAsm {
+ /* Handle one instruction line; asm_parse passes the mnemonic it composed. */
+ void (*insn)(ArchAsm*, AsmDriver*, Sym mnemonic);
+ /* Tear down the handle. May be NULL: asm_parse checks before calling. */
+ void (*destroy)(ArchAsm*);
+};
+
+/* One label-reference patch request, filled in by MCEmitter's apply_fixup
+ * and handed to ArchImpl.apply_label_fixup for arch-specific encoding. */
+typedef struct ArchLabelFixup {
+ ObjBuilder* obj; /* object being built; encoders patch it via obj_patch */
+ u32 sec_id; /* section holding the bytes to patch */
+ u32 offset; /* offset of the patch site within that section */
+ u32 width; /* patch width; the encoders in this change accept only 4 */
+ RelocKind kind; /* selects the encoding (R_PC32, R_AARCH64_*, R_RV_*) */
+ i64 disp; /* target_offset - offset + addend, computed by MCEmitter */
+} ArchLabelFixup;
+
typedef struct MCEmitter MCEmitter;
struct MCEmitter {
/* Machine/object emission context. Subclasses extend. */
@@ -725,7 +742,9 @@ typedef struct ArchImpl {
const ABIVtable* (*abi_vtable)(Compiler*, CfreeOSKind os);
CGTarget* (*cgtarget_new)(Compiler*, ObjBuilder*, MCEmitter*);
+ ArchAsm* (*asm_new)(Compiler*);
ArchDisasm* (*disasm_new)(Compiler*);
+ int (*apply_label_fixup)(Compiler*, const ArchLabelFixup*);
const LinkArchDesc* link;
const ArchElfOps* elf;
diff --git a/src/arch/mc.c b/src/arch/mc.c
@@ -5,14 +5,8 @@
* relocations / source-location stamps. Encoding is the caller's job —
* MCEmitter writes whatever bytes it's handed.
*
- * One MCEmitter serves every supported arch — arch-specific differences
- * live in CGTarget. The fixup encoder (apply_fixup) is the union of all
- * known arches' label-ref reloc encodings rather than a generic library:
- * each new arch adds cases to that switch. The cost of an arch enum
- * here is one switch case; promoting fixup application to a per-arch
- * vtable would be premature. Per-arch MCEmitter subclasses can layer on
- * later if encoding cache or peephole-merging need shared state with
- * the emitter.
+ * One MCEmitter serves every supported arch. Label fixup encoding is
+ * delegated to the target's ArchImpl.apply_label_fixup hook, so MCEmitter
+ * itself owns label bookkeeping only.
*
* MCLabel handling: ids are 1-based (0 = MC_LABEL_NONE). Each label
* carries either a placement (sec_id, offset) or a list of pending
@@ -35,7 +29,6 @@
#include "arch/arch.h"
#include "core/arena.h"
-#include "core/bytes.h"
#include "obj/obj.h"
typedef struct MCFixup {
@@ -83,92 +76,23 @@ static void labels_grow(MCImpl* mc, u32 want) {
+/* Resolve one pending label reference against the label's final offset.
+ * MCEmitter only computes the displacement and describes the patch site; the
+ * target arch's ArchImpl.apply_label_fixup hook performs the encoding.
+ * Panics when the arch provides no such hook, or when the hook rejects the
+ * request by returning nonzero. */
static void apply_fixup(MCImpl* mc, const MCFixup* fx, u32 target_offset) {
/* signed displacement from end-of-instruction position to target. */
- i64 disp = (i64)target_offset - (i64)fx->offset + fx->addend;
-
- switch (fx->kind) {
- case R_PC32: {
- u8 bytes[4];
- wr_u32_le(bytes, (u32)(i32)disp);
- obj_patch(mc->base.obj, fx->sec_id, fx->offset, bytes, 4);
- break;
- }
- case R_AARCH64_JUMP26:
- case R_AARCH64_CALL26: {
- /* imm26 in the lower 26 bits of a BL/B; offset in instructions. */
- i64 idisp = disp >> 2; /* word-aligned displacement */
- u32 imm26 = (u32)(idisp & 0x03ffffffu);
- u8 cur[4];
- /* read existing 4 bytes via section accessor. */
- u8* p = mc->base.obj ? NULL : NULL;
- (void)p;
- /* obj has obj_patch but not "read"; fetch via flatten — for v1
- * we know callers emit a fresh insn with imm26=0 right before
- * recording the fixup, so we can reconstruct from the opcode
- * carried in addend's high bits. Simpler: callers emit the
- * full encoding with imm26=0 and the fixup just OR's imm26 in
- * by patching the low 26 bits. We emulate that by reading
- * indirectly through obj_section_get. */
- const Section* s = obj_section_get(mc->base.obj, fx->sec_id);
- if (!s) break;
- buf_read(&s->bytes, fx->offset, cur, 4);
- u32 word = rd_u32_le(cur);
- word = (word & ~0x03ffffffu) | imm26;
- wr_u32_le(cur, word);
- obj_patch(mc->base.obj, fx->sec_id, fx->offset, cur, 4);
- break;
- }
- case R_AARCH64_CONDBR19: {
- /* imm19 at bits 5..23 of B.cond; word-aligned displacement. */
- i64 idisp = disp >> 2;
- u32 imm19 = (u32)(idisp & 0x7ffffu);
- const Section* s = obj_section_get(mc->base.obj, fx->sec_id);
- if (!s) break;
- u8 cur[4];
- buf_read(&s->bytes, fx->offset, cur, 4);
- u32 word = rd_u32_le(cur);
- word = (word & ~(0x7ffffu << 5)) | (imm19 << 5);
- wr_u32_le(cur, word);
- obj_patch(mc->base.obj, fx->sec_id, fx->offset, cur, 4);
- break;
- }
- case R_RV_BRANCH: {
- /* B-type 12-bit signed displacement in 2-byte units. */
- const Section* s = obj_section_get(mc->base.obj, fx->sec_id);
- if (!s) break;
- u8 cur[4];
- buf_read(&s->bytes, fx->offset, cur, 4);
- u32 word = rd_u32_le(cur);
- u32 b = (u32)disp;
- word &= 0x01fff07fu;
- word |= ((b >> 12) & 1u) << 31;
- word |= ((b >> 5) & 0x3fu) << 25;
- word |= ((b >> 1) & 0xfu) << 8;
- word |= ((b >> 11) & 1u) << 7;
- wr_u32_le(cur, word);
- obj_patch(mc->base.obj, fx->sec_id, fx->offset, cur, 4);
- break;
- }
- case R_RV_JAL: {
- /* J-type 20-bit signed displacement in 2-byte units. */
- const Section* s = obj_section_get(mc->base.obj, fx->sec_id);
- if (!s) break;
- u8 cur[4];
- buf_read(&s->bytes, fx->offset, cur, 4);
- u32 word = rd_u32_le(cur);
- u32 b = (u32)disp;
- word &= 0x00000fffu;
- word |= ((b >> 20) & 1u) << 31;
- word |= ((b >> 1) & 0x3ffu) << 21;
- word |= ((b >> 11) & 1u) << 20;
- word |= ((b >> 12) & 0xffu) << 12;
- wr_u32_le(cur, word);
- obj_patch(mc->base.obj, fx->sec_id, fx->offset, cur, 4);
- break;
- }
- default:
- compiler_panic(mc->base.c, mc->base.loc,
- "MCEmitter: unsupported label-ref reloc kind %d",
- (int)fx->kind);
+  /* Built with a designated initializer so no memset (and no <string.h>
+   * dependency) is needed here. */
+  const ArchLabelFixup desc = {
+      .obj = mc->base.obj,
+      .sec_id = fx->sec_id,
+      .offset = fx->offset,
+      .width = fx->width,
+      .kind = fx->kind,
+      .disp = (i64)target_offset - (i64)fx->offset + fx->addend,
+  };
+  const ArchImpl* arch = arch_for_compiler(mc->base.c);
+
+  /* Report a missing encoder separately from a rejected request; the
+   * else-if keeps the hook from being called through a NULL pointer. */
+  if (!arch || !arch->apply_label_fixup) {
+    compiler_panic(mc->base.c, mc->base.loc,
+                   "MCEmitter: target arch provides no label fixup encoder");
+  } else if (arch->apply_label_fixup(mc->base.c, &desc) != 0) {
+    compiler_panic(mc->base.c, mc->base.loc,
+                   "MCEmitter: unsupported label-ref reloc kind %d",
+                   (int)fx->kind);
}
}
diff --git a/src/arch/rv64/arch.c b/src/arch/rv64/arch.c
@@ -2,8 +2,10 @@
#include "abi/abi_internal.h"
#include "arch/rv64.h"
+#include "core/bytes.h"
#include "link/link_arch.h"
#include "obj/elf.h"
+#include "obj/obj.h"
static const ABIVtable* rv64_abi_vtable(Compiler* c, CfreeOSKind os) {
(void)c;
@@ -18,12 +20,52 @@ static const ArchElfOps rv64_elf_ops = {
.reloc_from = elf_riscv64_reloc_from,
};
+/* Encode a resolved label displacement into a RISC-V branch/jump instruction.
+ *
+ * Handles R_RV_BRANCH (B-type, 13-bit signed byte displacement) and R_RV_JAL
+ * (J-type, 21-bit signed byte displacement); in both formats bit 0 of the
+ * displacement is implicit, so the displacement must be even. The instruction
+ * word already present in the section is read, its immediate bits replaced,
+ * and the word written back.
+ *
+ * Returns 0 on success and nonzero when the request cannot be encoded:
+ * NULL fx, width other than 4, an unhandled reloc kind, or a displacement
+ * that is odd or does not fit the immediate. A section that cannot be looked
+ * up is skipped and reported as success. */
+static int rv64_apply_label_fixup(Compiler* c, const ArchLabelFixup* fx) {
+  const Section* s;
+  u8 cur[4];
+  u32 word;
+  u32 b;
+  i64 limit; /* byte displacement must lie in [-limit, limit) */
+
+  (void)c;
+  if (!fx || fx->width != 4) return 1;
+
+  /* Classify the kind before touching the section, so an unsupported kind
+   * is always reported even when the section lookup would have failed. */
+  switch (fx->kind) {
+    case R_RV_BRANCH:
+      limit = (i64)1 << 12;
+      break;
+    case R_RV_JAL:
+      limit = (i64)1 << 20;
+      break;
+    default:
+      return 1;
+  }
+
+  /* Refuse displacements that the immediate would silently truncate. */
+  if (fx->disp % 2 != 0 || fx->disp < -limit || fx->disp >= limit) return 1;
+
+  s = obj_section_get(fx->obj, fx->sec_id);
+  if (!s) return 0;
+  buf_read(&s->bytes, fx->offset, cur, 4);
+  word = rd_u32_le(cur);
+  b = (u32)fx->disp;
+
+  if (fx->kind == R_RV_BRANCH) {
+    /* B-type: imm[12|10:5] in bits 31:25, imm[4:1|11] in bits 11:7. */
+    word &= 0x01fff07fu;
+    word |= ((b >> 12) & 1u) << 31;
+    word |= ((b >> 5) & 0x3fu) << 25;
+    word |= ((b >> 1) & 0xfu) << 8;
+    word |= ((b >> 11) & 1u) << 7;
+  } else {
+    /* J-type: imm[20|10:1|11|19:12] in bits 31:12. */
+    word &= 0x00000fffu;
+    word |= ((b >> 20) & 1u) << 31;
+    word |= ((b >> 1) & 0x3ffu) << 21;
+    word |= ((b >> 11) & 1u) << 20;
+    word |= ((b >> 12) & 0xffu) << 12;
+  }
+
+  wr_u32_le(cur, word);
+  obj_patch(fx->obj, fx->sec_id, fx->offset, cur, 4);
+  return 0;
+}
+
const ArchImpl arch_impl_rv64 = {
.kind = CFREE_ARCH_RV64,
.name = "rv64",
.abi_vtable = rv64_abi_vtable,
.cgtarget_new = rv64_cgtarget_new,
+ .asm_new = NULL,
.disasm_new = NULL,
+ .apply_label_fixup = rv64_apply_label_fixup,
.link = &link_arch_rv64,
.elf = &rv64_elf_ops,
.macho = NULL,
diff --git a/src/arch/x64/arch.c b/src/arch/x64/arch.c
@@ -2,8 +2,10 @@
#include "abi/abi_internal.h"
#include "arch/x64.h"
+#include "core/bytes.h"
#include "link/link_arch.h"
#include "obj/elf.h"
+#include "obj/obj.h"
static const ABIVtable* x64_abi_vtable(Compiler* c, CfreeOSKind os) {
(void)c;
@@ -18,12 +20,25 @@ static const ArchElfOps x64_elf_ops = {
.reloc_from = elf_x86_64_reloc_from,
};
+/* Patch a resolved R_PC32 label reference: the displacement is narrowed to
+ * 32 bits, serialized with wr_u32_le, and written over the 4 bytes at the
+ * fixup site. Returns 0 on success and 1 for a NULL request, any other reloc
+ * kind, or a width other than 4. */
+static int x64_apply_label_fixup(Compiler* c, const ArchLabelFixup* fx) {
+  u8 le32[4];
+
+  (void)c;
+  if (!fx) return 1;
+  if (fx->kind != R_PC32) return 1;
+  if (fx->width != 4) return 1;
+
+  wr_u32_le(le32, (u32)(i32)fx->disp);
+  obj_patch(fx->obj, fx->sec_id, fx->offset, le32, 4);
+  return 0;
+}
+
const ArchImpl arch_impl_x64 = {
.kind = CFREE_ARCH_X86_64,
.name = "x64",
.abi_vtable = x64_abi_vtable,
.cgtarget_new = x64_cgtarget_new,
+ .asm_new = NULL,
.disasm_new = NULL,
+ .apply_label_fixup = x64_apply_label_fixup,
.link = &link_arch_x64,
.elf = &x64_elf_ops,
.macho = NULL,
diff --git a/src/asm/asm.c b/src/asm/asm.c
@@ -23,7 +23,6 @@
#include <stdarg.h>
#include <string.h>
-#include "arch/aa64_asm.h"
#include "arch/arch.h"
#include "asm/asm_helpers.h"
#include "asm/asm_lex.h"
@@ -64,8 +63,7 @@ struct AsmDriver {
Sym n_text, n_data, n_rodata, n_bss;
- /* Per-arch handle. Phase-3 ships aa64 only; phase-5 adds dispatch. */
- AA64Asm* aa64;
+ ArchAsm* arch_asm;
};
/* ---- token plumbing ---- */
@@ -936,8 +934,8 @@ static Sym maybe_compose_mnemonic(AsmDriver* d, Sym head) {
* The driver does not own the AsmLexer or MCEmitter, does not allocate a
* default section (inline asm emits into whatever section the wrapping
* cg has selected on its MCEmitter), and skips the standalone driver's
- * per-arch handle (`d->aa64`) — the caller has already opened its own
- * AA64Asm to thread per-block bound state through. */
+ * per-arch handle (`d->arch_asm`) — the caller has already opened its own
+ * arch asm handle to thread per-block bound state through. */
AsmDriver* asm_driver_open_inline(Compiler* c, MCEmitter* mc, AsmLexer* lex) {
Heap* heap = (Heap*)c->env->heap;
AsmDriver* d = (AsmDriver*)heap->alloc(heap, sizeof *d, _Alignof(AsmDriver));
@@ -958,7 +956,7 @@ AsmDriver* asm_driver_open_inline(Compiler* c, MCEmitter* mc, AsmLexer* lex) {
SymSecMap_init(&d->sec_map, heap);
SymSymMap_init(&d->sym_map, heap);
SymEquMap_init(&d->equ_map, heap);
- d->aa64 = NULL; /* caller owns its own AA64Asm */
+ d->arch_asm = NULL; /* caller owns its own arch asm handle */
return d;
}
@@ -984,7 +982,15 @@ void asm_parse(Compiler* c, AsmLexer* l, MCEmitter* mc) {
SymSecMap_init(&d.sec_map, d.heap);
SymSymMap_init(&d.sym_map, d.heap);
SymEquMap_init(&d.equ_map, d.heap);
- d.aa64 = aa64_asm_open(c);
+ {
+ const ArchImpl* arch = arch_for_compiler(c);
+ if (!arch || !arch->asm_new) {
+ SrcLoc loc = asm_lex_loc(l);
+ compiler_panic(c, loc, "asm_parse: unsupported target arch %d",
+ (int)c->target.arch);
+ }
+ d.arch_asm = arch->asm_new(c);
+ }
for (;;) {
AsmTok t = d_peek(&d);
@@ -1017,7 +1023,7 @@ void asm_parse(Compiler* c, AsmLexer* l, MCEmitter* mc) {
continue;
}
Sym mnemonic = maybe_compose_mnemonic(&d, head);
- aa64_asm_insn(d.aa64, &d, mnemonic);
+ d.arch_asm->insn(d.arch_asm, &d, mnemonic);
d_skip_to_eol(&d);
continue;
}
@@ -1025,7 +1031,7 @@ void asm_parse(Compiler* c, AsmLexer* l, MCEmitter* mc) {
d_skip_to_eol(&d);
}
- aa64_asm_close(d.aa64);
+ if (d.arch_asm && d.arch_asm->destroy) d.arch_asm->destroy(d.arch_asm);
SymSecMap_fini(&d.sec_map);
SymSymMap_fini(&d.sym_map);
SymEquMap_fini(&d.equ_map);