commit db8ec5a267e8f4ef1f768879adc779a5f2c48c6f
parent eb26144df528f7a5dd14f56ceb608b1e14c3f066
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 26 May 2026 18:10:18 -0700
arch: split MCEmitter interface out of arch.h into mc.h
Lift the machine-code / object emission surface (MCEmitter, Reg, MCLabel,
ArchLabelFixup, mc_new / mc_begin_function / mc_emit_eh_frame,
cg_mc_debug_new, arch_lower_indexed) out of arch.h into a new leaf header
arch/mc.h. arch.h now includes mc.h (ArchImpl.cgtarget_new takes an
MCEmitter*), so the aggregating backends that still include arch.h keep
seeing every moved declaration and are unaffected.
Repoint emission-only consumers from arch.h to mc.h so they no longer
transitively depend on the decode/disasm/emu/dbg surface: the per-arch
backend headers/sources (x64, rv64, wasm, aa64, native_target), the
assembler entry point, the Debug producer, the optimizer umbrella (opt.h)
-- no opt source uses an arch-only symbol -- and test/opt/opt_test.c.
check_target.c only needs CGBackend, so its include is repointed to
cg/cgtarget.h instead.
Add the direct includes that the narrowing exposed as missing: abi/abi.h
in opt/ir.h (ABIArgInfo/ABIFuncInfo) and arch/arch.h in debug_emit.c
(ArchDwarfOps, ArchImpl, arch_for_compiler). Both files previously relied
on a transitive arch.h include.
Diffstat:
19 files changed, 178 insertions(+), 156 deletions(-)
diff --git a/src/arch/aa64/aa64.h b/src/arch/aa64/aa64.h
@@ -1,7 +1,7 @@
#ifndef CFREE_ARCH_AA64_H
#define CFREE_ARCH_AA64_H
-#include "arch/arch.h"
+#include "arch/mc.h"
#include "arch/native_target.h"
typedef struct NativeOps NativeOps;
diff --git a/src/arch/arch.h b/src/arch/arch.h
@@ -6,25 +6,11 @@
#include <cfree/disasm.h>
#include "abi/abi.h"
+#include "arch/mc.h"
#include "cg/cgtarget.h"
#include "core/core.h"
#include "obj/obj.h"
-/* Forward-declared so CgTarget can carry an optional Debug* without
- * pulling debug/debug.h into every translation unit that includes arch.h.
- * Per doc/DWARF.md §3.2 the backend gets exactly one new dependency on
- * Debug: this forward decl plus debug_emit_row (declared by the few
- * backend TUs that actually emit line rows). */
-typedef struct Debug Debug;
-
-/* Native-only register id. The semantic CgTarget surface uses CGLocal; this
- * remains here for MC/native helpers and disabled native backends. */
-typedef u32 Reg;
-#define REG_NONE 0xffffffffu
-
-typedef u32 MCLabel;
-#define MC_LABEL_NONE 0u
-
typedef struct AsmDriver AsmDriver;
typedef struct ArchAsm ArchAsm;
@@ -33,132 +19,6 @@ struct ArchAsm {
void (*destroy)(ArchAsm*);
};
-typedef struct ArchLabelFixup {
- ObjBuilder* obj;
- u32 sec_id;
- u32 offset;
- u32 width;
- RelocKind kind;
- i64 disp;
- ObjSymId cur_func_sym;
- u32 cur_func_start;
-} ArchLabelFixup;
-
-typedef struct MCEmitter MCEmitter;
-struct MCEmitter {
- /* Machine/object emission context. Subclasses extend. */
- Compiler* c;
- ObjBuilder* obj;
- u32 section_id;
-
- /* Pending source location, updated by set_loc. Promoted to the base so
- * arch backends' emit-bytes choke point can read it without reaching
- * into the per-arch impl (used to feed debug_emit_row). */
- SrcLoc loc;
-
- /* Optional Debug producer. NULL means -g is off and the per-instruction
- * line-row fanout is skipped. Set after construction by cg_new (or by
- * the cg_test harness, which is the parser stand-in). Per doc/DWARF.md
- * §3.2 this is the backend's only new dependency on Debug. */
- Debug* debug;
-
- /* Currently active function. Backends manage these via the
- * mc_begin_function / mc_end_function helpers from their func_begin /
- * func_end once they've computed the post-alignment function start
- * position. emit_label_data_reloc reads them to compute reloc
- * addends that resolve to the runtime address of an intra-function
- * label. */
- ObjSymId cur_func_sym;
- u32 cur_func_section;
- u32 cur_func_start;
-
- void (*set_section)(MCEmitter*, u32 section_id);
- u32 (*pos)(MCEmitter*);
-
- MCLabel (*label_new)(MCEmitter*);
- void (*label_place)(MCEmitter*, MCLabel);
-
- void (*emit_bytes)(MCEmitter*, const u8*, size_t);
- void (*emit_fill)(MCEmitter*, size_t n, u8 byte);
- void (*emit_align)(MCEmitter*, u32 align, u8 fill);
- void (*emit_reloc)(MCEmitter*, RelocKind, ObjSymId, i64 addend);
- void (*emit_reloc_at)(MCEmitter*, u32 section_id, u32 offset, RelocKind,
- ObjSymId, i64 addend, int explicit_addend, int pair);
- void (*emit_label_ref)(MCEmitter*, MCLabel, RelocKind, u32 width, i64 addend);
-
- /* Emit a relocation at (data_sec, data_offset) that resolves at link
- * time to the runtime address of `label` (an intra-function code label).
- *
- * The relocation is generated against the currently active function
- * symbol (cur_func_sym) with addend = (label_offset_in_section -
- * cur_func_start) + extra_addend. If `label` is already placed, the
- * reloc is emitted immediately; otherwise it is queued and emitted at
- * label_place time. Callers must have an active function (set by
- * backend func_begin); panics otherwise. */
- void (*emit_label_data_reloc)(MCEmitter*, u32 data_sec, u32 data_offset,
- MCLabel label, RelocKind kind, u32 width,
- i64 extra_addend);
- void (*set_loc)(MCEmitter*, SrcLoc);
-
- /* ---- CFI / unwind ----
- * Buffered per-function and emitted into .debug_frame / .eh_frame by Debug
- * at TU finalize. CFI directives are byte-position-bound — they describe
- * the register-save state starting at the current pos() in the current
- * section — so they live on MCEmitter (the only common point that already
- * tracks (section_id, offset)). If the CG was constructed with Debug=NULL,
- * records are discarded. Register numbering is the per-arch DWARF reg
- * number; offsets are byte deltas from the CFA. */
- void (*cfi_startproc)(MCEmitter*);
- void (*cfi_endproc)(MCEmitter*);
- void (*cfi_def_cfa)(MCEmitter*, u32 reg, i32 ofs);
- void (*cfi_def_cfa_offset)(MCEmitter*, i32 ofs);
- void (*cfi_def_cfa_register)(MCEmitter*, u32 reg);
- void (*cfi_offset)(MCEmitter*, u32 reg, i32 ofs);
- void (*cfi_rel_offset)(MCEmitter*, u32 reg, i32 ofs);
- void (*cfi_restore)(MCEmitter*, u32 reg);
- /* Override the PC offset used by the *next* cfi_* directive (one-shot).
- * Backends that patch the prologue in func_end (so the live pc has
- * moved past the prologue) call this with the post-prologue offset
- * (relative to cfi_startproc's recorded func_start) before emitting
- * the frame-state directives. */
- void (*cfi_set_next_pc_offset)(MCEmitter*, u32 pc_offset);
-
- void (*destroy)(MCEmitter*);
-};
-
-/* Construct the right target/emitter pair for c->target. */
-MCEmitter* mc_new(Compiler*, ObjBuilder*);
-void mc_free(MCEmitter*);
-
-/* Per-function context helpers. Backends call mc_begin_function from
- * their CgTarget func_begin (after computing the post-alignment function
- * start) and mc_end_function from func_end. The pair sets / clears
- * MCEmitter.cur_func_* — the metadata that emit_label_data_reloc reads
- * to resolve deferred intra-function label fixups in data sections. */
-void mc_begin_function(MCEmitter*, ObjSymId sym, u32 section_id,
- u32 start_offset);
-void mc_end_function(MCEmitter*);
-
-/* Flush buffered CFI state into a .eh_frame section in the ObjBuilder.
- * No-op when no functions called cfi_startproc. Idempotent. */
-void mc_emit_eh_frame(MCEmitter*);
-
-/* Construct the MCEmitter + (optionally) Debug pair that a machine-code
- * CGBackend's `make` typically needs. On success, sets *out_mc to a fresh
- * MCEmitter; sets *out_debug to a Debug producer (and wires mc->debug) when
- * opts->debug_info is true, else NULL. On allocation failure returns
- * CFREE_NOMEM with both outputs left NULL and any partial state cleaned up.
- * c_target's backend ignores this and does not create either. */
-CfreeStatus cg_mc_debug_new(Compiler*, ObjBuilder*, const CfreeCodeOptions*,
- MCEmitter** out_mc, Debug** out_debug);
-
-/* Helper for backends without a native indexed addressing mode. If addr has
- * an index (addr.v.ind.index != REG_NONE), materializes
- * base + (index << log2_scale) into `scratch` and returns a plain
- * OPK_INDIRECT(scratch, ofs). Otherwise returns `addr` unchanged. The caller
- * supplies the scratch register from its scratch pool. */
-Operand arch_lower_indexed(CgTarget*, Operand addr, Reg scratch);
-
/* ---- Disassembler hook ----
* Bytes -> records, not frontend-driven lowering, so this is a separate
* hook from CgTarget/MCEmitter. The internal implementation may share
diff --git a/src/arch/check_target.c b/src/arch/check_target.c
@@ -1,6 +1,6 @@
#include <string.h>
-#include "arch/arch.h"
+#include "cg/cgtarget.h"
#include "core/arena.h"
typedef struct CheckTarget {
diff --git a/src/arch/mc.h b/src/arch/mc.h
@@ -0,0 +1,159 @@
+#ifndef CFREE_INTERNAL_ARCH_MC_H
+#define CFREE_INTERNAL_ARCH_MC_H
+
+#include <cfree/compile.h>
+
+#include "cg/cgtarget.h"
+#include "core/core.h"
+#include "obj/obj.h"
+
+/* Machine-code / object emission interface. One generic implementation in
+ * src/arch/mc.c serves every machine-code arch; arch-specific behavior enters
+ * only via ArchImpl.apply_label_fixup (label reloc encoding) and the
+ * ArchImpl.cfi_* constants (eh_frame CIE defaults). Pulled out of arch.h so
+ * the many emission-only consumers (per-arch emit/ops/alloc TUs, the
+ * assembler, the Debug producer) don't transitively depend on the
+ * decode/disasm/emu/dbg surfaces. */
+
+/* Forward-declared so MCEmitter can carry an optional Debug* without
+ * pulling debug/debug.h into every translation unit that includes this
+ * header. Per doc/DWARF.md §3.2 the backend gets exactly one new dependency
+ * on Debug: this forward decl plus debug_emit_row (declared by the few
+ * backend TUs that actually emit line rows). */
+typedef struct Debug Debug;
+
+/* Native-only register id. The semantic CgTarget surface uses CGLocal; this
+ * remains here for MC/native helpers and disabled native backends. */
+typedef u32 Reg;
+#define REG_NONE 0xffffffffu
+
+typedef u32 MCLabel;
+#define MC_LABEL_NONE 0u
+
+typedef struct ArchLabelFixup {
+ ObjBuilder* obj;
+ u32 sec_id;
+ u32 offset;
+ u32 width;
+ RelocKind kind;
+ i64 disp;
+ ObjSymId cur_func_sym;
+ u32 cur_func_start;
+} ArchLabelFixup;
+
+typedef struct MCEmitter MCEmitter;
+struct MCEmitter {
+ /* Machine/object emission context. Subclasses extend. */
+ Compiler* c;
+ ObjBuilder* obj;
+ u32 section_id;
+
+ /* Pending source location, updated by set_loc. Promoted to the base so
+ * arch backends' emit-bytes choke point can read it without reaching
+ * into the per-arch impl (used to feed debug_emit_row). */
+ SrcLoc loc;
+
+ /* Optional Debug producer. NULL means -g is off and the per-instruction
+ * line-row fanout is skipped. Set after construction by cg_new (or by
+ * the cg_test harness, which is the parser stand-in). Per doc/DWARF.md
+ * §3.2 this is the backend's only new dependency on Debug. */
+ Debug* debug;
+
+ /* Currently active function. Backends manage these via the
+ * mc_begin_function / mc_end_function helpers from their func_begin /
+ * func_end once they've computed the post-alignment function start
+ * position. emit_label_data_reloc reads them to compute reloc
+ * addends that resolve to the runtime address of an intra-function
+ * label. */
+ ObjSymId cur_func_sym;
+ u32 cur_func_section;
+ u32 cur_func_start;
+
+ void (*set_section)(MCEmitter*, u32 section_id);
+ u32 (*pos)(MCEmitter*);
+
+ MCLabel (*label_new)(MCEmitter*);
+ void (*label_place)(MCEmitter*, MCLabel);
+
+ void (*emit_bytes)(MCEmitter*, const u8*, size_t);
+ void (*emit_fill)(MCEmitter*, size_t n, u8 byte);
+ void (*emit_align)(MCEmitter*, u32 align, u8 fill);
+ void (*emit_reloc)(MCEmitter*, RelocKind, ObjSymId, i64 addend);
+ void (*emit_reloc_at)(MCEmitter*, u32 section_id, u32 offset, RelocKind,
+ ObjSymId, i64 addend, int explicit_addend, int pair);
+ void (*emit_label_ref)(MCEmitter*, MCLabel, RelocKind, u32 width, i64 addend);
+
+ /* Emit a relocation at (data_sec, data_offset) that resolves at link
+ * time to the runtime address of `label` (an intra-function code label).
+ *
+ * The relocation is generated against the currently active function
+ * symbol (cur_func_sym) with addend = (label_offset_in_section -
+ * cur_func_start) + extra_addend. If `label` is already placed, the
+ * reloc is emitted immediately; otherwise it is queued and emitted at
+ * label_place time. Callers must have an active function (set by
+ * backend func_begin); panics otherwise. */
+ void (*emit_label_data_reloc)(MCEmitter*, u32 data_sec, u32 data_offset,
+ MCLabel label, RelocKind kind, u32 width,
+ i64 extra_addend);
+ void (*set_loc)(MCEmitter*, SrcLoc);
+
+ /* ---- CFI / unwind ----
+ * Buffered per-function and emitted into .debug_frame / .eh_frame by Debug
+ * at TU finalize. CFI directives are byte-position-bound — they describe
+ * the register-save state starting at the current pos() in the current
+ * section — so they live on MCEmitter (the only common point that already
+ * tracks (section_id, offset)). If the CG was constructed with Debug=NULL,
+ * records are discarded. Register numbering is the per-arch DWARF reg
+ * number; offsets are byte deltas from the CFA. */
+ void (*cfi_startproc)(MCEmitter*);
+ void (*cfi_endproc)(MCEmitter*);
+ void (*cfi_def_cfa)(MCEmitter*, u32 reg, i32 ofs);
+ void (*cfi_def_cfa_offset)(MCEmitter*, i32 ofs);
+ void (*cfi_def_cfa_register)(MCEmitter*, u32 reg);
+ void (*cfi_offset)(MCEmitter*, u32 reg, i32 ofs);
+ void (*cfi_rel_offset)(MCEmitter*, u32 reg, i32 ofs);
+ void (*cfi_restore)(MCEmitter*, u32 reg);
+ /* Override the PC offset used by the *next* cfi_* directive (one-shot).
+ * Backends that patch the prologue in func_end (so the live pc has
+ * moved past the prologue) call this with the post-prologue offset
+ * (relative to cfi_startproc's recorded func_start) before emitting
+ * the frame-state directives. */
+ void (*cfi_set_next_pc_offset)(MCEmitter*, u32 pc_offset);
+
+ void (*destroy)(MCEmitter*);
+};
+
+/* Construct the MCEmitter for c->target. */
+MCEmitter* mc_new(Compiler*, ObjBuilder*);
+void mc_free(MCEmitter*);
+
+/* Per-function context helpers. Backends call mc_begin_function from
+ * their CgTarget func_begin (after computing the post-alignment function
+ * start) and mc_end_function from func_end. The pair sets / clears
+ * MCEmitter.cur_func_* — the metadata that emit_label_data_reloc reads
+ * to resolve deferred intra-function label fixups in data sections. */
+void mc_begin_function(MCEmitter*, ObjSymId sym, u32 section_id,
+ u32 start_offset);
+void mc_end_function(MCEmitter*);
+
+/* Flush buffered CFI state into a .eh_frame section in the ObjBuilder.
+ * No-op when no functions called cfi_startproc. Idempotent. */
+void mc_emit_eh_frame(MCEmitter*);
+
+/* Construct the MCEmitter + (optionally) Debug pair that a machine-code
+ * CGBackend's `make` typically needs. On success, sets *out_mc to a fresh
+ * MCEmitter; sets *out_debug to a Debug producer (and wires mc->debug) when
+ * opts->debug_info is true, else NULL. On allocation failure returns
+ * CFREE_NOMEM with both outputs left NULL and any partial state cleaned up.
+ * c_target's backend ignores this and does not create either. */
+CfreeStatus cg_mc_debug_new(Compiler*, ObjBuilder*, const CfreeCodeOptions*,
+ MCEmitter** out_mc, Debug** out_debug);
+
+/* Helper for backends without a native indexed addressing mode. If addr has
+ * an index (addr.v.ind.index != REG_NONE), materializes
+ * base + (index << log2_scale) into `scratch` and returns a plain
+ * OPK_INDIRECT(scratch, ofs). Otherwise returns `addr` unchanged. The caller
+ * supplies the scratch register from its scratch pool. */
+Operand arch_lower_indexed(CgTarget*, Operand addr, Reg scratch);
+
+#endif
diff --git a/src/arch/native_target.h b/src/arch/native_target.h
@@ -3,7 +3,7 @@
#include <string.h>
-#include "arch/arch.h"
+#include "arch/mc.h"
#include "cg/cgtarget.h"
#include "core/core.h"
diff --git a/src/arch/rv64/internal.h b/src/arch/rv64/internal.h
@@ -4,7 +4,7 @@
#include <string.h>
-#include "arch/arch.h"
+#include "arch/mc.h"
#include "arch/rv64/isa.h"
#include "arch/rv64/rv64.h"
#include "core/arena.h"
diff --git a/src/arch/rv64/rv64.h b/src/arch/rv64/rv64.h
@@ -1,7 +1,7 @@
#ifndef CFREE_ARCH_RV64_H
#define CFREE_ARCH_RV64_H
-#include "arch/arch.h"
+#include "arch/mc.h"
CGTarget* rv64_cgtarget_new(Compiler*, ObjBuilder*, MCEmitter*);
diff --git a/src/arch/wasm/internal.h b/src/arch/wasm/internal.h
@@ -22,7 +22,7 @@
#include <cfree/core.h>
-#include "arch/arch.h"
+#include "arch/mc.h"
#include "core/core.h"
#include "opt/ir.h"
#include "obj/obj.h"
diff --git a/src/arch/x64/alloc.c b/src/arch/x64/alloc.c
@@ -7,7 +7,7 @@
#include <string.h>
-#include "arch/arch.h"
+#include "arch/mc.h"
#include "arch/x64/internal.h"
#include "arch/x64/isa.h"
#include "arch/x64/regs.h"
diff --git a/src/arch/x64/emit.c b/src/arch/x64/emit.c
@@ -6,7 +6,7 @@
#include <string.h>
-#include "arch/arch.h"
+#include "arch/mc.h"
#include "arch/x64/internal.h"
#include "arch/x64/isa.h"
#include "arch/x64/x64.h"
diff --git a/src/arch/x64/internal.h b/src/arch/x64/internal.h
@@ -12,7 +12,7 @@
#include <string.h>
-#include "arch/arch.h"
+#include "arch/mc.h"
#include "arch/x64/isa.h"
#include "arch/x64/x64.h"
#include "core/arena.h"
diff --git a/src/arch/x64/ops.c b/src/arch/x64/ops.c
@@ -12,7 +12,7 @@
#include <string.h>
-#include "arch/arch.h"
+#include "arch/mc.h"
#include "arch/x64/asm.h"
#include "arch/x64/internal.h"
#include "arch/x64/isa.h"
diff --git a/src/arch/x64/x64.h b/src/arch/x64/x64.h
@@ -1,7 +1,7 @@
#ifndef CFREE_ARCH_X64_H
#define CFREE_ARCH_X64_H
-#include "arch/arch.h"
+#include "arch/mc.h"
CGTarget* x64_cgtarget_new(Compiler*, ObjBuilder*, MCEmitter*);
diff --git a/src/asm/asm.h b/src/asm/asm.h
@@ -1,7 +1,7 @@
#ifndef CFREE_ASM_H
#define CFREE_ASM_H
-#include "arch/arch.h"
+#include "arch/mc.h"
#include "asm/asm_lex.h"
/* Standalone assembler. Reads tokens directly from an AsmLexer; emits via
diff --git a/src/debug/debug.h b/src/debug/debug.h
@@ -1,7 +1,7 @@
#ifndef CFREE_DEBUG_H
#define CFREE_DEBUG_H
-#include "arch/arch.h"
+#include "arch/mc.h"
#include "core/core.h"
/* DWARF debug info. The producer side (CG, CGTarget/MCEmitter, opt) feeds
diff --git a/src/debug/debug_emit.c b/src/debug/debug_emit.c
@@ -16,6 +16,8 @@
#include <string.h>
+#include "arch/arch.h"
+
#include "core/buf.h"
#include "core/core.h"
#include "core/heap.h"
diff --git a/src/opt/ir.h b/src/opt/ir.h
@@ -1,6 +1,7 @@
#ifndef CFREE_IR_H
#define CFREE_IR_H
+#include "abi/abi.h"
#include "arch/native_target.h"
#include "core/arena.h"
#include "core/core.h"
diff --git a/src/opt/opt.h b/src/opt/opt.h
@@ -1,7 +1,7 @@
#ifndef CFREE_OPT_H
#define CFREE_OPT_H
-#include "arch/arch.h"
+#include "arch/mc.h"
#include "arch/native_target.h"
#include "cg/ir.h"
#include "opt/ir.h"
diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c
@@ -8,7 +8,7 @@
#include <string.h>
#include "abi/abi.h"
-#include "arch/arch.h"
+#include "arch/mc.h"
#include "arch/rv64/isa.h"
#include "arch/x64/isa.h"
#include "core/core.h"