kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit db8ec5a267e8f4ef1f768879adc779a5f2c48c6f
parent eb26144df528f7a5dd14f56ceb608b1e14c3f066
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue, 26 May 2026 18:10:18 -0700

arch: split MCEmitter interface out of arch.h into mc.h

Lift the machine-code / object emission surface (MCEmitter, Reg, MCLabel,
ArchLabelFixup, mc_new / mc_begin_function / mc_emit_eh_frame,
cg_mc_debug_new, arch_lower_indexed) out of arch.h into a new leaf header
arch/mc.h. arch.h now includes mc.h (ArchImpl.cgtarget_new takes an
MCEmitter*), so the aggregating backends are unaffected.

Repoint emission-only consumers from arch.h to mc.h so they no longer
transitively depend on the decode/disasm/emu/dbg surface: the per-arch
backend headers/sources (x64, rv64, wasm, aa64, native_target), the
assembler entry point, the Debug producer, and the optimizer umbrella
(opt.h), since no opt source uses an arch-only symbol. check_target.c
only needs CGBackend, so it now includes cg/cgtarget.h instead.

Add the direct includes whose absence the narrowing exposed: abi/abi.h in
opt/ir.h (ABIArgInfo/ABIFuncInfo) and arch/arch.h in debug_emit.c
(ArchDwarfOps, ArchImpl, arch_for_compiler). Both files previously relied
on a transitive arch.h include.

Diffstat:
M src/arch/aa64/aa64.h | 2 +-
M src/arch/arch.h | 142 +------------------------------------------------------------------------------
M src/arch/check_target.c | 2 +-
A src/arch/mc.h | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/arch/native_target.h | 2 +-
M src/arch/rv64/internal.h | 2 +-
M src/arch/rv64/rv64.h | 2 +-
M src/arch/wasm/internal.h | 2 +-
M src/arch/x64/alloc.c | 2 +-
M src/arch/x64/emit.c | 2 +-
M src/arch/x64/internal.h | 2 +-
M src/arch/x64/ops.c | 2 +-
M src/arch/x64/x64.h | 2 +-
M src/asm/asm.h | 2 +-
M src/debug/debug.h | 2 +-
M src/debug/debug_emit.c | 2 ++
M src/opt/ir.h | 1 +
M src/opt/opt.h | 2 +-
M test/opt/opt_test.c | 2 +-
19 files changed, 178 insertions(+), 156 deletions(-)

diff --git a/src/arch/aa64/aa64.h b/src/arch/aa64/aa64.h @@ -1,7 +1,7 @@ #ifndef CFREE_ARCH_AA64_H #define CFREE_ARCH_AA64_H -#include "arch/arch.h" +#include "arch/mc.h" #include "arch/native_target.h" typedef struct NativeOps NativeOps; diff --git a/src/arch/arch.h b/src/arch/arch.h @@ -6,25 +6,11 @@ #include <cfree/disasm.h> #include "abi/abi.h" +#include "arch/mc.h" #include "cg/cgtarget.h" #include "core/core.h" #include "obj/obj.h" -/* Forward-declared so CgTarget can carry an optional Debug* without - * pulling debug/debug.h into every translation unit that includes arch.h. - * Per doc/DWARF.md §3.2 the backend gets exactly one new dependency on - * Debug: this forward decl plus debug_emit_row (declared by the few - * backend TUs that actually emit line rows). */ -typedef struct Debug Debug; - -/* Native-only register id. The semantic CgTarget surface uses CGLocal; this - * remains here for MC/native helpers and disabled native backends. */ -typedef u32 Reg; -#define REG_NONE 0xffffffffu - -typedef u32 MCLabel; -#define MC_LABEL_NONE 0u - typedef struct AsmDriver AsmDriver; typedef struct ArchAsm ArchAsm; @@ -33,132 +19,6 @@ struct ArchAsm { void (*destroy)(ArchAsm*); }; -typedef struct ArchLabelFixup { - ObjBuilder* obj; - u32 sec_id; - u32 offset; - u32 width; - RelocKind kind; - i64 disp; - ObjSymId cur_func_sym; - u32 cur_func_start; -} ArchLabelFixup; - -typedef struct MCEmitter MCEmitter; -struct MCEmitter { - /* Machine/object emission context. Subclasses extend. */ - Compiler* c; - ObjBuilder* obj; - u32 section_id; - - /* Pending source location, updated by set_loc. Promoted to the base so - * arch backends' emit-bytes choke point can read it without reaching - * into the per-arch impl (used to feed debug_emit_row). */ - SrcLoc loc; - - /* Optional Debug producer. NULL means -g is off and the per-instruction - * line-row fanout is skipped. Set after construction by cg_new (or by - * the cg_test harness, which is the parser stand-in). 
Per doc/DWARF.md - * §3.2 this is the backend's only new dependency on Debug. */ - Debug* debug; - - /* Currently active function. Backends manage these via the - * mc_begin_function / mc_end_function helpers from their func_begin / - * func_end once they've computed the post-alignment function start - * position. emit_label_data_reloc reads them to compute reloc - * addends that resolve to the runtime address of an intra-function - * label. */ - ObjSymId cur_func_sym; - u32 cur_func_section; - u32 cur_func_start; - - void (*set_section)(MCEmitter*, u32 section_id); - u32 (*pos)(MCEmitter*); - - MCLabel (*label_new)(MCEmitter*); - void (*label_place)(MCEmitter*, MCLabel); - - void (*emit_bytes)(MCEmitter*, const u8*, size_t); - void (*emit_fill)(MCEmitter*, size_t n, u8 byte); - void (*emit_align)(MCEmitter*, u32 align, u8 fill); - void (*emit_reloc)(MCEmitter*, RelocKind, ObjSymId, i64 addend); - void (*emit_reloc_at)(MCEmitter*, u32 section_id, u32 offset, RelocKind, - ObjSymId, i64 addend, int explicit_addend, int pair); - void (*emit_label_ref)(MCEmitter*, MCLabel, RelocKind, u32 width, i64 addend); - - /* Emit a relocation at (data_sec, data_offset) that resolves at link - * time to the runtime address of `label` (an intra-function code label). - * - * The relocation is generated against the currently active function - * symbol (cur_func_sym) with addend = (label_offset_in_section - - * cur_func_start) + extra_addend. If `label` is already placed, the - * reloc is emitted immediately; otherwise it is queued and emitted at - * label_place time. Callers must have an active function (set by - * backend func_begin); panics otherwise. */ - void (*emit_label_data_reloc)(MCEmitter*, u32 data_sec, u32 data_offset, - MCLabel label, RelocKind kind, u32 width, - i64 extra_addend); - void (*set_loc)(MCEmitter*, SrcLoc); - - /* ---- CFI / unwind ---- - * Buffered per-function and emitted into .debug_frame / .eh_frame by Debug - * at TU finalize. 
CFI directives are byte-position-bound — they describe - * the register-save state starting at the current pos() in the current - * section — so they live on MCEmitter (the only common point that already - * tracks (section_id, offset)). If the CG was constructed with Debug=NULL, - * records are discarded. Register numbering is the per-arch DWARF reg - * number; offsets are byte deltas from the CFA. */ - void (*cfi_startproc)(MCEmitter*); - void (*cfi_endproc)(MCEmitter*); - void (*cfi_def_cfa)(MCEmitter*, u32 reg, i32 ofs); - void (*cfi_def_cfa_offset)(MCEmitter*, i32 ofs); - void (*cfi_def_cfa_register)(MCEmitter*, u32 reg); - void (*cfi_offset)(MCEmitter*, u32 reg, i32 ofs); - void (*cfi_rel_offset)(MCEmitter*, u32 reg, i32 ofs); - void (*cfi_restore)(MCEmitter*, u32 reg); - /* Override the PC offset used by the *next* cfi_* directive (one-shot). - * Backends that patch the prologue in func_end (so the live pc has - * moved past the prologue) call this with the post-prologue offset - * (relative to cfi_startproc's recorded func_start) before emitting - * the frame-state directives. */ - void (*cfi_set_next_pc_offset)(MCEmitter*, u32 pc_offset); - - void (*destroy)(MCEmitter*); -}; - -/* Construct the right target/emitter pair for c->target. */ -MCEmitter* mc_new(Compiler*, ObjBuilder*); -void mc_free(MCEmitter*); - -/* Per-function context helpers. Backends call mc_begin_function from - * their CgTarget func_begin (after computing the post-alignment function - * start) and mc_end_function from func_end. The pair sets / clears - * MCEmitter.cur_func_* — the metadata that emit_label_data_reloc reads - * to resolve deferred intra-function label fixups in data sections. */ -void mc_begin_function(MCEmitter*, ObjSymId sym, u32 section_id, - u32 start_offset); -void mc_end_function(MCEmitter*); - -/* Flush buffered CFI state into a .eh_frame section in the ObjBuilder. - * No-op when no functions called cfi_startproc. Idempotent. 
*/ -void mc_emit_eh_frame(MCEmitter*); - -/* Construct the MCEmitter + (optionally) Debug pair that a machine-code - * CGBackend's `make` typically needs. On success, sets *out_mc to a fresh - * MCEmitter; sets *out_debug to a Debug producer (and wires mc->debug) when - * opts->debug_info is true, else NULL. On allocation failure returns - * CFREE_NOMEM with both outputs left NULL and any partial state cleaned up. - * c_target's backend ignores this and does not create either. */ -CfreeStatus cg_mc_debug_new(Compiler*, ObjBuilder*, const CfreeCodeOptions*, - MCEmitter** out_mc, Debug** out_debug); - -/* Helper for backends without a native indexed addressing mode. If addr has - * an index (addr.v.ind.index != REG_NONE), materializes - * base + (index << log2_scale) into `scratch` and returns a plain - * OPK_INDIRECT(scratch, ofs). Otherwise returns `addr` unchanged. The caller - * supplies the scratch register from its scratch pool. */ -Operand arch_lower_indexed(CgTarget*, Operand addr, Reg scratch); - /* ---- Disassembler hook ---- * Bytes -> records, not frontend-driven lowering, so this is a separate * hook from CgTarget/MCEmitter. The internal implementation may share diff --git a/src/arch/check_target.c b/src/arch/check_target.c @@ -1,6 +1,6 @@ #include <string.h> -#include "arch/arch.h" +#include "cg/cgtarget.h" #include "core/arena.h" typedef struct CheckTarget { diff --git a/src/arch/mc.h b/src/arch/mc.h @@ -0,0 +1,159 @@ +#ifndef CFREE_INTERNAL_ARCH_MC_H +#define CFREE_INTERNAL_ARCH_MC_H + +#include <cfree/compile.h> + +#include "cg/cgtarget.h" +#include "core/core.h" +#include "obj/obj.h" + +/* Machine-code / object emission interface. One generic implementation in + * src/arch/mc.c serves every machine-code arch; arch-specific behavior enters + * only via ArchImpl.apply_label_fixup (label reloc encoding) and the + * ArchImpl.cfi_* constants (eh_frame CIE defaults). 
Pulled out of arch.h so + * the many emission-only consumers (per-arch emit/ops/alloc TUs, the + * assembler, the Debug producer) don't transitively depend on the + * decode/disasm/emu/dbg surfaces. */ + +/* Forward-declared so CgTarget can carry an optional Debug* without + * pulling debug/debug.h into every translation unit that includes this + * header. Per doc/DWARF.md §3.2 the backend gets exactly one new dependency + * on Debug: this forward decl plus debug_emit_row (declared by the few + * backend TUs that actually emit line rows). */ +typedef struct Debug Debug; + +/* Native-only register id. The semantic CgTarget surface uses CGLocal; this + * remains here for MC/native helpers and disabled native backends. */ +typedef u32 Reg; +#define REG_NONE 0xffffffffu + +typedef u32 MCLabel; +#define MC_LABEL_NONE 0u + +typedef struct ArchLabelFixup { + ObjBuilder* obj; + u32 sec_id; + u32 offset; + u32 width; + RelocKind kind; + i64 disp; + ObjSymId cur_func_sym; + u32 cur_func_start; +} ArchLabelFixup; + +typedef struct MCEmitter MCEmitter; +struct MCEmitter { + /* Machine/object emission context. Subclasses extend. */ + Compiler* c; + ObjBuilder* obj; + u32 section_id; + + /* Pending source location, updated by set_loc. Promoted to the base so + * arch backends' emit-bytes choke point can read it without reaching + * into the per-arch impl (used to feed debug_emit_row). */ + SrcLoc loc; + + /* Optional Debug producer. NULL means -g is off and the per-instruction + * line-row fanout is skipped. Set after construction by cg_new (or by + * the cg_test harness, which is the parser stand-in). Per doc/DWARF.md + * §3.2 this is the backend's only new dependency on Debug. */ + Debug* debug; + + /* Currently active function. Backends manage these via the + * mc_begin_function / mc_end_function helpers from their func_begin / + * func_end once they've computed the post-alignment function start + * position. 
emit_label_data_reloc reads them to compute reloc + * addends that resolve to the runtime address of an intra-function + * label. */ + ObjSymId cur_func_sym; + u32 cur_func_section; + u32 cur_func_start; + + void (*set_section)(MCEmitter*, u32 section_id); + u32 (*pos)(MCEmitter*); + + MCLabel (*label_new)(MCEmitter*); + void (*label_place)(MCEmitter*, MCLabel); + + void (*emit_bytes)(MCEmitter*, const u8*, size_t); + void (*emit_fill)(MCEmitter*, size_t n, u8 byte); + void (*emit_align)(MCEmitter*, u32 align, u8 fill); + void (*emit_reloc)(MCEmitter*, RelocKind, ObjSymId, i64 addend); + void (*emit_reloc_at)(MCEmitter*, u32 section_id, u32 offset, RelocKind, + ObjSymId, i64 addend, int explicit_addend, int pair); + void (*emit_label_ref)(MCEmitter*, MCLabel, RelocKind, u32 width, i64 addend); + + /* Emit a relocation at (data_sec, data_offset) that resolves at link + * time to the runtime address of `label` (an intra-function code label). + * + * The relocation is generated against the currently active function + * symbol (cur_func_sym) with addend = (label_offset_in_section - + * cur_func_start) + extra_addend. If `label` is already placed, the + * reloc is emitted immediately; otherwise it is queued and emitted at + * label_place time. Callers must have an active function (set by + * backend func_begin); panics otherwise. */ + void (*emit_label_data_reloc)(MCEmitter*, u32 data_sec, u32 data_offset, + MCLabel label, RelocKind kind, u32 width, + i64 extra_addend); + void (*set_loc)(MCEmitter*, SrcLoc); + + /* ---- CFI / unwind ---- + * Buffered per-function and emitted into .debug_frame / .eh_frame by Debug + * at TU finalize. CFI directives are byte-position-bound — they describe + * the register-save state starting at the current pos() in the current + * section — so they live on MCEmitter (the only common point that already + * tracks (section_id, offset)). If the CG was constructed with Debug=NULL, + * records are discarded. 
Register numbering is the per-arch DWARF reg + * number; offsets are byte deltas from the CFA. */ + void (*cfi_startproc)(MCEmitter*); + void (*cfi_endproc)(MCEmitter*); + void (*cfi_def_cfa)(MCEmitter*, u32 reg, i32 ofs); + void (*cfi_def_cfa_offset)(MCEmitter*, i32 ofs); + void (*cfi_def_cfa_register)(MCEmitter*, u32 reg); + void (*cfi_offset)(MCEmitter*, u32 reg, i32 ofs); + void (*cfi_rel_offset)(MCEmitter*, u32 reg, i32 ofs); + void (*cfi_restore)(MCEmitter*, u32 reg); + /* Override the PC offset used by the *next* cfi_* directive (one-shot). + * Backends that patch the prologue in func_end (so the live pc has + * moved past the prologue) call this with the post-prologue offset + * (relative to cfi_startproc's recorded func_start) before emitting + * the frame-state directives. */ + void (*cfi_set_next_pc_offset)(MCEmitter*, u32 pc_offset); + + void (*destroy)(MCEmitter*); +}; + +/* Construct the right target/emitter pair for c->target. */ +MCEmitter* mc_new(Compiler*, ObjBuilder*); +void mc_free(MCEmitter*); + +/* Per-function context helpers. Backends call mc_begin_function from + * their CgTarget func_begin (after computing the post-alignment function + * start) and mc_end_function from func_end. The pair sets / clears + * MCEmitter.cur_func_* — the metadata that emit_label_data_reloc reads + * to resolve deferred intra-function label fixups in data sections. */ +void mc_begin_function(MCEmitter*, ObjSymId sym, u32 section_id, + u32 start_offset); +void mc_end_function(MCEmitter*); + +/* Flush buffered CFI state into a .eh_frame section in the ObjBuilder. + * No-op when no functions called cfi_startproc. Idempotent. */ +void mc_emit_eh_frame(MCEmitter*); + +/* Construct the MCEmitter + (optionally) Debug pair that a machine-code + * CGBackend's `make` typically needs. On success, sets *out_mc to a fresh + * MCEmitter; sets *out_debug to a Debug producer (and wires mc->debug) when + * opts->debug_info is true, else NULL. 
On allocation failure returns + * CFREE_NOMEM with both outputs left NULL and any partial state cleaned up. + * c_target's backend ignores this and does not create either. */ +CfreeStatus cg_mc_debug_new(Compiler*, ObjBuilder*, const CfreeCodeOptions*, + MCEmitter** out_mc, Debug** out_debug); + +/* Helper for backends without a native indexed addressing mode. If addr has + * an index (addr.v.ind.index != REG_NONE), materializes + * base + (index << log2_scale) into `scratch` and returns a plain + * OPK_INDIRECT(scratch, ofs). Otherwise returns `addr` unchanged. The caller + * supplies the scratch register from its scratch pool. */ +Operand arch_lower_indexed(CgTarget*, Operand addr, Reg scratch); + +#endif diff --git a/src/arch/native_target.h b/src/arch/native_target.h @@ -3,7 +3,7 @@ #include <string.h> -#include "arch/arch.h" +#include "arch/mc.h" #include "cg/cgtarget.h" #include "core/core.h" diff --git a/src/arch/rv64/internal.h b/src/arch/rv64/internal.h @@ -4,7 +4,7 @@ #include <string.h> -#include "arch/arch.h" +#include "arch/mc.h" #include "arch/rv64/isa.h" #include "arch/rv64/rv64.h" #include "core/arena.h" diff --git a/src/arch/rv64/rv64.h b/src/arch/rv64/rv64.h @@ -1,7 +1,7 @@ #ifndef CFREE_ARCH_RV64_H #define CFREE_ARCH_RV64_H -#include "arch/arch.h" +#include "arch/mc.h" CGTarget* rv64_cgtarget_new(Compiler*, ObjBuilder*, MCEmitter*); diff --git a/src/arch/wasm/internal.h b/src/arch/wasm/internal.h @@ -22,7 +22,7 @@ #include <cfree/core.h> -#include "arch/arch.h" +#include "arch/mc.h" #include "core/core.h" #include "opt/ir.h" #include "obj/obj.h" diff --git a/src/arch/x64/alloc.c b/src/arch/x64/alloc.c @@ -7,7 +7,7 @@ #include <string.h> -#include "arch/arch.h" +#include "arch/mc.h" #include "arch/x64/internal.h" #include "arch/x64/isa.h" #include "arch/x64/regs.h" diff --git a/src/arch/x64/emit.c b/src/arch/x64/emit.c @@ -6,7 +6,7 @@ #include <string.h> -#include "arch/arch.h" +#include "arch/mc.h" #include "arch/x64/internal.h" #include 
"arch/x64/isa.h" #include "arch/x64/x64.h" diff --git a/src/arch/x64/internal.h b/src/arch/x64/internal.h @@ -12,7 +12,7 @@ #include <string.h> -#include "arch/arch.h" +#include "arch/mc.h" #include "arch/x64/isa.h" #include "arch/x64/x64.h" #include "core/arena.h" diff --git a/src/arch/x64/ops.c b/src/arch/x64/ops.c @@ -12,7 +12,7 @@ #include <string.h> -#include "arch/arch.h" +#include "arch/mc.h" #include "arch/x64/asm.h" #include "arch/x64/internal.h" #include "arch/x64/isa.h" diff --git a/src/arch/x64/x64.h b/src/arch/x64/x64.h @@ -1,7 +1,7 @@ #ifndef CFREE_ARCH_X64_H #define CFREE_ARCH_X64_H -#include "arch/arch.h" +#include "arch/mc.h" CGTarget* x64_cgtarget_new(Compiler*, ObjBuilder*, MCEmitter*); diff --git a/src/asm/asm.h b/src/asm/asm.h @@ -1,7 +1,7 @@ #ifndef CFREE_ASM_H #define CFREE_ASM_H -#include "arch/arch.h" +#include "arch/mc.h" #include "asm/asm_lex.h" /* Standalone assembler. Reads tokens directly from an AsmLexer; emits via diff --git a/src/debug/debug.h b/src/debug/debug.h @@ -1,7 +1,7 @@ #ifndef CFREE_DEBUG_H #define CFREE_DEBUG_H -#include "arch/arch.h" +#include "arch/mc.h" #include "core/core.h" /* DWARF debug info. 
The producer side (CG, CGTarget/MCEmitter, opt) feeds diff --git a/src/debug/debug_emit.c b/src/debug/debug_emit.c @@ -16,6 +16,8 @@ #include <string.h> +#include "arch/arch.h" + #include "core/buf.h" #include "core/core.h" #include "core/heap.h" diff --git a/src/opt/ir.h b/src/opt/ir.h @@ -1,6 +1,7 @@ #ifndef CFREE_IR_H #define CFREE_IR_H +#include "abi/abi.h" #include "arch/native_target.h" #include "core/arena.h" #include "core/core.h" diff --git a/src/opt/opt.h b/src/opt/opt.h @@ -1,7 +1,7 @@ #ifndef CFREE_OPT_H #define CFREE_OPT_H -#include "arch/arch.h" +#include "arch/mc.h" #include "arch/native_target.h" #include "cg/ir.h" #include "opt/ir.h" diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c @@ -8,7 +8,7 @@ #include <string.h> #include "abi/abi.h" -#include "arch/arch.h" +#include "arch/mc.h" #include "arch/rv64/isa.h" #include "arch/x64/isa.h" #include "core/core.h"