commit f4e8879166746eed53379bf53e7012989dda8736
parent 1ad968848968d8f5138df151b65883bb9d9b0005
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Wed, 20 May 2026 19:09:33 -0700
cg: redesign switch / computed_goto / label-addr around structured IR
Replace the old optional switch_branch hook with a uniform vtable
split: switch_ is structured (CGSwitchDesc carries selector + cases +
default + hint), indirect_branch is the universal register-indirect
jump primitive, and load_label_addr materializes intra-function label
addresses via PC-relative emission per arch (x64 lea, aa64 adr, rv64
auipc+addi). Native arches get cg_lower_switch_default's shared
cmp-chain when they leave switch_ NULL; the C-source target keeps its
native `switch (x) { case V: goto L; }` lowering.
The opt layer records IR_SWITCH, IR_INDIRECT_BRANCH, and
IR_LOAD_LABEL_ADDR end-to-end so cross-function passes see the
structured construct; pass_emit replays them through the wrapped
backend. w_label_new pre-allocates a stable MCLabel per block so
cfree_cg_data_label_addr can queue a fixup that survives the
recording/replay split. Block.succ becomes variable-length to support
IR_SWITCH's N+1 successors. Hard-liveness/pass_combine learn about
the new ops so DCE no longer drops the load feeding indirect_branch.
cfree_cg_data_label_addr now emits a real cross-section relocation
against the enclosing function symbol with addend = (label_offset -
func_start), resolved at label_place time by a new
MCEmitter.emit_label_data_reloc that captures the current function
context (mc_begin_function / mc_end_function on every native
func_begin / func_end). The addend goes in both inline data bytes
(Mach-O ARM64_RELOC_UNSIGNED consumes inline) and the reloc record
(ELF RELA and the JIT's link_reloc_apply use the record) so all paths
converge on sym + addend.
Diagnostics: data_label_addr now panics with a precise message on
unsupported width, missing function context, or the C-source target
(which can't render &&L in file-scope static initializers — two toy
cases that exercise this are marked .cbackend.skip).
Tests: 732 pass / 0 fail / 5 skip across R, L, and C paths
(test/toy). test-cg-api, test-opt, test-isa, test-aa64-inline,
test-link, test-elf, test-ar, test-ar-driver, test-debug, test-dwarf
all pass.
Diffstat:
31 files changed, 840 insertions(+), 93 deletions(-)
diff --git a/include/cfree/cg.h b/include/cfree/cg.h
@@ -491,9 +491,9 @@ void cfree_cg_switch(CfreeCg*, CfreeCgSwitch sw);
* dynamic activation and must not be called or dereferenced as data. */
void cfree_cg_push_label_addr(CfreeCg*, CfreeCgLabel, CfreeCgTypeId ptr_type);
-/* Pops a label address and branches to it. valid_targets may be NULL when the
- * frontend cannot enumerate them, but providing it lets targets validate and
- * apply branch-protection lowering. */
+/* Pops a label address and branches to it. valid_targets must name the
+ * non-empty closed set of labels the target may resolve to; targets use it
+ * for validation, CFG construction, and branch-protection lowering. */
void cfree_cg_computed_goto(CfreeCg*, const CfreeCgLabel* valid_targets,
uint32_t ntargets);
diff --git a/src/arch/aa64/alloc.c b/src/arch/aa64/alloc.c
@@ -90,6 +90,45 @@ void aa_jump(CGTarget* t, Label l) {
mc->emit_label_ref(mc, (MCLabel)l, R_AARCH64_JUMP26, 4, 0);
}
+/* Append eight zero bytes at the current emit position. */
+static void aa_emit_zero64(MCEmitter* mc) {
+  static const u8 zero_quad[8] = {0};
+  mc->emit_bytes(mc, zero_quad, sizeof zero_quad);
+}
+
+/* Materialize the address of label `l` into register operand `dst`.
+ * Panics if `dst` is not an OPK_REG operand. */
+static void aa_load_label_addr(CGTarget* t, Operand dst, Label l) {
+  /* A fixed 16-byte sequence is reserved and covered by one
+   * R_AARCH64_INTRA_LABEL_ADDR label reference (width 16):
+   *   +0  ADR Xdst, label   (patched to an LDR literal if out of range)
+   *   +4  B   .+12          (skips the inline literal)
+   *   +8  .quad 0           (literal slot, relocated only when needed)
+   *
+   * The MC fixup range-checks ADR when the label is placed: in-range
+   * labels keep the ADR, out-of-range labels load the relocated literal. */
+  MCEmitter* mc = t->mc;
+  u32 xd;
+  if (dst.kind != OPK_REG) {
+    compiler_panic(t->c, mc->loc,
+                   "aa64: load_label_addr dst must be REG");
+  }
+  xd = reg_num(dst);
+  aa64_emit32(mc, aa64_adr(xd, 0u, 0u));
+  aa64_emit32(mc, aa64_b_base() | 3u);
+  aa_emit_zero64(mc);
+  mc->emit_label_ref(mc, (MCLabel)l, R_AARCH64_INTRA_LABEL_ADDR, 16, 0);
+}
+
+/* Emit `BR Xn`, a register-indirect branch to the address held in `addr`.
+ * No label fixup is involved. `targets` / `ntargets` are accepted for the
+ * CGTarget interface and are not consulted here. Panics if `addr` is not an
+ * OPK_REG operand. */
+static void aa_indirect_branch(CGTarget* t, Operand addr,
+                               const Label* targets, u32 ntargets) {
+  (void)targets;
+  (void)ntargets;
+  if (addr.kind != OPK_REG) {
+    compiler_panic(t->c, t->mc->loc,
+                   "aa64: indirect_branch expects REG operand");
+  }
+  aa64_emit32(t->mc, aa64_br(reg_num(addr)));
+}
+
static u32 cmp_to_cond(CmpOp op) {
switch (op) {
case CMP_EQ: return 0x0u;
@@ -255,6 +294,8 @@ void aa_alloc_vtable_init(CGTarget* t) {
t->jump = aa_jump;
t->cmp_branch = aa_cmp_branch;
t->cmp = aa_cmp;
+ t->load_label_addr = aa_load_label_addr;
+ t->indirect_branch = aa_indirect_branch;
t->scope_begin = aa_scope_begin;
t->scope_else = aa_scope_else;
diff --git a/src/arch/aa64/arch.c b/src/arch/aa64/arch.c
@@ -4,6 +4,7 @@
#include "arch/aa64/aa64.h"
#include "arch/aa64/asm.h"
#include "arch/aa64/disasm.h"
+#include "arch/aa64/isa.h"
#include "arch/aa64/regs.h"
#include "core/bytes.h"
#include "link/link_arch.h"
@@ -28,6 +29,14 @@ static int aa64_register_at_public(uint32_t idx, CfreeArchReg* out) {
return aa64_register_iter_get(idx, &out->dwarf_idx, &out->name);
}
+/* Store `v` into the 8 bytes at `p` in the target's byte order: big-endian
+ * when `c` is non-NULL and c->target.big_endian is set, little-endian
+ * otherwise. */
+static void aa64_wr_u64_target(Compiler* c, u8* p, u64 v) {
+  u32 shift;
+  if (!c || !c->target.big_endian) {
+    wr_u64_le(p, v);
+    return;
+  }
+  /* Most significant byte first. */
+  for (shift = 64u; shift != 0u;) {
+    shift -= 8u;
+    *p++ = (u8)(v >> shift);
+  }
+}
+
static const ArchElfOps aa64_elf_ops = {
.e_machine = EM_AARCH64,
.e_flags = 0,
@@ -49,8 +58,9 @@ static int aa64_apply_label_fixup(Compiler* c, const ArchLabelFixup* fx) {
u8 cur[4];
u32 word;
- (void)c;
- if (!fx || fx->width != 4) return 1;
+ if (!fx) return 1;
+ if (fx->kind != R_AARCH64_INTRA_LABEL_ADDR && fx->width != 4) return 1;
+ if (fx->kind == R_AARCH64_INTRA_LABEL_ADDR && fx->width != 16) return 1;
s = obj_section_get(fx->obj, fx->sec_id);
if (!s) return 0;
buf_read(&s->bytes, fx->offset, cur, 4);
@@ -70,6 +80,47 @@ static int aa64_apply_label_fixup(Compiler* c, const ArchLabelFixup* fx) {
word = (word & ~(0x7ffffu << 5)) | (imm19 << 5);
break;
}
+ case R_AARCH64_ADR_PREL_LO21: {
+ /* ADR: imm21 (byte displacement) split into immlo (bits 30:29)
+ * and immhi (bits 23:5). */
+ if (fx->disp < -(i64)(1 << 20) || fx->disp >= (i64)(1 << 20)) {
+ compiler_panic(c, (SrcLoc){0, 0, 0},
+ "aarch64 label fixup: ADR target out of range "
+ "(need +/-1MiB)");
+ }
+ u32 imm21 = (u32)(fx->disp & 0x1fffffu);
+ u32 immlo = imm21 & 0x3u;
+ u32 immhi = (imm21 >> 2) & 0x7ffffu;
+ word = (word & ~((0x3u << 29) | (0x7ffffu << 5))) |
+ (immlo << 29) | (immhi << 5);
+ break;
+ }
+ case R_AARCH64_INTRA_LABEL_ADDR: {
+ u32 rd = word & 0x1fu;
+ if (fx->disp >= -(i64)(1 << 20) && fx->disp < (i64)(1 << 20)) {
+ u32 imm21 = (u32)(fx->disp & 0x1fffffu);
+ u32 immlo = imm21 & 0x3u;
+ u32 immhi = (imm21 >> 2) & 0x7ffffu;
+ word = aa64_adr(rd, immlo, immhi);
+ } else {
+ u8 lit[8];
+ i64 label_offset;
+ i64 addend;
+ if (fx->cur_func_sym == OBJ_SYM_NONE) {
+ compiler_panic(c, (SrcLoc){0, 0, 0},
+ "aarch64 label fixup: wide label address resolved "
+ "outside a function");
+ }
+ label_offset = (i64)fx->offset + fx->disp;
+ addend = label_offset - (i64)fx->cur_func_start;
+ word = 0x58000000u | (2u << 5) | rd; /* LDR Xt, [PC + 8] */
+ aa64_wr_u64_target(c, lit, (u64)addend);
+ obj_patch(fx->obj, fx->sec_id, fx->offset + 8u, lit, sizeof lit);
+ obj_reloc_ex(fx->obj, fx->sec_id, fx->offset + 8u, R_ABS64,
+ fx->cur_func_sym, addend, 1, 0);
+ }
+ break;
+ }
default:
return 1;
}
diff --git a/src/arch/aa64/emit.c b/src/arch/aa64/emit.c
@@ -93,6 +93,7 @@ static void aa_func_begin_init(CGTarget *t, const CGFuncDesc *fd) {
a->fd = fd;
a->func_start = mc->pos(mc);
+ mc_begin_function(mc, fd->sym, fd->text_section_id, a->func_start);
a->next_param_int = 0;
a->next_param_fp = 0;
a->next_param_stack = 0;
@@ -478,6 +479,7 @@ finish:
debug_func_pc_range(t->debug, sec, a->func_start, end);
mc->cfi_endproc(mc);
+ mc_end_function(mc);
a->fd = NULL;
}
diff --git a/src/arch/arch.h b/src/arch/arch.h
@@ -534,6 +534,8 @@ typedef struct ArchLabelFixup {
u32 width;
RelocKind kind;
i64 disp;
+ ObjSymId cur_func_sym;
+ u32 cur_func_start;
} ArchLabelFixup;
typedef struct MCEmitter MCEmitter;
@@ -554,6 +556,16 @@ struct MCEmitter {
* §3.2 this is the backend's only new dependency on Debug. */
Debug* debug;
+ /* Currently active function. Backends manage these via the
+ * mc_begin_function / mc_end_function helpers from their func_begin /
+ * func_end once they've computed the post-alignment function start
+ * position. emit_label_data_reloc reads them to compute reloc
+ * addends that resolve to the runtime address of an intra-function
+ * label. */
+ ObjSymId cur_func_sym;
+ u32 cur_func_section;
+ u32 cur_func_start;
+
void (*set_section)(MCEmitter*, u32 section_id);
u32 (*pos)(MCEmitter*);
@@ -567,6 +579,19 @@ struct MCEmitter {
void (*emit_reloc_at)(MCEmitter*, u32 section_id, u32 offset, RelocKind,
ObjSymId, i64 addend, int explicit_addend, int pair);
void (*emit_label_ref)(MCEmitter*, MCLabel, RelocKind, u32 width, i64 addend);
+
+ /* Emit a relocation at (data_sec, data_offset) that resolves at link
+ * time to the runtime address of `label` (an intra-function code label).
+ *
+ * The relocation is generated against the function symbol that is
+ * active when the reloc is resolved (cur_func_sym) with addend =
+ * (label_offset_in_section - cur_func_start) + extra_addend. If `label`
+ * is already placed, the reloc is resolved immediately; otherwise it is
+ * queued and resolved at label_place time. A function must be active
+ * (set by backend func_begin) at resolution time, not necessarily at
+ * call time; resolution panics otherwise. */
+ void (*emit_label_data_reloc)(MCEmitter*, u32 data_sec, u32 data_offset,
+ MCLabel label, RelocKind kind, u32 width,
+ i64 extra_addend);
void (*set_loc)(MCEmitter*, SrcLoc);
/* ---- CFI / unwind ----
@@ -589,6 +614,24 @@ struct MCEmitter {
void (*destroy)(MCEmitter*);
};
+/* One arm of a structured switch: a constant to match and the label to
+ * branch to on a match. CGSwitchDesc.cases points at an array of these. */
+typedef struct CGSwitchCase {
+ /* Bit pattern matched against the selector; interpreted using
+ * selector_type's width and signedness (signed comparison uses
+ * sign-extension to selector_type's width). */
+ u64 value;
+ /* Branch target taken when the selector equals `value`. */
+ Label label;
+} CGSwitchCase;
+
+/* Structured description of one switch, passed to CGTarget.switch_ and to
+ * cg_lower_switch_default. */
+typedef struct CGSwitchDesc {
+ Operand selector; /* OPK_REG or OPK_IMM */
+ /* Type of the selector; cg_lower_switch_default uses it to build each
+ * case's immediate operand. */
+ CfreeCgTypeId selector_type;
+ Label default_label; /* LABEL_NONE means "fall through past the switch" */
+ /* Array of `ncases` arms. */
+ const CGSwitchCase* cases;
+ u32 ncases;
+ u8 hint; /* CfreeCgSwitchHint */
+ u8 pad[3];
+} CGSwitchDesc;
+
typedef struct CGTarget CGTarget;
struct CGTarget {
/* Typed IR lowering context. Subclasses extend. */
@@ -693,25 +736,57 @@ struct CGTarget {
/* ---- labels and control flow ---- */
Label (*label_new)(CGTarget*);
void (*label_place)(CGTarget*, Label);
+ /* Translate a CGTarget-visible Label to the underlying MCEmitter
+ * MCLabel id. For direct CG backends Label IS the MCLabel id so this
+ * is an identity function (optional — NULL is treated as identity).
+ * The opt wrapper overrides this to look up the IR block's
+ * pre-allocated MCLabel; cfree_cg_data_label_addr needs the stable
+ * MCLabel id at IR-recording time, before opt has built its
+ * per-function label_map. */
+ MCLabel (*cg_label_to_mc_label)(CGTarget*, Label);
void (*jump)(CGTarget*, Label);
/* Fused compare-and-branch. cg's preferred form: avoids materializing 0/1
* for a normal `if (a < b)`. For an arbitrary i1 in a register, callers
* synthesize cmp_branch(CMP_NE, val, IMM_ZERO, label). */
void (*cmp_branch)(CGTarget*, CmpOp, Operand a, Operand b, Label);
- /* Switch dispatch. Optional: if NULL, cg falls back to a chain of
- * cmp_branch calls + jump-to-default — the same lowering native arches
- * have always used. The C-source target overrides this to emit
- * `switch (val) { case V: goto L_V; … default: goto L_def; }` so the
- * host C compiler picks the best lowering (jump table / branch tree).
- * `values[i]` is the constant the case matches; `labels[i]` is where
- * to branch when it does. Both arrays have length `ncases`. For
- * jump-table-shaped use (wasm br_table, direct threading) frontends
- * pass dense values 0..ncases-1 and a non-NONE default_label; the
- * arch backend can detect the dense shape and emit a real table. */
- void (*switch_branch)(CGTarget*, Operand selector, const Label* labels,
- const u64* values, u32 ncases, Label default_label,
- u8 hint /* CfreeCgSwitchHint */);
+ /* Structured switch dispatch.
+ *
+ * Optional: when NULL, cg's shared `cg_lower_switch_default` runs and
+ * lowers in terms of cmp_branch / jump / indirect_branch / data ops —
+ * the path every native arch uses. Backends override switch_ only when
+ * they can express the construct natively: the C-source target emits
+ * `switch (val) { case V: goto L_V; ... default: goto L_def; }`; a
+ * future WASM target would emit `br_table`.
+ *
+ * The descriptor carries the full structured form (selector + paired
+ * cases + default + frontend hint); density policy lives in
+ * cg_lower_switch_default. */
+ void (*switch_)(CGTarget*, const CGSwitchDesc*);
+
+ /* Indirect branch primitive: transfer control to the address in
+ * `addr_reg` (an OPK_REG holding a function-local label address).
+ *
+ * Required on every native arch and used by:
+ * - cfree_cg_computed_goto for direct-threaded dispatch
+ * - opt-level jump-table lowerings of IR_SWITCH (when implemented)
+ *
+ * `valid_targets[0..ntargets)` is the closed set of labels the address
+ * can resolve to. Backends use it for branch-target hardening (BTI,
+ * PAC, x86 CFG, IBT) and opt uses it to build the CFG; opt requires
+ * ntargets > 0. */
+ void (*indirect_branch)(CGTarget*, Operand addr_reg,
+ const Label* valid_targets, u32 ntargets);
+
+ /* Materialize the runtime address of a function-local label into
+ * `dst_reg`. The label must already exist (label_new); it does not
+ * need to be placed yet. Backends emit the arch's PC-relative load:
+ * x86_64 `lea L(%rip), %r`, aarch64 `adr X, L`, riscv `auipc/addi`.
+ *
+ * The resulting pointer is a function-local label address (per the
+ * public cfree_cg_push_label_addr contract) and must only be consumed
+ * by indirect_branch inside the defining function's activation. */
+ void (*load_label_addr)(CGTarget*, Operand dst_reg, Label label);
/* ---- structured control flow ----
* Mirrors CG's scope ops. CG passes explicit break/continue targets so C
@@ -876,10 +951,27 @@ struct CGTarget {
void (*destroy)(CGTarget*);
};
+/* Shared switch lowering. cg's cfree_cg_switch installs this as the
+ * default target->switch_ behavior; opt's pass_emit calls it when
+ * replaying IR_SWITCH against a backend that doesn't override switch_.
+ * Emits a cmp-and-branch chain over (target->cmp_branch + target->jump)
+ * — fast at -O0 and the input shape an opt-level jump-table rewrite
+ * starts from. */
+void cg_lower_switch_default(CGTarget* t, const CGSwitchDesc* desc);
+
/* Construct the right target/emitter pair for c->target. */
MCEmitter* mc_new(Compiler*, ObjBuilder*);
void mc_free(MCEmitter*);
+/* Per-function context helpers. Backends call mc_begin_function from
+ * their CGTarget func_begin (after computing the post-alignment function
+ * start) and mc_end_function from func_end. The pair sets / clears
+ * MCEmitter.cur_func_* — the metadata that emit_label_data_reloc reads
+ * to resolve deferred intra-function label fixups in data sections. */
+void mc_begin_function(MCEmitter*, ObjSymId sym, u32 section_id,
+ u32 start_offset);
+void mc_end_function(MCEmitter*);
+
CGTarget* cgtarget_new(Compiler*, ObjBuilder*, MCEmitter*);
void cgtarget_finalize(CGTarget*);
void cgtarget_free(CGTarget*);
diff --git a/src/arch/c_target/emit.c b/src/arch/c_target/emit.c
@@ -1569,27 +1569,26 @@ static void c_emit_case_value(CTarget* t, CfreeCgTypeId sel_ty, u64 v) {
cbuf_puts(&t->body, ":");
}
-void c_switch_branch(CGTarget* T, Operand selector, const Label* labels,
- const u64* values, u32 ncases, Label default_label,
- u8 hint) {
+void c_switch_(CGTarget* T, const CGSwitchDesc* d) {
CTarget* t = (CTarget*)T;
- (void)hint; /* gcc/clang ignore strategy hints and pick their own. */
+ /* gcc/clang ignore strategy hints and pick their own dispatch shape. */
+ (void)d->hint;
if (t->last_was_terminator) return;
cbuf_puts(&t->body, " switch (");
- c_emit_operand(t, selector);
+ c_emit_operand(t, d->selector);
cbuf_puts(&t->body, ") {\n");
- for (u32 i = 0; i < ncases; ++i) {
+ for (u32 i = 0; i < d->ncases; ++i) {
char buf[24];
- c_label_name(labels[i], buf, sizeof buf);
- c_emit_case_value(t, selector.type, values[i]);
+ c_label_name(d->cases[i].label, buf, sizeof buf);
+ c_emit_case_value(t, d->selector.type, d->cases[i].value);
cbuf_puts(&t->body, " goto ");
cbuf_puts(&t->body, buf);
cbuf_puts(&t->body, ";\n");
}
cbuf_puts(&t->body, " default: ");
- if (default_label != (Label)LABEL_NONE) {
+ if (d->default_label != (Label)LABEL_NONE) {
char buf[24];
- c_label_name(default_label, buf, sizeof buf);
+ c_label_name(d->default_label, buf, sizeof buf);
cbuf_puts(&t->body, "goto ");
cbuf_puts(&t->body, buf);
cbuf_puts(&t->body, ";\n");
@@ -1606,6 +1605,38 @@ void c_switch_branch(CGTarget* T, Operand selector, const Label* labels,
t->last_was_terminator = 1;
}
+/* ===== load_label_addr / indirect_branch =====
+ * GCC computed-goto extension: `&&L` is the address of label L within
+ * the current function, and `goto *p;` jumps to such an address. This
+ * is the lowering every cc1-like backend uses (and what the toy
+ * frontend ultimately compiles to via the C target). */
+/* Emit an assignment of `(void*)&&<label>` to the C variable backing register
+ * operand `dst`. Panics if `dst` is not an OPK_REG operand. */
+void c_load_label_addr(CGTarget* T, Operand dst, Label l) {
+  CTarget* ct = (CTarget*)T;
+  char label_name[24];
+  if (dst.kind != OPK_REG) {
+    compiler_panic(ct->c, ct->cur_fn ? ct->cur_fn->loc : (SrcLoc){0, 0, 0},
+                   "C target: load_label_addr dst must be REG");
+  }
+  c_label_name(l, label_name, sizeof label_name);
+  c_ensure_reg(ct, dst.v.reg, dst.type, (RegClass)dst.cls);
+  c_emit_reg_assign_open(ct, dst.v.reg, (CfreeCgTypeId)0);
+  cbuf_puts(&ct->body, "(void*)&&");
+  cbuf_puts(&ct->body, label_name);
+  c_emit_reg_assign_close(ct);
+}
+
+/* Emit `goto *<addr>;` and mark the statement stream as terminated. Nothing
+ * is emitted when the previous statement was already a terminator.
+ * `valid_targets` / `ntargets` are not consulted here. */
+void c_indirect_branch(CGTarget* T, Operand addr, const Label* valid_targets,
+                       u32 ntargets) {
+  CTarget* ct = (CTarget*)T;
+  (void)valid_targets;
+  (void)ntargets;
+  if (!ct->last_was_terminator) {
+    cbuf_puts(&ct->body, " goto *");
+    c_emit_operand(ct, addr);
+    cbuf_puts(&ct->body, ";\n");
+    ct->last_was_terminator = 1;
+  }
+}
+
/* ===== local, local_addr ===== */
CGLocalStorage c_local(CGTarget* T, const CGLocalDesc* d) {
diff --git a/src/arch/c_target/target.c b/src/arch/c_target/target.c
@@ -36,8 +36,9 @@ Label c_label_new(CGTarget*);
void c_label_place(CGTarget*, Label);
void c_jump(CGTarget*, Label);
void c_cmp_branch(CGTarget*, CmpOp, Operand, Operand, Label);
-void c_switch_branch(CGTarget*, Operand, const Label*, const u64*, u32,
- Label, u8);
+void c_switch_(CGTarget*, const CGSwitchDesc*);
+void c_indirect_branch(CGTarget*, Operand, const Label*, u32);
+void c_load_label_addr(CGTarget*, Operand, Label);
CGScope c_scope_begin(CGTarget*, const CGScopeDesc*);
void c_scope_else(CGTarget*, CGScope);
void c_scope_end(CGTarget*, CGScope);
@@ -214,7 +215,9 @@ CGTarget* c_cgtarget_new(Compiler* c, ObjBuilder* o, CfreeWriter* w) {
t->label_place = c_label_place;
t->jump = c_jump;
t->cmp_branch = c_cmp_branch;
- t->switch_branch = c_switch_branch;
+ t->switch_ = c_switch_;
+ t->indirect_branch = c_indirect_branch;
+ t->load_label_addr = c_load_label_addr;
t->scope_begin = c_scope_begin;
t->scope_else = c_scope_else;
t->scope_end = c_scope_end;
diff --git a/src/arch/mc.c b/src/arch/mc.c
@@ -40,12 +40,30 @@ typedef struct MCFixup {
struct MCFixup* next;
} MCFixup;
+/* A request to write a label-address relocation into a data section. Refs
+ * whose label is not yet placed are chained on MCLabelInfo.pending_data and
+ * resolved when the label is placed.
+ *
+ * No function symbol / function start is stored here: both are read from
+ * MCEmitter at label_place time (when the label's offset becomes known).
+ * Under -O1 the queue-time call comes during opt IR recording — before any
+ * backend func_begin has set cur_func_* — so capturing them here would be
+ * wrong. The label is always placed inside its owning function's emit, so
+ * the MCEmitter's current function tracks the right symbol at that
+ * moment. */
+typedef struct MCDataLabelRef {
+ /* Where in the data section to write the relocation. */
+ u32 data_sec;
+ u32 data_offset;
+ /* Relocation kind handed to emit_reloc_at on resolution. */
+ RelocKind kind;
+ /* Number of inline addend bytes patched at data_offset. */
+ u32 width;
+ /* Added to (label offset - function start) to form the final addend. */
+ i64 extra_addend;
+ /* Next ref pending on the same label. */
+ struct MCDataLabelRef* next;
+} MCDataLabelRef;
+
typedef struct MCLabelInfo {
u8 placed;
u8 pad[3];
u32 sec_id;
u32 offset;
MCFixup* pending;
+ MCDataLabelRef* pending_data;
} MCLabelInfo;
typedef struct MCImpl {
@@ -74,6 +92,42 @@ static void labels_grow(MCImpl* mc, u32 want) {
mc->cap = ncap;
}
+/* Resolve one data-section reference to an intra-function label now that the
+ * label's section offset is known.
+ *
+ * Emits an `r->kind` relocation at (r->data_sec, r->data_offset) against the
+ * currently active function symbol with
+ *   addend = label_offset - cur_func_start + r->extra_addend
+ * and patches the same addend into the `r->width` data bytes in target byte
+ * order. Panics if no function is active or if `r->width` exceeds the 8-byte
+ * patch buffer. */
+static void emit_label_data_reloc_now(MCImpl* mc, const MCDataLabelRef* r,
+                                      u32 label_offset) {
+  i64 addend;
+  u8 bytes[8];
+  u32 i;
+  int big_endian;
+  if (mc->base.cur_func_sym == OBJ_SYM_NONE) {
+    compiler_panic(mc->base.c, mc->base.loc,
+                   "MCEmitter: label-data reloc resolved outside a function");
+  }
+  /* bytes[] is the source buffer for obj_patch below; a wider request
+   * would read past its end. */
+  if (r->width > sizeof bytes) {
+    compiler_panic(mc->base.c, mc->base.loc,
+                   "MCEmitter: label-data reloc width %u unsupported",
+                   (unsigned)r->width);
+  }
+  addend =
+      (i64)label_offset - (i64)mc->base.cur_func_start + r->extra_addend;
+  /* The addend goes in BOTH the inline data bytes AND the reloc record:
+   *   - Mach-O ARM64_RELOC_UNSIGNED uses only the inline value (the .o
+   *     emitter drops the record's addend for UNSIGNED).
+   *   - ELF RELA and the JIT linker's link_reloc_apply use the record
+   *     addend (the inline gets overwritten by S + A).
+   * Both paths converge on sym + addend at runtime. */
+  big_endian = mc->base.c->target.big_endian;
+  memset(bytes, 0, sizeof bytes);
+  for (i = 0; i < r->width; ++i) {
+    u32 shift = big_endian ? (r->width - 1u - i) * 8u : i * 8u;
+    bytes[i] = (u8)((u64)addend >> shift);
+  }
+  obj_patch(mc->base.obj, r->data_sec, r->data_offset, bytes, r->width);
+  mc->base.emit_reloc_at(&mc->base, r->data_sec, r->data_offset, r->kind,
+                         mc->base.cur_func_sym, addend,
+                         /*explicit_addend=*/1, /*pair=*/0);
+}
+
static void apply_fixup(MCImpl* mc, const MCFixup* fx, u32 target_offset) {
/* signed displacement from end-of-instruction position to target. */
ArchLabelFixup desc;
@@ -86,6 +140,8 @@ static void apply_fixup(MCImpl* mc, const MCFixup* fx, u32 target_offset) {
desc.width = fx->width;
desc.kind = fx->kind;
desc.disp = (i64)target_offset - (i64)fx->offset + fx->addend;
+ desc.cur_func_sym = mc->base.cur_func_sym;
+ desc.cur_func_start = mc->base.cur_func_start;
arch = arch_for_compiler(mc->base.c);
if (!arch || !arch->apply_label_fixup ||
@@ -117,6 +173,7 @@ static MCLabel m_label_new(MCEmitter* m) {
li->sec_id = 0;
li->offset = 0;
li->pending = NULL;
+ li->pending_data = NULL;
return (MCLabel)id;
}
@@ -133,11 +190,19 @@ static void m_label_place(MCEmitter* m, MCLabel id) {
li->placed = 1;
li->sec_id = m->section_id;
li->offset = obj_pos(m->obj, m->section_id);
- /* Apply pending fixups. */
+ /* Apply pending intra-section fixups. */
for (MCFixup* fx = li->pending; fx; fx = fx->next) {
apply_fixup(mc, fx, li->offset);
}
li->pending = NULL;
+ /* Resolve any deferred data-section relocations referencing this label.
+ * MCEmitter's cur_func_sym/cur_func_start track the function whose
+ * body is currently being emitted; the label is always placed inside
+ * its owning function's emit, so the active function context matches. */
+ for (MCDataLabelRef* r = li->pending_data; r; r = r->next) {
+ emit_label_data_reloc_now(mc, r, li->offset);
+ }
+ li->pending_data = NULL;
}
static void m_emit_bytes(MCEmitter* m, const u8* data, size_t n) {
@@ -197,6 +262,38 @@ static void m_emit_label_ref(MCEmitter* m, MCLabel id, RelocKind kind,
}
}
+/* MCEmitter.emit_label_data_reloc implementation. If label `id` is already
+ * placed the reloc is resolved immediately; otherwise the request is pushed
+ * onto the label's pending_data list for m_label_place to resolve. Panics on
+ * MC_LABEL_NONE or an id outside the label table. */
+static void m_emit_label_data_reloc(MCEmitter* m, u32 data_sec, u32 data_offset,
+                                    MCLabel id, RelocKind kind, u32 width,
+                                    i64 extra_addend) {
+  MCImpl* mc = impl_of(m);
+  MCLabelInfo* info;
+  MCDataLabelRef ref;
+  MCDataLabelRef* queued;
+  if (id == MC_LABEL_NONE || id >= mc->nlabels) {
+    compiler_panic(m->c, m->loc, "MCEmitter: bad label %u", (unsigned)id);
+  }
+  info = &mc->labels[id];
+
+  ref.data_sec = data_sec;
+  ref.data_offset = data_offset;
+  ref.kind = kind;
+  ref.width = width;
+  ref.extra_addend = extra_addend;
+  ref.next = NULL;
+
+  if (info->placed) {
+    emit_label_data_reloc_now(mc, &ref, info->offset);
+    return;
+  }
+
+  /* Not placed yet: keep an arena copy at the head of the pending list. */
+  queued = arena_new(mc->arena, MCDataLabelRef);
+  *queued = ref;
+  queued->next = info->pending_data;
+  info->pending_data = queued;
+}
+
static void m_set_loc(MCEmitter* m, SrcLoc loc) { m->loc = loc; }
/* CFI: buffered for .eh_frame / .debug_frame emission. v1 stores nothing
@@ -246,6 +343,9 @@ MCEmitter* mc_new(Compiler* c, ObjBuilder* o) {
base->c = c;
base->obj = o;
base->section_id = OBJ_SEC_NONE;
+ base->cur_func_sym = OBJ_SYM_NONE;
+ base->cur_func_section = 0;
+ base->cur_func_start = 0;
base->set_section = m_set_section;
base->pos = m_pos;
@@ -259,6 +359,7 @@ MCEmitter* mc_new(Compiler* c, ObjBuilder* o) {
base->emit_reloc = m_emit_reloc;
base->emit_reloc_at = m_emit_reloc_at;
base->emit_label_ref = m_emit_label_ref;
+ base->emit_label_data_reloc = m_emit_label_data_reloc;
base->set_loc = m_set_loc;
base->cfi_startproc = m_cfi_startproc;
@@ -285,3 +386,18 @@ void mc_free(MCEmitter* m) {
if (!m) return;
/* Arena-backed; nothing to free. */
}
+
+/* Record the function now being emitted: its symbol, its section and its
+ * start offset. Does nothing when `m` is NULL. */
+void mc_begin_function(MCEmitter* m, ObjSymId sym, u32 section_id,
+                       u32 start_offset) {
+  if (m) {
+    m->cur_func_start = start_offset;
+    m->cur_func_section = section_id;
+    m->cur_func_sym = sym;
+  }
+}
+
+/* Clear the active-function context: symbol back to OBJ_SYM_NONE, section
+ * and start offset back to 0. Does nothing when `m` is NULL. */
+void mc_end_function(MCEmitter* m) {
+  mc_begin_function(m, OBJ_SYM_NONE, 0, 0);
+}
diff --git a/src/arch/rv64/alloc.c b/src/arch/rv64/alloc.c
@@ -272,6 +272,37 @@ void rv_jump(CGTarget* t, Label l) {
mc->emit_label_ref(mc, (MCLabel)l, R_RV_JAL, 4, 0);
}
+/* Materialize the address of label `l` into register operand `dst` with an
+ * AUIPC + ADDI pair. Panics if `dst` is not an OPK_REG operand. */
+void rv_load_label_addr(CGTarget* t, Operand dst, Label l) {
+  MCEmitter* mc = t->mc;
+  u32 dst_reg;
+  if (dst.kind != OPK_REG) {
+    compiler_panic(t->c, impl_of(t)->loc,
+                   "rv64: load_label_addr dst must be REG");
+  }
+  dst_reg = reg_num(dst);
+  /* AUIPC rd, %hi(L); ADDI rd, rd, %lo(L). Both immediates are emitted as
+   * zero and fixed up as a PC-relative pair via R_RV_INTRA_AUIPC_ADDI
+   * (width=8, addend=0 references the AUIPC site). */
+  rv64_emit32(mc, rv_auipc(dst_reg, 0));
+  rv64_emit32(mc, rv_addi(dst_reg, dst_reg, 0));
+  mc->emit_label_ref(mc, (MCLabel)l, R_RV_INTRA_AUIPC_ADDI, 8, 0);
+}
+
+/* Emit `JALR x0, rs1, 0`, a register-indirect jump that discards the return
+ * address. `targets` / `ntargets` are not consulted here. Panics if `addr`
+ * is not an OPK_REG operand. */
+void rv_indirect_branch(CGTarget* t, Operand addr, const Label* targets,
+                        u32 ntargets) {
+  (void)targets;
+  (void)ntargets;
+  if (addr.kind != OPK_REG) {
+    compiler_panic(t->c, impl_of(t)->loc,
+                   "rv64: indirect_branch expects REG operand");
+  }
+  rv64_emit32(t->mc, rv_i(0, reg_num(addr), 0, RV_ZERO, RV_JALR));
+}
+
/* Force an integer Operand into a register; materializes IMM via scratch. */
u32 rv64_force_reg_int(CGTarget* t, Operand op, u32 scratch) {
if (op.kind == OPK_REG) return reg_num(op);
diff --git a/src/arch/rv64/arch.c b/src/arch/rv64/arch.c
@@ -52,6 +52,30 @@ static int rv64_apply_label_fixup(Compiler* c, const ArchLabelFixup* fx) {
word |= ((b >> 11) & 1u) << 20;
word |= ((b >> 12) & 0xffu) << 12;
break;
+ case R_RV_INTRA_AUIPC_ADDI: {
+ /* width=8: patch both the AUIPC at fx->offset and the ADDI at
+ * fx->offset+4. disp is the byte offset from the AUIPC PC to the
+ * target label. */
+ u8 cur2[4];
+ u32 word2;
+ i32 disp = (i32)fx->disp;
+ /* hi20 is the top 20 bits of (disp + 0x800) so the sign-extended
+ * 12-bit lo12 cancels out. */
+ u32 hi20 = (u32)((disp + 0x800) >> 12) & 0xfffffu;
+ u32 lo12 = (u32)disp & 0xfffu;
+ if (fx->width != 8) return 1;
+ /* AUIPC: keep rd (bits 11:7) and opcode (bits 6:0); patch imm[31:12]. */
+ word = (word & 0x00000fffu) | (hi20 << 12);
+ wr_u32_le(cur, word);
+ obj_patch(fx->obj, fx->sec_id, fx->offset, cur, 4);
+ buf_read(&s->bytes, fx->offset + 4, cur2, 4);
+ word2 = rd_u32_le(cur2);
+ /* ADDI: keep rs1/funct3/rd/opcode (bits 19:0); patch imm[11:0]. */
+ word2 = (word2 & 0x000fffffu) | (lo12 << 20);
+ wr_u32_le(cur2, word2);
+ obj_patch(fx->obj, fx->sec_id, fx->offset + 4, cur2, 4);
+ return 0;
+ }
default:
return 1;
}
diff --git a/src/arch/rv64/emit.c b/src/arch/rv64/emit.c
@@ -146,6 +146,7 @@ static void rv_func_begin_init(CGTarget *t, const CGFuncDesc *fd) {
a->fd = fd;
a->func_start = mc->pos(mc);
+ mc_begin_function(mc, fd->sym, fd->text_section_id, a->func_start);
a->next_param_int = 0;
a->next_param_fp = 0;
a->next_param_stack = 0;
@@ -562,5 +563,6 @@ finish:
debug_func_pc_range(t->debug, sec, a->func_start, end);
mc->cfi_endproc(mc);
+ mc_end_function(mc);
a->fd = NULL;
}
diff --git a/src/arch/rv64/internal.h b/src/arch/rv64/internal.h
@@ -147,6 +147,9 @@ void rv_reload_reg(CGTarget *t, Operand dst, FrameSlot slot, MemAccess ma);
Label rv_label_new(CGTarget *t);
void rv_label_place(CGTarget *t, Label l);
void rv_jump(CGTarget *t, Label l);
+void rv_load_label_addr(CGTarget *t, Operand dst, Label l);
+void rv_indirect_branch(CGTarget *t, Operand addr, const Label *targets,
+ u32 ntargets);
u32 rv64_force_reg_int(CGTarget *t, Operand op, u32 scratch);
void rv_cmp_branch(CGTarget *t, CmpOp op, Operand a_op, Operand b_op, Label l);
void rv_cmp(CGTarget *t, CmpOp op, Operand dst, Operand a_op, Operand b_op);
diff --git a/src/arch/rv64/ops.c b/src/arch/rv64/ops.c
@@ -2208,6 +2208,8 @@ CGTarget* rv64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) {
t->label_place = rv_label_place;
t->jump = rv_jump;
t->cmp_branch = rv_cmp_branch;
+ t->load_label_addr = rv_load_label_addr;
+ t->indirect_branch = rv_indirect_branch;
t->scope_begin = rv_scope_begin;
t->scope_else = rv_scope_else;
diff --git a/src/arch/x64/alloc.c b/src/arch/x64/alloc.c
@@ -273,6 +273,46 @@ void emit_jcc_label(MCEmitter* mc, u32 cc, MCLabel l) {
void x_jump(CGTarget* t, Label l) { emit_jmp_label(t->mc, (MCLabel)l); }
+/* Materialize the address of label `l` into register operand `dst` with a
+ * RIP-relative LEA. Panics if `dst` is not an OPK_REG operand, matching the
+ * other backends' load_label_addr implementations. */
+void x_load_label_addr(CGTarget* t, Operand dst, Label l) {
+  /* lea disp32(%rip), %dst
+   *   REX.W + 0x8D /r, ModRM mod=00 reg=dst r/m=101 (RIP-relative)
+   * The disp32 is fixed up at label_place via R_PC32 with addend -4
+   * (because the PC is end-of-instruction). */
+  MCEmitter* mc = t->mc;
+  u32 dr;
+  if (dst.kind != OPK_REG) {
+    /* Without this check dst.v.reg would be read from a non-register
+     * operand and an LEA into an arbitrary register would be emitted. */
+    compiler_panic(t->c, mc->loc,
+                   "x64: load_label_addr dst must be REG");
+  }
+  dr = dst.v.reg & 0xFu;
+  emit_rex(mc, 1, dr, 0, 0);
+  u8 op = 0x8D;
+  mc->emit_bytes(mc, &op, 1);
+  u8 mr = modrm(0u, (dr & 7u), 5u);
+  mc->emit_bytes(mc, &mr, 1);
+  emit_u32le(mc, 0);
+  mc->emit_label_ref(mc, (MCLabel)l, R_PC32, 4, -4);
+}
+
+/* Emit `jmpq *%reg` (FF /4, ModRM mod=11 r/m=reg). `targets` / `ntargets`
+ * are not consulted here. Panics if `addr` is not an OPK_REG operand. */
+void x_indirect_branch(CGTarget* t, Operand addr, const Label* targets,
+                       u32 ntargets) {
+  MCEmitter* mc = t->mc;
+  u32 target_reg;
+  (void)targets;
+  (void)ntargets;
+  if (addr.kind != OPK_REG) {
+    compiler_panic(t->c, mc->loc,
+                   "x64: indirect_branch expects REG operand");
+  }
+  target_reg = addr.v.reg & 0xFu;
+  /* r8..r15 need REX.B; REX.W is not needed for jmpq *. */
+  if (target_reg >= 8u) {
+    const u8 rex_b = 0x41;
+    mc->emit_bytes(mc, &rex_b, 1);
+  }
+  const u8 jmp_rm[2] = {0xFF, modrm(3u, 4u /* sub-opcode */, (target_reg & 7u))};
+  mc->emit_bytes(mc, jmp_rm, sizeof jmp_rm);
+}
+
static u32 cmp_to_cc(CmpOp op) {
switch (op) {
case CMP_EQ: return X64_CC_E;
diff --git a/src/arch/x64/emit.c b/src/arch/x64/emit.c
@@ -521,6 +521,7 @@ static void x_func_begin_init(CGTarget *t, const CGFuncDesc *fd) {
a->fd = fd;
a->func_start = mc->pos(mc);
+ mc_begin_function(mc, fd->sym, fd->text_section_id, a->func_start);
a->next_param_int = 0;
a->next_param_fp = 0;
a->next_param_stack = 0;
@@ -797,5 +798,6 @@ finish:
debug_func_pc_range(t->debug, a->fd->text_section_id, a->func_start, end);
mc->cfi_endproc(mc);
+ mc_end_function(mc);
a->fd = NULL;
}
diff --git a/src/arch/x64/internal.h b/src/arch/x64/internal.h
@@ -214,6 +214,9 @@ void emit_jmp_label(MCEmitter *mc, MCLabel l);
void emit_jcc_label(MCEmitter *mc, u32 cc, MCLabel l);
void x_jump(CGTarget *t, Label l);
void x_cmp_branch(CGTarget *t, CmpOp op, Operand a, Operand b, Label l);
+void x_load_label_addr(CGTarget *t, Operand dst, Label l);
+void x_indirect_branch(CGTarget *t, Operand addr, const Label *targets,
+ u32 ntargets);
void x_cmp(CGTarget *t, CmpOp op, Operand dst, Operand a, Operand b);
CGScope x_scope_begin(CGTarget *t, const CGScopeDesc *d);
void x_scope_else(CGTarget *t, CGScope s);
diff --git a/src/arch/x64/ops.c b/src/arch/x64/ops.c
@@ -2335,6 +2335,8 @@ CGTarget* x64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) {
t->label_place = x_label_place;
t->jump = x_jump;
t->cmp_branch = x_cmp_branch;
+ t->load_label_addr = x_load_label_addr;
+ t->indirect_branch = x_indirect_branch;
t->scope_begin = x_scope_begin;
t->scope_else = x_scope_else;
diff --git a/src/cg/control.c b/src/cg/control.c
@@ -72,53 +72,61 @@ void cfree_cg_branch_false(CfreeCg* g, CfreeCgLabel label) {
api_branch_if(g, &v, 0, (Label)label);
}
+void cg_lower_switch_default(CGTarget* t, const CGSwitchDesc* d) {
+ /* Cmp-and-branch chain: one cmp_branch per case, then jump to
+ * default (or fall through if LABEL_NONE). Fast to emit and the
+ * default policy at -O0. Density-driven jump-table conversion runs
+ * later as an opt-level rewrite over IR_SWITCH; the structured
+ * shape survives in IR until then.
+ * Cases are compared in array order, using only t->cmp_branch and t->jump.
+ * d->hint is currently advisory only here — cg does not rewrite
+ * into a jump table at lowering time, so JUMP_TABLE and
+ * BRANCH_CHAIN both produce the chain. */
+ for (u32 i = 0; i < d->ncases; ++i) {
+ Operand imm = api_op_imm((i64)d->cases[i].value, d->selector_type); /* case constant as an immediate of the selector's type */
+ t->cmp_branch(t, CMP_EQ, d->selector, imm, d->cases[i].label); /* CMP_EQ of selector against the case value, targeting the case label */
+ }
+ if (d->default_label != LABEL_NONE) {
+ t->jump(t, d->default_label); /* emitted after every case compare */
+ }
+}
+
void cfree_cg_switch(CfreeCg* g, CfreeCgSwitch sw) {
ApiSValue selector;
- CfreeCgTypeId ty;
- Operand sel;
+ CGSwitchDesc desc; /* structured description handed to the target's switch_ hook or to cg_lower_switch_default */
+ Heap* h;
+ CGSwitchCase* cases = NULL; /* heap copy of sw.cases as CGSwitchCase entries; freed before returning */
if (!g) return;
if (g->sp == 0) return;
api_local_const_control_boundary(g);
selector = api_pop(g);
- ty = resolve_type(g->c, sw.selector_type);
- if (!ty) ty = api_sv_type(&selector);
- sel = api_force_reg_unless_imm(g, &selector, ty);
- if (g->target->switch_branch) {
- /* Materialize parallel label[] / value[] arrays — the public-side
- * CfreeCgSwitchCase keeps them paired per-entry for caller
- * convenience, but the vtable wants them split so the target can
- * scan values for density and pick chain vs jump table without
- * per-element indirection. */
- Heap* h = g->c->ctx->heap;
- Label* labels = NULL;
- u64* values = NULL;
- if (sw.ncases) {
- labels = (Label*)h->alloc(h, sw.ncases * sizeof(Label), _Alignof(Label));
- values = (u64*)h->alloc(h, sw.ncases * sizeof(u64), _Alignof(u64));
- if (!labels || !values) {
- compiler_panic(g->c, g->cur_loc, "cfree_cg_switch: out of memory");
- }
- for (u32 i = 0; i < sw.ncases; ++i) {
- labels[i] = (Label)sw.cases[i].label;
- values[i] = sw.cases[i].value;
- }
- }
- g->target->switch_branch(g->target, sel, labels, values, sw.ncases,
- (Label)sw.default_label, (u8)sw.hint);
- if (labels) h->free(h, labels, sw.ncases * sizeof(Label));
- if (values) h->free(h, values, sw.ncases * sizeof(u64));
- } else {
- /* Default lowering: cmp-and-branch chain — same behaviour native
- * arches have always had. Targets that can do better (real C
- * `switch`, machine jump table) override `switch_branch`. */
+ memset(&desc, 0, sizeof desc); /* start from an all-zero descriptor; desc.cases stays NULL when there are no cases */
+ desc.selector_type = resolve_type(g->c, sw.selector_type);
+ if (!desc.selector_type) desc.selector_type = api_sv_type(&selector); /* unresolved type: fall back to the popped value's own type */
+ desc.selector =
+ api_force_reg_unless_imm(g, &selector, desc.selector_type);
+ desc.default_label = (Label)sw.default_label;
+ desc.ncases = sw.ncases;
+ desc.hint = (u8)sw.hint;
+ if (sw.ncases) {
+ h = g->c->ctx->heap;
+ cases = (CGSwitchCase*)h->alloc(h, sw.ncases * sizeof(CGSwitchCase),
+ _Alignof(CGSwitchCase));
+ if (!cases) compiler_panic(g->c, g->cur_loc, "cfree_cg_switch: out of memory");
for (u32 i = 0; i < sw.ncases; ++i) {
- Operand imm = api_op_imm((i64)sw.cases[i].value, ty);
- g->target->cmp_branch(g->target, CMP_EQ, sel, imm,
- (Label)sw.cases[i].label);
- }
- if (sw.default_label != CFREE_CG_LABEL_NONE) {
- g->target->jump(g->target, (Label)sw.default_label);
+ cases[i].value = sw.cases[i].value;
+ cases[i].label = (Label)sw.cases[i].label;
}
+ desc.cases = cases;
+ }
+ if (g->target->switch_) { /* target supplies its own structured switch lowering */
+ g->target->switch_(g->target, &desc);
+ } else {
+ cg_lower_switch_default(g->target, &desc); /* shared cmp_branch chain */
+ }
+ if (cases) { /* the case copy is only needed for the duration of the lowering call */
+ h = g->c->ctx->heap;
+ h->free(h, cases, sw.ncases * sizeof(CGSwitchCase));
}
api_release(g, &selector);
}
@@ -126,10 +134,15 @@ void cfree_cg_switch(CfreeCg* g, CfreeCgSwitch sw) {
void cfree_cg_push_label_addr(CfreeCg* g, CfreeCgLabel label,
CfreeCgTypeId ptr_type) {
CfreeCgTypeId ty;
+ Reg r;
+ Operand dst; /* register operand that receives the label address */
if (!g) return;
ty = resolve_type(g->c, ptr_type);
- if (!ty) return;
- api_push(g, api_make_sv(api_op_imm((i64)label, ty), ty));
+ if (!ty) ty = cg_type_ptr_to(g->c, builtin_id(CFREE_CG_BUILTIN_VOID)); /* unresolved ptr_type: fall back to pointer-to-void */
+ r = api_alloc_reg_or_spill(g, RC_INT, ty);
+ dst = api_op_reg(r, ty);
+ g->target->load_label_addr(g->target, dst, (Label)label); /* NOTE(review): hook is called without a NULL check — assumes every target installs load_label_addr; confirm */
+ api_push(g, api_make_sv(dst, ty)); /* the pushed value is the register, not an immediate label id as before */
}
void cfree_cg_computed_goto(CfreeCg* g, const CfreeCgLabel* valid_targets,
@@ -138,17 +151,18 @@ void cfree_cg_computed_goto(CfreeCg* g, const CfreeCgLabel* valid_targets,
CfreeCgTypeId target_ty;
Operand target_op;
if (!g) return;
+ if (!valid_targets || ntargets == 0) {
+ compiler_panic(g->c, g->cur_loc,
+ "cfree_cg_computed_goto: valid_targets must be non-empty");
+ return;
+ }
api_local_const_control_boundary(g);
target = api_pop(g);
target_ty = api_sv_type(&target);
target_op = api_force_reg(g, &target, target_ty);
- for (uint32_t i = 0; i < ntargets; ++i) {
- Operand imm = api_op_imm((i64)valid_targets[i], target_ty);
- g->target->cmp_branch(g->target, CMP_EQ, target_op, imm,
- (Label)valid_targets[i]);
- }
+ g->target->indirect_branch(g->target, target_op, (const Label*)valid_targets,
+ ntargets);
api_release(g, &target);
- g->target->intrinsic(g->target, INTRIN_UNREACHABLE, NULL, 0, NULL, 0);
}
void cfree_cg_unreachable(CfreeCg* g) {
diff --git a/src/cg/data.c b/src/cg/data.c
@@ -351,19 +351,68 @@ void cfree_cg_data_addr(CfreeCg* g, CfreeCgSym target, int64_t addend,
void cfree_cg_data_label_addr(CfreeCg* g, CfreeCgLabel target, int64_t addend,
uint32_t width, uint32_t address_space) {
u8 pad[8];
+ RelocKind rk; /* non-pcrel data relocation kind selected for `width` */
+ u32 data_offset; /* g->data_base + g->data_size, sampled just before the placeholder is written */
+ MCLabel ml; /* MCLabel the deferred relocation is queued against */
(void)address_space;
- if (!g || !width || width > sizeof(pad)) return;
- memset(pad, 0, sizeof(pad));
- for (u32 i = 0; i < width; ++i) {
- u32 shift = g->c->target.big_endian ? (width - 1u - i) * 8u : i * 8u;
- pad[i] = (u8)(((uint64_t)target + (uint64_t)addend) >> shift);
+ if (!g) return;
+ if (!width || width > sizeof(pad)) {
+ compiler_panic(g->c, g->cur_loc,
+ "cfree_cg_data_label_addr: width must be 1..%u, got %u",
+ (unsigned)sizeof(pad), (unsigned)width);
+ return;
+ }
+ if (!g->mc) {
+ /* The C-source target has no MCEmitter and can't emit a relocation
+ * that resolves to an intra-function label address: GCC's `&&L`
+ * operator only works in function-local-static initializers, and
+ * the data path here writes to a file-scope C object. Silently
+ * writing zeros would produce a binary that jumps to NULL at
+ * runtime — fail loudly instead. Tests that intentionally exercise
+ * this can opt out via a `.cbackend.skip` file. */
+ compiler_panic(g->c, g->cur_loc,
+ "cfree_cg_data_label_addr: --emit=c (C-source target) "
+ "does not support intra-function label addresses in "
+ "data sections (GCC's &&L is only valid in "
+ "function-local-static initializers)");
+ return;
}
+ /* Resolve the cfree_cg_label to a stable MCLabel. Under direct CG
+ * the two ids coincide; under opt the wrapper pre-allocated an
+ * MCLabel at w_label_new time and stashed it on the IR block. */
+ if (g->target->cg_label_to_mc_label) {
+ ml = g->target->cg_label_to_mc_label(g->target, (Label)target);
+ } else {
+ ml = (MCLabel)target;
+ }
+ if (ml == MC_LABEL_NONE) {
+ compiler_panic(g->c, g->cur_loc,
+ "cfree_cg_data_label_addr: label has no MCLabel");
+ return;
+ }
+ rk = api_data_reloc_kind(/*pcrel=*/0, width);
+ if (rk == R_NONE) {
+ compiler_panic(g->c, g->cur_loc,
+ "cfree_cg_data_label_addr: unsupported width %u",
+ (unsigned)width);
+ return;
+ }
+ memset(pad, 0, sizeof pad); /* placeholder bytes are all zero; NOTE(review): presumably emit_label_data_reloc supplies the real value later — confirm */
if (g->data_tls_collect) {
- api_data_tls_write(g, pad, width);
+ compiler_panic(g->c, g->cur_loc,
+ "cfree_cg_data_label_addr: TLS label-address data is not "
+ "supported");
return;
}
+ data_offset = g->data_base + (u32)g->data_size; /* must be read before data_size is advanced below */
obj_write(g->obj, g->data_sec, pad, width);
g->data_size += width;
+ /* The MCEmitter resolves this to obj_reloc(data_sec, data_offset, kind,
+ * cur_func_sym, label_offset - cur_func_start + addend) at label
+ * placement time. cur_func_sym/cur_func_start were set by the backend
+ * func_begin; at -O1 they're set when pass_emit replays the function. */
+ g->mc->emit_label_data_reloc(g->mc, g->data_sec, data_offset, ml, rk, width,
+ addend);
}
void cfree_cg_data_pcrel(CfreeCg* g, CfreeCgSym target, int64_t addend,
diff --git a/src/cg/internal.h b/src/cg/internal.h
@@ -266,6 +266,7 @@ void cfree_cg_push_label_addr(CfreeCg* g, CfreeCgLabel label,
CfreeCgTypeId ptr_type);
void cfree_cg_computed_goto(CfreeCg* g, const CfreeCgLabel* valid_targets,
uint32_t ntargets);
+
void cfree_cg_unreachable(CfreeCg* g);
CfreeCgScope api_scope_handle(u32 idx, u32 generation);
ApiCgScope* api_scope_from_handle(CfreeCg* g, CfreeCgScope scope,
diff --git a/src/obj/obj.h b/src/obj/obj.h
@@ -115,6 +115,10 @@ typedef enum RelocKind {
R_AARCH64_TSTBR14,
R_AARCH64_LD_PREL_LO19,
R_AARCH64_ADR_PREL_LO21,
+ /* MCEmitter-only function-local label address materialization. The fixup
+ * patches a fixed 16-byte sequence as either ADR+B+literal when in range,
+ * or LDR-literal+B+relocated-literal when the ADR range is exceeded. */
+ R_AARCH64_INTRA_LABEL_ADDR,
R_AARCH64_ADR_PREL_PG_HI21,
R_AARCH64_ADR_PREL_PG_HI21_NC,
R_AARCH64_ADD_ABS_LO12_NC,
@@ -194,6 +198,12 @@ typedef enum RelocKind {
R_RV_PCREL_HI20,
R_RV_PCREL_LO12_I,
R_RV_PCREL_LO12_S,
+ /* Intra-section label address materialization via an AUIPC+ADDI pair.
+ * Used only by MCEmitter intra-section label fixups (CGTarget
+ * load_label_addr). Width is 8 bytes, covering both instructions; the
+ * fixup site is the AUIPC and the disp is the label byte offset
+ * relative to the AUIPC site. */
+ R_RV_INTRA_AUIPC_ADDI,
R_RV_GOT_HI20,
R_RV_TPREL_HI20,
R_RV_TPREL_LO12_I,
diff --git a/src/opt/ir.c b/src/opt/ir.c
@@ -93,9 +93,26 @@ u32 ir_block_new(Func* f) {
b = &f->blocks[f->nblocks];
memset(b, 0, sizeof *b);
b->id = f->nblocks;
+ b->succ = arena_zarray(f->arena, u32, 2);
+ b->succ_cap = 2;
+ b->mc_label = MC_LABEL_NONE;
return f->nblocks++;
}
+void ir_block_set_nsucc(Func* f, u32 block, u32 n) { /* set block's successor count to n, growing succ[] if needed */
+ Block* bl;
+ if (block >= f->nblocks) return; /* out-of-range block id is silently ignored */
+ bl = &f->blocks[block];
+ if (n > bl->succ_cap) {
+ u32* nb = arena_zarray(f->arena, u32, n); /* new array of exactly n slots from the function arena (presumably zero-filled — confirm arena_zarray) */
+ if (bl->succ && bl->nsucc)
+ memcpy(nb, bl->succ, sizeof(u32) * bl->nsucc); /* carry over the old in-use prefix */
+ bl->succ = nb; /* the old array is not released here */
+ bl->succ_cap = n;
+ }
+ bl->nsucc = n; /* NOTE(review): when n <= succ_cap, slots in [old nsucc, n) are not cleared here; callers in this change overwrite them */
+}
+
/* ---- emit order ---- */
void ir_note_emit(Func* f, u32 block) {
diff --git a/src/opt/ir.h b/src/opt/ir.h
@@ -58,6 +58,12 @@ typedef enum IROp {
IR_CONDBR, /* opnds[0] cond REG; succ[0] = true, succ[1] = false. */
IR_CMP_BRANCH, /* fused. opnds = [a, b]; extra.imm = CmpOp;
succ[0] = taken, succ[1] = fallthrough. */
+ IR_SWITCH, /* multi-target structured switch. opnds[0] = selector;
+ extra.aux = IRSwitchAux; succ[0..ncases) = case
+ blocks, succ[ncases] = default block. */
+ IR_INDIRECT_BRANCH, /* opnds[0] = addr REG; extra.aux = IRIndirectAux.
+ succ[0..nvalid) = the valid target blocks. */
+ IR_LOAD_LABEL_ADDR, /* opnds[0] dst REG; extra.imm = target block id. */
IR_RET, /* extra.aux = IRRetAux* (NULL for void). */
IR_SCOPE_BEGIN, /* extra.aux = IRScopeAux. defs[0] = scope id Val. */
IR_SCOPE_ELSE, /* extra.imm = scope id (Val). */
@@ -96,6 +102,26 @@ typedef struct IRTlsAux {
i64 addend;
} IRTlsAux;
+typedef struct IRSwitchAuxCase { /* one (value, successor) pair of an IR_SWITCH */
+ u64 value; /* case constant */
+ u32 block; /* successor block id */
+} IRSwitchAuxCase;
+
+typedef struct IRSwitchAux { /* extra.aux payload of IR_SWITCH */
+ CfreeCgTypeId selector_type; /* type of the selector operand (opnds[0]) */
+ IRSwitchAuxCase* cases; /* ncases entries; NULL when ncases == 0 */
+ u32 ncases;
+ u32 default_block; /* if no default, this is the post-switch fallthrough */
+ u8 has_default; /* 1 if frontend supplied an explicit default */
+ u8 hint; /* CfreeCgSwitchHint */
+ u8 pad[2];
+} IRSwitchAux;
+
+typedef struct IRIndirectAux { /* extra.aux payload of IR_INDIRECT_BRANCH */
+ u32* targets; /* successor block ids; same as Block.succ[0..ntargets) */
+ u32 ntargets; /* number of entries in targets */
+} IRIndirectAux;
+
typedef struct IRAggAux {
AggregateAccess access;
} IRAggAux;
@@ -263,11 +289,21 @@ typedef struct Block {
u32 ninsts, cap;
u32* preds;
u32 npreds;
- u32 succ[2];
- u8 nsucc;
- u8 loop_depth;
- u16 pad;
+ /* Variable-length successor list. ir_block_new arena-allocates a
+ * 2-slot array (large enough for ordinary terminators); ops with
+ * more successors (IR_SWITCH, IR_INDIRECT_BRANCH) reallocate via
+ * ir_block_set_nsucc before writing. nsucc names the prefix in use. */
+ u32* succ;
+ u32 nsucc;
+ u32 succ_cap;
+ u32 loop_depth;
u32 frequency;
+ /* MCLabel id pre-allocated by w_label_new for blocks created via
+ * cg_label_new. Stable from recording through pass_emit. Blocks
+ * created internally by opt (fallthroughs, scope-implicit blocks)
+ * leave this at MC_LABEL_NONE; pass_emit's ensure_label mints one
+ * on demand for those. */
+ MCLabel mc_label;
} Block;
typedef enum OptAllocKind {
@@ -383,6 +419,10 @@ void ir_ensure_val(Func*, Val, CfreeCgTypeId, u8 cls);
Inst* ir_emit(Func*, u32 block, IROp);
+/* Resize a block's successor array. Used by ops with >2 successors
+ * (IR_SWITCH, IR_INDIRECT_BRANCH). Always sets nsucc to n. */
+void ir_block_set_nsucc(Func*, u32 block, u32 n);
+
/* Append `block` to f->emit_order if not already present. Called by
* the wrapper whenever cur transitions to a block. */
void ir_note_emit(Func*, u32 block);
diff --git a/src/opt/opt.c b/src/opt/opt.c
@@ -534,7 +534,22 @@ static int w_resolve_reg_name(CGTarget* t, Sym name, Reg* out,
static Label w_label_new(CGTarget* t) {
OptImpl* o = impl_of(t);
- return (Label)ir_block_new(o->f);
+ u32 block = ir_block_new(o->f); /* the Label handed back is the new IR block's id */
+ /* Pre-allocate an MCLabel id so frontend code that needs a stable
+ * MCLabel before pass_emit replays (cfree_cg_data_label_addr in
+ * particular) has one. pass_emit places it through the wrapped
+ * target's label_place during replay. */
+ if (o->target && o->target->mc) {
+ o->f->blocks[block].mc_label = o->target->mc->label_new(o->target->mc);
+ } /* without a wrapped target or MCEmitter, mc_label is left as ir_block_new initialized it */
+ return (Label)block;
+}
+
+static MCLabel w_cg_label_to_mc_label(CGTarget* t, Label l) { /* map a wrapper Label (IR block id) to the block's stored MCLabel */
+ OptImpl* o = impl_of(t);
+ u32 block = (u32)l; /* Labels issued by w_label_new are block ids */
+ if (block >= o->f->nblocks) return MC_LABEL_NONE; /* unknown block id */
+ return o->f->blocks[block].mc_label; /* may itself be MC_LABEL_NONE if none was pre-allocated */
}
static void w_label_place(CGTarget* t, Label l) {
@@ -580,6 +595,85 @@ static void w_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b, Label l) {
set_cur(o, ft);
}
+static void w_switch_(CGTarget* t, const CGSwitchDesc* d) { /* record a structured switch as IR_SWITCH and wire the current block's successors */
+ OptImpl* o = impl_of(t);
+ Inst* in = rec(o, IR_SWITCH);
+ IRSwitchAux* aux = arena_znew(o->f->arena, IRSwitchAux);
+ Operand sel = d->selector;
+ in->opnds = dup_opnds(o->f, &sel, 1); /* opnds[0] = selector */
+ in->nopnds = 1;
+ aux->selector_type = d->selector_type;
+ aux->ncases = d->ncases;
+ aux->hint = d->hint;
+ aux->cases = NULL;
+ if (d->ncases) {
+ aux->cases = arena_array(o->f->arena, IRSwitchAuxCase, d->ncases); /* arena copy: the caller's d->cases is not retained */
+ for (u32 i = 0; i < d->ncases; ++i) {
+ aux->cases[i].value = d->cases[i].value;
+ aux->cases[i].block = (u32)d->cases[i].label; /* wrapper Labels are IR block ids */
+ }
+ }
+ u32 cur = o->cur; /* block that ends with this switch */
+ /* Default label may be LABEL_NONE meaning "fall through past the
+ * switch." Materialize a fresh post-switch block to land on in that
+ * case so the CFG still has a single block as default successor. */
+ u32 default_blk;
+ if (d->default_label != LABEL_NONE) {
+ aux->has_default = 1;
+ default_blk = (u32)d->default_label;
+ } else {
+ aux->has_default = 0;
+ default_blk = ir_block_new(o->f); /* created directly, not via w_label_new, so no MCLabel is pre-allocated for it */
+ }
+ aux->default_block = default_blk;
+ in->extra.aux = aux;
+
+ ir_block_set_nsucc(o->f, cur, d->ncases + 1u); /* ncases case successors plus one default slot */
+ Block* cb = &o->f->blocks[cur]; /* taken after ir_block_new above */
+ for (u32 i = 0; i < d->ncases; ++i) cb->succ[i] = (u32)d->cases[i].label;
+ cb->succ[d->ncases] = default_blk; /* the default is always the last successor */
+ /* No-default fall-through: emit a fresh post-switch block as the
+ * continuation point. With an explicit default the next recorded
+ * instruction is unreachable until a label_place re-anchors cur. */
+ if (!aux->has_default) {
+ set_cur(o, default_blk);
+ } else {
+ after_terminator(o);
+ }
+}
+
+static void w_indirect_branch(CGTarget* t, Operand addr, /* record a register-indirect jump as IR_INDIRECT_BRANCH */
+ const Label* targets, u32 ntargets) {
+ OptImpl* o = impl_of(t);
+ Inst* in = rec(o, IR_INDIRECT_BRANCH);
+ IRIndirectAux* aux = arena_znew(o->f->arena, IRIndirectAux);
+ Operand a = addr;
+ in->opnds = dup_opnds(o->f, &a, 1); /* opnds[0] = address operand */
+ in->nopnds = 1;
+ aux->ntargets = ntargets;
+ aux->targets = NULL;
+ if (ntargets) {
+ aux->targets = arena_array(o->f->arena, u32, ntargets); /* arena copy of the caller's target list */
+ for (u32 i = 0; i < ntargets; ++i) aux->targets[i] = (u32)targets[i]; /* wrapper Labels are IR block ids */
+ }
+ in->extra.aux = aux;
+ u32 cur = o->cur; /* block that ends with this branch */
+ ir_block_set_nsucc(o->f, cur, ntargets); /* one successor per valid target; zero successors when ntargets == 0 */
+ Block* cb = &o->f->blocks[cur];
+ for (u32 i = 0; i < ntargets; ++i) cb->succ[i] = (u32)targets[i]; /* mirrors aux->targets */
+ after_terminator(o);
+}
+
+static void w_load_label_addr(CGTarget* t, Operand dst, Label l) { /* record IR_LOAD_LABEL_ADDR: dst receives the address of label l */
+ OptImpl* o = impl_of(t);
+ ensure_operand(o->f, &dst);
+ Inst* in = rec(o, IR_LOAD_LABEL_ADDR);
+ in->opnds = dup_opnds(o->f, &dst, 1); /* opnds[0] = destination operand */
+ in->nopnds = 1;
+ in->extra.imm = (i64)(u32)l; /* target block id (wrapper Labels are block ids) */
+ set_def(o->f, in, o->cur, (Val)dst.v.reg, dst.type); /* registers dst's register as defined by this instruction */
+}
+
/* ---- structured scopes ---- */
static u32 scope_register(Func* f, Inst* in) {
@@ -1382,8 +1476,12 @@ CGTarget* opt_cgtarget_new(Compiler* c, CGTarget* target, int level) {
t->label_new = w_label_new;
t->label_place = w_label_place;
+ t->cg_label_to_mc_label = w_cg_label_to_mc_label;
t->jump = w_jump;
t->cmp_branch = w_cmp_branch;
+ t->switch_ = w_switch_;
+ t->indirect_branch = w_indirect_branch;
+ t->load_label_addr = w_load_label_addr;
t->scope_begin = w_scope_begin;
t->scope_else = w_scope_else;
diff --git a/src/opt/pass_cfg.c b/src/opt/pass_cfg.c
@@ -33,6 +33,8 @@ static int is_terminator(const Inst* in) {
case IR_BR:
case IR_CONDBR:
case IR_CMP_BRANCH:
+ case IR_SWITCH:
+ case IR_INDIRECT_BRANCH:
case IR_RET:
case IR_BREAK_TO:
case IR_CONTINUE_TO:
@@ -103,8 +105,6 @@ static void prune_unreachable(Func* f, const u8* reachable) {
bl->cap = 0;
bl->preds = NULL;
bl->npreds = 0;
- bl->succ[0] = 0;
- bl->succ[1] = 0;
bl->nsucc = 0;
}
@@ -158,6 +158,10 @@ void opt_build_cfg(Func* f) {
case IR_CMP_BRANCH:
bl->nsucc = 2;
break;
+ case IR_SWITCH:
+ case IR_INDIRECT_BRANCH:
+ /* nsucc was set by the recorder at emit time; trust it. */
+ break;
default:
break;
}
diff --git a/src/opt/pass_combine.c b/src/opt/pass_combine.c
@@ -132,6 +132,8 @@ static int inst_uses_phys_reg(const Inst* in, const Operand* r) {
}
case IR_CMP_BRANCH:
case IR_CONDBR:
+ case IR_SWITCH:
+ case IR_INDIRECT_BRANCH:
for (u32 i = 0; i < in->nopnds; ++i)
n += count_operand_phys_uses(&in->opnds[i], r);
break;
@@ -199,6 +201,7 @@ static int inst_defines_phys_reg(const Inst* in, const Operand* r) {
switch ((IROp)in->op) {
case IR_LOAD_IMM:
case IR_LOAD_CONST:
+ case IR_LOAD_LABEL_ADDR:
case IR_COPY:
case IR_LOAD:
case IR_ADDR_OF:
diff --git a/src/opt/pass_dce.c b/src/opt/pass_dce.c
@@ -20,6 +20,8 @@ int opt_inst_has_side_effect(Func* f, const Inst* in) {
case IR_BR:
case IR_CONDBR:
case IR_CMP_BRANCH:
+ case IR_SWITCH:
+ case IR_INDIRECT_BRANCH:
case IR_RET:
case IR_SCOPE_BEGIN:
case IR_SCOPE_ELSE:
diff --git a/src/opt/pass_emit.c b/src/opt/pass_emit.c
@@ -428,7 +428,16 @@ static void replay_planned_call(ReplayCtx* r, const IRCallAux* aux) {
static Label ensure_label(ReplayCtx* r, u32 b) {
if (b >= r->f->nblocks) return LABEL_NONE;
if (r->label_map[b] == LABEL_NONE) {
- r->label_map[b] = r->tgt->label_new(r->tgt);
+ /* If w_label_new pre-allocated an MCLabel during recording (so
+ * cfree_cg_data_label_addr could queue a deferred fixup against
+ * it), reuse it here so the place we emit lines up with the
+ * existing pending fixup list. */
+ Block* bl = &r->f->blocks[b];
+ if (bl->mc_label != MC_LABEL_NONE) {
+ r->label_map[b] = (Label)bl->mc_label; /* NOTE(review): assumes the wrapped target's Label ids are MCLabel ids — confirm for every backend */
+ } else {
+ r->label_map[b] = r->tgt->label_new(r->tgt); /* no pre-allocated id: mint a label from the wrapped target */
+ }
}
return r->label_map[b];
}
@@ -593,6 +602,54 @@ static void replay_inst(ReplayCtx* r, u32 b, Inst* in) {
w->cmp_branch(w, (CmpOp)in->extra.imm, a, bo, taken);
break;
}
+ case IR_SWITCH: {
+ IRSwitchAux* aux = (IRSwitchAux*)in->extra.aux;
+ Operand sel = xlat_op(r, in->opnds[0]);
+ CGSwitchDesc d;
+ CGSwitchCase* cases = NULL;
+ memset(&d, 0, sizeof d);
+ d.selector = sel;
+ d.selector_type = aux->selector_type;
+ /* default_block is always a real successor block in the IR (the
+ * recorder synthesizes one for no-default switches). Replay must
+ * emit an explicit jump to it so fall-through layout assumptions
+ * don't depend on block placement. */
+ d.default_label = ensure_label(r, aux->default_block);
+ d.ncases = aux->ncases;
+ d.hint = aux->hint;
+ if (aux->ncases) {
+ cases = arena_array(r->f->arena, CGSwitchCase, aux->ncases);
+ for (u32 i = 0; i < aux->ncases; ++i) {
+ cases[i].value = aux->cases[i].value;
+ cases[i].label = ensure_label(r, aux->cases[i].block);
+ }
+ d.cases = cases;
+ }
+ if (w->switch_) {
+ w->switch_(w, &d);
+ } else {
+ cg_lower_switch_default(w, &d);
+ }
+ break;
+ }
+ case IR_INDIRECT_BRANCH: {
+ IRIndirectAux* aux = (IRIndirectAux*)in->extra.aux;
+ Operand addr = xlat_op(r, in->opnds[0]);
+ Label* labels = NULL;
+ if (aux->ntargets) {
+ labels = arena_array(r->f->arena, Label, aux->ntargets);
+ for (u32 i = 0; i < aux->ntargets; ++i)
+ labels[i] = ensure_label(r, aux->targets[i]);
+ }
+ w->indirect_branch(w, addr, labels, aux->ntargets);
+ break;
+ }
+ case IR_LOAD_LABEL_ADDR: {
+ Operand dst = xlat_op(r, in->opnds[0]);
+ Label l = ensure_label(r, (u32)in->extra.imm);
+ w->load_label_addr(w, dst, l);
+ break;
+ }
case IR_RET: {
IRRetAux* aux = (IRRetAux*)in->extra.aux;
if (!aux || !aux->present) {
diff --git a/src/opt/pass_hard_live.c b/src/opt/pass_hard_live.c
@@ -140,8 +140,13 @@ void opt_hard_inst_use_def(Func* f, const Inst* in, OptHardRegSet* use,
}
case IR_CMP_BRANCH:
case IR_CONDBR:
+ case IR_SWITCH:
+ case IR_INDIRECT_BRANCH:
for (u32 i = 0; i < in->nopnds; ++i) hard_use_operand(use, &in->opnds[i]);
break;
+ case IR_LOAD_LABEL_ADDR:
+ if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]);
+ break;
case IR_RET: {
IRRetAux* aux = (IRRetAux*)in->extra.aux;
if (aux && aux->present) hard_use_abivalue(use, &aux->val);
diff --git a/test/toy/cases/119_static_labeladdr_data.cbackend.skip b/test/toy/cases/119_static_labeladdr_data.cbackend.skip
@@ -0,0 +1 @@
+C target: toy's @[.static] lowers to a file-scope object, but GCC's &&L only works in function-local-static initializers
diff --git a/test/toy/cases/123_spec_demo.cbackend.skip b/test/toy/cases/123_spec_demo.cbackend.skip
@@ -0,0 +1 @@
+C target: toy's @[.static] lowers to a file-scope object, but GCC's &&L only works in function-local-static initializers