kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit f4e8879166746eed53379bf53e7012989dda8736
parent 1ad968848968d8f5138df151b65883bb9d9b0005
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed, 20 May 2026 19:09:33 -0700

cg: redesign switch / computed_goto / label-addr around structured IR

Replace the old optional switch_branch hook with a uniform vtable
split: switch_ is structured (CGSwitchDesc carries selector + cases +
default + hint), indirect_branch is the universal register-indirect
jump primitive, and load_label_addr materializes intra-function label
addresses via PC-relative emission per arch (x64 lea; aa64 adr, with an
ldr-literal fallback when the label is out of adr range; rv64
auipc+addi). Native arches get cg_lower_switch_default's shared
cmp-chain when they leave switch_ NULL; the C-source target keeps its
native `switch (x) { case V: goto L; }` lowering.

The opt layer records IR_SWITCH, IR_INDIRECT_BRANCH, and
IR_LOAD_LABEL_ADDR end-to-end so cross-function passes see the
structured construct; pass_emit replays them through the wrapped
backend. w_label_new pre-allocates a stable MCLabel per block so
cfree_cg_data_label_addr can queue a fixup that survives the
recording/replay split. Block.succ becomes variable-length to support
IR_SWITCH's N+1 successors. Hard-liveness/pass_combine learn about
the new ops so DCE no longer drops the load feeding indirect_branch.

cfree_cg_data_label_addr now emits a real cross-section relocation
against the enclosing function symbol with addend = (label_offset -
func_start), resolved at label_place time by a new
MCEmitter.emit_label_data_reloc that captures the current function
context (mc_begin_function / mc_end_function on every native
func_begin / func_end). The addend goes in both inline data bytes
(Mach-O ARM64_RELOC_UNSIGNED consumes inline) and the reloc record
(ELF RELA and the JIT's link_reloc_apply use the record) so all paths
converge on sym + addend.

Diagnostics: data_label_addr now panics with a precise message on
unsupported width, missing function context, or the C-source target
(which can't render &&L in file-scope static initializers — two toy
cases that exercise this are marked .cbackend.skip).

Tests: 732 pass / 0 fail / 5 skip across R, L, and C paths
(test/toy). test-cg-api, test-opt, test-isa, test-aa64-inline,
test-link, test-elf, test-ar, test-ar-driver, test-debug, test-dwarf
all pass.

Diffstat:
M include/cfree/cg.h | 6 +++---
M src/arch/aa64/alloc.c | 41 +++++++++++++++++++++++++++++++++++++++++
M src/arch/aa64/arch.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
M src/arch/aa64/emit.c | 2 ++
M src/arch/arch.h | 118 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------
M src/arch/c_target/emit.c | 51 +++++++++++++++++++++++++++++++++++++++++----------
M src/arch/c_target/target.c | 9 ++++++---
M src/arch/mc.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M src/arch/rv64/alloc.c | 31 +++++++++++++++++++++++++++++++
M src/arch/rv64/arch.c | 24 ++++++++++++++++++++++++
M src/arch/rv64/emit.c | 2 ++
M src/arch/rv64/internal.h | 3 +++
M src/arch/rv64/ops.c | 2 ++
M src/arch/x64/alloc.c | 40 ++++++++++++++++++++++++++++++++++++++++
M src/arch/x64/emit.c | 2 ++
M src/arch/x64/internal.h | 3 +++
M src/arch/x64/ops.c | 2 ++
M src/cg/control.c | 108 +++++++++++++++++++++++++++++++++++++++++++++----------------------------------
M src/cg/data.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++------
M src/cg/internal.h | 1 +
M src/obj/obj.h | 10 ++++++++++
M src/opt/ir.c | 17 +++++++++++++++++
M src/opt/ir.h | 48 ++++++++++++++++++++++++++++++++++++++++++++----
M src/opt/opt.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M src/opt/pass_cfg.c | 8 ++++++--
M src/opt/pass_combine.c | 3 +++
M src/opt/pass_dce.c | 2 ++
M src/opt/pass_emit.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M src/opt/pass_hard_live.c | 5 +++++
A test/toy/cases/119_static_labeladdr_data.cbackend.skip | 1 +
A test/toy/cases/123_spec_demo.cbackend.skip | 1 +
31 files changed, 840 insertions(+), 93 deletions(-)

diff --git a/include/cfree/cg.h b/include/cfree/cg.h @@ -491,9 +491,9 @@ void cfree_cg_switch(CfreeCg*, CfreeCgSwitch sw); * dynamic activation and must not be called or dereferenced as data. */ void cfree_cg_push_label_addr(CfreeCg*, CfreeCgLabel, CfreeCgTypeId ptr_type); -/* Pops a label address and branches to it. valid_targets may be NULL when the - * frontend cannot enumerate them, but providing it lets targets validate and - * apply branch-protection lowering. */ +/* Pops a label address and branches to it. valid_targets must name the + * non-empty closed set of labels the target may resolve to; targets use it + * for validation, CFG construction, and branch-protection lowering. */ void cfree_cg_computed_goto(CfreeCg*, const CfreeCgLabel* valid_targets, uint32_t ntargets); diff --git a/src/arch/aa64/alloc.c b/src/arch/aa64/alloc.c @@ -90,6 +90,45 @@ void aa_jump(CGTarget* t, Label l) { mc->emit_label_ref(mc, (MCLabel)l, R_AARCH64_JUMP26, 4, 0); } +static void aa_emit_zero64(MCEmitter* mc) { + static const u8 zero[8] = {0}; + mc->emit_bytes(mc, zero, sizeof zero); +} + +static void aa_load_label_addr(CGTarget* t, Operand dst, Label l) { + /* Reserve: + * insn0: ADR Xdst, label (patched to LDR literal if out of range) + * insn1: B .+12 (skip the inline literal) + * lit: .quad label (relocated fallback target if needed) + * + * The MC fixup range-checks ADR at label placement. In-range labels use the + * first instruction; out-of-range labels use the relocated literal slot. 
*/ + MCEmitter* mc = t->mc; + u32 rd; + if (dst.kind != OPK_REG) { + compiler_panic(t->c, mc->loc, + "aa64: load_label_addr dst must be REG"); + } + rd = reg_num(dst); + aa64_emit32(mc, aa64_adr(rd, 0u, 0u)); + aa64_emit32(mc, aa64_b_base() | 3u); + aa_emit_zero64(mc); + mc->emit_label_ref(mc, (MCLabel)l, R_AARCH64_INTRA_LABEL_ADDR, 16, 0); +} + +static void aa_indirect_branch(CGTarget* t, Operand addr, + const Label* targets, u32 ntargets) { + /* BR Xn — register-indirect branch (no fixup needed). */ + MCEmitter* mc = t->mc; + (void)targets; + (void)ntargets; + if (addr.kind != OPK_REG) { + compiler_panic(t->c, mc->loc, + "aa64: indirect_branch expects REG operand"); + } + aa64_emit32(mc, aa64_br(reg_num(addr))); +} + static u32 cmp_to_cond(CmpOp op) { switch (op) { case CMP_EQ: return 0x0u; @@ -255,6 +294,8 @@ void aa_alloc_vtable_init(CGTarget* t) { t->jump = aa_jump; t->cmp_branch = aa_cmp_branch; t->cmp = aa_cmp; + t->load_label_addr = aa_load_label_addr; + t->indirect_branch = aa_indirect_branch; t->scope_begin = aa_scope_begin; t->scope_else = aa_scope_else; diff --git a/src/arch/aa64/arch.c b/src/arch/aa64/arch.c @@ -4,6 +4,7 @@ #include "arch/aa64/aa64.h" #include "arch/aa64/asm.h" #include "arch/aa64/disasm.h" +#include "arch/aa64/isa.h" #include "arch/aa64/regs.h" #include "core/bytes.h" #include "link/link_arch.h" @@ -28,6 +29,14 @@ static int aa64_register_at_public(uint32_t idx, CfreeArchReg* out) { return aa64_register_iter_get(idx, &out->dwarf_idx, &out->name); } +static void aa64_wr_u64_target(Compiler* c, u8* p, u64 v) { + if (c && c->target.big_endian) { + for (u32 i = 0; i < 8; ++i) p[i] = (u8)(v >> ((7u - i) * 8u)); + } else { + wr_u64_le(p, v); + } +} + static const ArchElfOps aa64_elf_ops = { .e_machine = EM_AARCH64, .e_flags = 0, @@ -49,8 +58,9 @@ static int aa64_apply_label_fixup(Compiler* c, const ArchLabelFixup* fx) { u8 cur[4]; u32 word; - (void)c; - if (!fx || fx->width != 4) return 1; + if (!fx) return 1; + if (fx->kind != 
R_AARCH64_INTRA_LABEL_ADDR && fx->width != 4) return 1; + if (fx->kind == R_AARCH64_INTRA_LABEL_ADDR && fx->width != 16) return 1; s = obj_section_get(fx->obj, fx->sec_id); if (!s) return 0; buf_read(&s->bytes, fx->offset, cur, 4); @@ -70,6 +80,47 @@ static int aa64_apply_label_fixup(Compiler* c, const ArchLabelFixup* fx) { word = (word & ~(0x7ffffu << 5)) | (imm19 << 5); break; } + case R_AARCH64_ADR_PREL_LO21: { + /* ADR: imm21 (byte displacement) split into immlo (bits 30:29) + * and immhi (bits 23:5). */ + if (fx->disp < -(i64)(1 << 20) || fx->disp >= (i64)(1 << 20)) { + compiler_panic(c, (SrcLoc){0, 0, 0}, + "aarch64 label fixup: ADR target out of range " + "(need +/-1MiB)"); + } + u32 imm21 = (u32)(fx->disp & 0x1fffffu); + u32 immlo = imm21 & 0x3u; + u32 immhi = (imm21 >> 2) & 0x7ffffu; + word = (word & ~((0x3u << 29) | (0x7ffffu << 5))) | + (immlo << 29) | (immhi << 5); + break; + } + case R_AARCH64_INTRA_LABEL_ADDR: { + u32 rd = word & 0x1fu; + if (fx->disp >= -(i64)(1 << 20) && fx->disp < (i64)(1 << 20)) { + u32 imm21 = (u32)(fx->disp & 0x1fffffu); + u32 immlo = imm21 & 0x3u; + u32 immhi = (imm21 >> 2) & 0x7ffffu; + word = aa64_adr(rd, immlo, immhi); + } else { + u8 lit[8]; + i64 label_offset; + i64 addend; + if (fx->cur_func_sym == OBJ_SYM_NONE) { + compiler_panic(c, (SrcLoc){0, 0, 0}, + "aarch64 label fixup: wide label address resolved " + "outside a function"); + } + label_offset = (i64)fx->offset + fx->disp; + addend = label_offset - (i64)fx->cur_func_start; + word = 0x58000000u | (2u << 5) | rd; /* LDR Xt, [PC + 8] */ + aa64_wr_u64_target(c, lit, (u64)addend); + obj_patch(fx->obj, fx->sec_id, fx->offset + 8u, lit, sizeof lit); + obj_reloc_ex(fx->obj, fx->sec_id, fx->offset + 8u, R_ABS64, + fx->cur_func_sym, addend, 1, 0); + } + break; + } default: return 1; } diff --git a/src/arch/aa64/emit.c b/src/arch/aa64/emit.c @@ -93,6 +93,7 @@ static void aa_func_begin_init(CGTarget *t, const CGFuncDesc *fd) { a->fd = fd; a->func_start = mc->pos(mc); + 
mc_begin_function(mc, fd->sym, fd->text_section_id, a->func_start); a->next_param_int = 0; a->next_param_fp = 0; a->next_param_stack = 0; @@ -478,6 +479,7 @@ finish: debug_func_pc_range(t->debug, sec, a->func_start, end); mc->cfi_endproc(mc); + mc_end_function(mc); a->fd = NULL; } diff --git a/src/arch/arch.h b/src/arch/arch.h @@ -534,6 +534,8 @@ typedef struct ArchLabelFixup { u32 width; RelocKind kind; i64 disp; + ObjSymId cur_func_sym; + u32 cur_func_start; } ArchLabelFixup; typedef struct MCEmitter MCEmitter; @@ -554,6 +556,16 @@ struct MCEmitter { * §3.2 this is the backend's only new dependency on Debug. */ Debug* debug; + /* Currently active function. Backends manage these via the + * mc_begin_function / mc_end_function helpers from their func_begin / + * func_end once they've computed the post-alignment function start + * position. emit_label_data_reloc reads them to compute reloc + * addends that resolve to the runtime address of an intra-function + * label. */ + ObjSymId cur_func_sym; + u32 cur_func_section; + u32 cur_func_start; + void (*set_section)(MCEmitter*, u32 section_id); u32 (*pos)(MCEmitter*); @@ -567,6 +579,19 @@ struct MCEmitter { void (*emit_reloc_at)(MCEmitter*, u32 section_id, u32 offset, RelocKind, ObjSymId, i64 addend, int explicit_addend, int pair); void (*emit_label_ref)(MCEmitter*, MCLabel, RelocKind, u32 width, i64 addend); + + /* Emit a relocation at (data_sec, data_offset) that resolves at link + * time to the runtime address of `label` (an intra-function code label). + * + * The relocation is generated against the currently active function + * symbol (cur_func_sym) with addend = (label_offset_in_section - + * cur_func_start) + extra_addend. If `label` is already placed, the + * reloc is emitted immediately; otherwise it is queued and emitted at + * label_place time. Callers must have an active function (set by + * backend func_begin); panics otherwise. 
*/ + void (*emit_label_data_reloc)(MCEmitter*, u32 data_sec, u32 data_offset, + MCLabel label, RelocKind kind, u32 width, + i64 extra_addend); void (*set_loc)(MCEmitter*, SrcLoc); /* ---- CFI / unwind ---- @@ -589,6 +614,24 @@ struct MCEmitter { void (*destroy)(MCEmitter*); }; +typedef struct CGSwitchCase { + /* Bit pattern matched against the selector; interpreted using + * selector_type's width and signedness (signed comparison uses + * sign-extension to selector_type's width). */ + u64 value; + Label label; +} CGSwitchCase; + +typedef struct CGSwitchDesc { + Operand selector; /* OPK_REG or OPK_IMM */ + CfreeCgTypeId selector_type; + Label default_label; /* LABEL_NONE means "fall through past the switch" */ + const CGSwitchCase* cases; + u32 ncases; + u8 hint; /* CfreeCgSwitchHint */ + u8 pad[3]; +} CGSwitchDesc; + typedef struct CGTarget CGTarget; struct CGTarget { /* Typed IR lowering context. Subclasses extend. */ @@ -693,25 +736,57 @@ struct CGTarget { /* ---- labels and control flow ---- */ Label (*label_new)(CGTarget*); void (*label_place)(CGTarget*, Label); + /* Translate a CGTarget-visible Label to the underlying MCEmitter + * MCLabel id. For direct CG backends Label IS the MCLabel id so this + * is an identity function (optional — NULL is treated as identity). + * The opt wrapper overrides this to look up the IR block's + * pre-allocated MCLabel; cfree_cg_data_label_addr needs the stable + * MCLabel id at IR-recording time, before opt has built its + * per-function label_map. */ + MCLabel (*cg_label_to_mc_label)(CGTarget*, Label); void (*jump)(CGTarget*, Label); /* Fused compare-and-branch. cg's preferred form: avoids materializing 0/1 * for a normal `if (a < b)`. For an arbitrary i1 in a register, callers * synthesize cmp_branch(CMP_NE, val, IMM_ZERO, label). */ void (*cmp_branch)(CGTarget*, CmpOp, Operand a, Operand b, Label); - /* Switch dispatch. 
Optional: if NULL, cg falls back to a chain of - * cmp_branch calls + jump-to-default — the same lowering native arches - * have always used. The C-source target overrides this to emit - * `switch (val) { case V: goto L_V; … default: goto L_def; }` so the - * host C compiler picks the best lowering (jump table / branch tree). - * `values[i]` is the constant the case matches; `labels[i]` is where - * to branch when it does. Both arrays have length `ncases`. For - * jump-table-shaped use (wasm br_table, direct threading) frontends - * pass dense values 0..ncases-1 and a non-NONE default_label; the - * arch backend can detect the dense shape and emit a real table. */ - void (*switch_branch)(CGTarget*, Operand selector, const Label* labels, - const u64* values, u32 ncases, Label default_label, - u8 hint /* CfreeCgSwitchHint */); + /* Structured switch dispatch. + * + * Optional: when NULL, cg's shared `cg_lower_switch_default` runs and + * lowers in terms of cmp_branch / jump / indirect_branch / data ops — + * the path every native arch uses. Backends override switch_ only when + * they can express the construct natively: the C-source target emits + * `switch (val) { case V: goto L_V; ... default: goto L_def; }`; a + * future WASM target would emit `br_table`. + * + * The descriptor carries the full structured form (selector + paired + * cases + default + frontend hint); density policy lives in + * cg_lower_switch_default. */ + void (*switch_)(CGTarget*, const CGSwitchDesc*); + + /* Indirect branch primitive: transfer control to the address in + * `addr_reg` (an OPK_REG holding a function-local label address). + * + * Required on every native arch and used by: + * - cfree_cg_computed_goto for direct-threaded dispatch + * - opt-level jump-table lowerings of IR_SWITCH (when implemented) + * + * `valid_targets[0..ntargets)` is the closed set of labels the address + * can resolve to. 
Backends use it for branch-target hardening (BTI, + * PAC, x86 CFG, IBT) and opt uses it to build the CFG; opt requires + * ntargets > 0. */ + void (*indirect_branch)(CGTarget*, Operand addr_reg, + const Label* valid_targets, u32 ntargets); + + /* Materialize the runtime address of a function-local label into + * `dst_reg`. The label must already exist (label_new); it does not + * need to be placed yet. Backends emit the arch's PC-relative load: + * x86_64 `lea L(%rip), %r`, aarch64 `adr X, L`, riscv `auipc/addi`. + * + * The resulting pointer is a function-local label address (per the + * public cfree_cg_push_label_addr contract) and must only be consumed + * by indirect_branch inside the defining function's activation. */ + void (*load_label_addr)(CGTarget*, Operand dst_reg, Label label); /* ---- structured control flow ---- * Mirrors CG's scope ops. CG passes explicit break/continue targets so C @@ -876,10 +951,27 @@ struct CGTarget { void (*destroy)(CGTarget*); }; +/* Shared switch lowering. cg's cfree_cg_switch installs this as the + * default target->switch_ behavior; opt's pass_emit calls it when + * replaying IR_SWITCH against a backend that doesn't override switch_. + * Emits a cmp-and-branch chain over (target->cmp_branch + target->jump) + * — fast at -O0 and the input shape an opt-level jump-table rewrite + * starts from. */ +void cg_lower_switch_default(CGTarget* t, const CGSwitchDesc* desc); + /* Construct the right target/emitter pair for c->target. */ MCEmitter* mc_new(Compiler*, ObjBuilder*); void mc_free(MCEmitter*); +/* Per-function context helpers. Backends call mc_begin_function from + * their CGTarget func_begin (after computing the post-alignment function + * start) and mc_end_function from func_end. The pair sets / clears + * MCEmitter.cur_func_* — the metadata that emit_label_data_reloc reads + * to resolve deferred intra-function label fixups in data sections. 
*/ +void mc_begin_function(MCEmitter*, ObjSymId sym, u32 section_id, + u32 start_offset); +void mc_end_function(MCEmitter*); + CGTarget* cgtarget_new(Compiler*, ObjBuilder*, MCEmitter*); void cgtarget_finalize(CGTarget*); void cgtarget_free(CGTarget*); diff --git a/src/arch/c_target/emit.c b/src/arch/c_target/emit.c @@ -1569,27 +1569,26 @@ static void c_emit_case_value(CTarget* t, CfreeCgTypeId sel_ty, u64 v) { cbuf_puts(&t->body, ":"); } -void c_switch_branch(CGTarget* T, Operand selector, const Label* labels, - const u64* values, u32 ncases, Label default_label, - u8 hint) { +void c_switch_(CGTarget* T, const CGSwitchDesc* d) { CTarget* t = (CTarget*)T; - (void)hint; /* gcc/clang ignore strategy hints and pick their own. */ + /* gcc/clang ignore strategy hints and pick their own dispatch shape. */ + (void)d->hint; if (t->last_was_terminator) return; cbuf_puts(&t->body, " switch ("); - c_emit_operand(t, selector); + c_emit_operand(t, d->selector); cbuf_puts(&t->body, ") {\n"); - for (u32 i = 0; i < ncases; ++i) { + for (u32 i = 0; i < d->ncases; ++i) { char buf[24]; - c_label_name(labels[i], buf, sizeof buf); - c_emit_case_value(t, selector.type, values[i]); + c_label_name(d->cases[i].label, buf, sizeof buf); + c_emit_case_value(t, d->selector.type, d->cases[i].value); cbuf_puts(&t->body, " goto "); cbuf_puts(&t->body, buf); cbuf_puts(&t->body, ";\n"); } cbuf_puts(&t->body, " default: "); - if (default_label != (Label)LABEL_NONE) { + if (d->default_label != (Label)LABEL_NONE) { char buf[24]; - c_label_name(default_label, buf, sizeof buf); + c_label_name(d->default_label, buf, sizeof buf); cbuf_puts(&t->body, "goto "); cbuf_puts(&t->body, buf); cbuf_puts(&t->body, ";\n"); @@ -1606,6 +1605,38 @@ void c_switch_branch(CGTarget* T, Operand selector, const Label* labels, t->last_was_terminator = 1; } +/* ===== load_label_addr / indirect_branch ===== + * GCC computed-goto extension: `&&L` is the address of label L within + * the current function, and `goto *p;` jumps to 
such an address. This + * is the lowering every cc1-like backend uses (and what the toy + * frontend ultimately compiles to via the C target). */ +void c_load_label_addr(CGTarget* T, Operand dst, Label l) { + CTarget* t = (CTarget*)T; + char buf[24]; + if (dst.kind != OPK_REG) { + compiler_panic(t->c, t->cur_fn ? t->cur_fn->loc : (SrcLoc){0, 0, 0}, + "C target: load_label_addr dst must be REG"); + } + c_ensure_reg(t, dst.v.reg, dst.type, (RegClass)dst.cls); + c_emit_reg_assign_open(t, dst.v.reg, (CfreeCgTypeId)0); + cbuf_puts(&t->body, "(void*)&&"); + c_label_name(l, buf, sizeof buf); + cbuf_puts(&t->body, buf); + c_emit_reg_assign_close(t); +} + +void c_indirect_branch(CGTarget* T, Operand addr, const Label* valid_targets, + u32 ntargets) { + CTarget* t = (CTarget*)T; + (void)valid_targets; + (void)ntargets; + if (t->last_was_terminator) return; + cbuf_puts(&t->body, " goto *"); + c_emit_operand(t, addr); + cbuf_puts(&t->body, ";\n"); + t->last_was_terminator = 1; +} + /* ===== local, local_addr ===== */ CGLocalStorage c_local(CGTarget* T, const CGLocalDesc* d) { diff --git a/src/arch/c_target/target.c b/src/arch/c_target/target.c @@ -36,8 +36,9 @@ Label c_label_new(CGTarget*); void c_label_place(CGTarget*, Label); void c_jump(CGTarget*, Label); void c_cmp_branch(CGTarget*, CmpOp, Operand, Operand, Label); -void c_switch_branch(CGTarget*, Operand, const Label*, const u64*, u32, - Label, u8); +void c_switch_(CGTarget*, const CGSwitchDesc*); +void c_indirect_branch(CGTarget*, Operand, const Label*, u32); +void c_load_label_addr(CGTarget*, Operand, Label); CGScope c_scope_begin(CGTarget*, const CGScopeDesc*); void c_scope_else(CGTarget*, CGScope); void c_scope_end(CGTarget*, CGScope); @@ -214,7 +215,9 @@ CGTarget* c_cgtarget_new(Compiler* c, ObjBuilder* o, CfreeWriter* w) { t->label_place = c_label_place; t->jump = c_jump; t->cmp_branch = c_cmp_branch; - t->switch_branch = c_switch_branch; + t->switch_ = c_switch_; + t->indirect_branch = c_indirect_branch; + 
t->load_label_addr = c_load_label_addr; t->scope_begin = c_scope_begin; t->scope_else = c_scope_else; t->scope_end = c_scope_end; diff --git a/src/arch/mc.c b/src/arch/mc.c @@ -40,12 +40,30 @@ typedef struct MCFixup { struct MCFixup* next; } MCFixup; +typedef struct MCDataLabelRef { + /* Where in the data section to write the relocation. */ + u32 data_sec; + u32 data_offset; + RelocKind kind; + u32 width; + i64 extra_addend; + struct MCDataLabelRef* next; + /* func_sym + func_start are read from MCEmitter at label_place time + * (when the label's offset becomes known). Under -O1 the queue-time + * call comes during opt IR recording — before any backend func_begin + * has set cur_func_* — so capturing them here would be wrong. The + * label is always placed inside its owning function's emit, so the + * MCEmitter's current function tracks the right symbol at that + * moment. */ +} MCDataLabelRef; + typedef struct MCLabelInfo { u8 placed; u8 pad[3]; u32 sec_id; u32 offset; MCFixup* pending; + MCDataLabelRef* pending_data; } MCLabelInfo; typedef struct MCImpl { @@ -74,6 +92,42 @@ static void labels_grow(MCImpl* mc, u32 want) { mc->cap = ncap; } +static void emit_label_data_reloc_now(MCImpl* mc, const MCDataLabelRef* r, + u32 label_offset) { + i64 addend; + u8 bytes[8]; + u32 i; + int big_endian; + if (mc->base.cur_func_sym == OBJ_SYM_NONE) { + compiler_panic(mc->base.c, mc->base.loc, + "MCEmitter: label-data reloc resolved outside a function"); + } + addend = + (i64)label_offset - (i64)mc->base.cur_func_start + r->extra_addend; + /* Patch the inline addend into the data bytes. Object formats that + * carry the addend in the relocation record (ELF RELA) read both + * inline and r->addend; static link adds them. Mach-O R_ABS64 + * (ARM64_RELOC_UNSIGNED) only reads the inline addend. Write the + * computed addend inline and pass 0 in the reloc so both formats + * resolve to the same runtime address. 
*/ + big_endian = mc->base.c->target.big_endian; + memset(bytes, 0, sizeof bytes); + for (i = 0; i < r->width && i < sizeof bytes; ++i) { + u32 shift = big_endian ? (r->width - 1u - i) * 8u : i * 8u; + bytes[i] = (u8)((u64)addend >> shift); + } + obj_patch(mc->base.obj, r->data_sec, r->data_offset, bytes, r->width); + /* Pass the addend in BOTH the inline data bytes AND the reloc record: + * - Mach-O ARM64_RELOC_UNSIGNED uses only the inline value (the .o + * emitter drops the record's addend for UNSIGNED). + * - ELF RELA and the JIT linker's link_reloc_apply use the record + * addend (the inline gets overwritten by S + A). + * Both paths converge on sym + addend at runtime. */ + mc->base.emit_reloc_at(&mc->base, r->data_sec, r->data_offset, r->kind, + mc->base.cur_func_sym, addend, + /*explicit_addend=*/1, /*pair=*/0); +} + static void apply_fixup(MCImpl* mc, const MCFixup* fx, u32 target_offset) { /* signed displacement from end-of-instruction position to target. */ ArchLabelFixup desc; @@ -86,6 +140,8 @@ static void apply_fixup(MCImpl* mc, const MCFixup* fx, u32 target_offset) { desc.width = fx->width; desc.kind = fx->kind; desc.disp = (i64)target_offset - (i64)fx->offset + fx->addend; + desc.cur_func_sym = mc->base.cur_func_sym; + desc.cur_func_start = mc->base.cur_func_start; arch = arch_for_compiler(mc->base.c); if (!arch || !arch->apply_label_fixup || @@ -117,6 +173,7 @@ static MCLabel m_label_new(MCEmitter* m) { li->sec_id = 0; li->offset = 0; li->pending = NULL; + li->pending_data = NULL; return (MCLabel)id; } @@ -133,11 +190,19 @@ static void m_label_place(MCEmitter* m, MCLabel id) { li->placed = 1; li->sec_id = m->section_id; li->offset = obj_pos(m->obj, m->section_id); - /* Apply pending fixups. */ + /* Apply pending intra-section fixups. */ for (MCFixup* fx = li->pending; fx; fx = fx->next) { apply_fixup(mc, fx, li->offset); } li->pending = NULL; + /* Resolve any deferred data-section relocations referencing this label. 
+ * MCEmitter's cur_func_sym/cur_func_start track the function whose + * body is currently being emitted; the label is always placed inside + * its owning function's emit, so the active function context matches. */ + for (MCDataLabelRef* r = li->pending_data; r; r = r->next) { + emit_label_data_reloc_now(mc, r, li->offset); + } + li->pending_data = NULL; } static void m_emit_bytes(MCEmitter* m, const u8* data, size_t n) { @@ -197,6 +262,38 @@ static void m_emit_label_ref(MCEmitter* m, MCLabel id, RelocKind kind, } } +static void m_emit_label_data_reloc(MCEmitter* m, u32 data_sec, u32 data_offset, + MCLabel id, RelocKind kind, u32 width, + i64 extra_addend) { + MCImpl* mc = impl_of(m); + MCLabelInfo* li; + if (id == MC_LABEL_NONE || id >= mc->nlabels) { + compiler_panic(m->c, m->loc, "MCEmitter: bad label %u", (unsigned)id); + } + li = &mc->labels[id]; + if (li->placed) { + MCDataLabelRef tmp; + tmp.data_sec = data_sec; + tmp.data_offset = data_offset; + tmp.kind = kind; + tmp.width = width; + tmp.extra_addend = extra_addend; + tmp.next = NULL; + emit_label_data_reloc_now(mc, &tmp, li->offset); + return; + } + { + MCDataLabelRef* r = arena_new(mc->arena, MCDataLabelRef); + r->data_sec = data_sec; + r->data_offset = data_offset; + r->kind = kind; + r->width = width; + r->extra_addend = extra_addend; + r->next = li->pending_data; + li->pending_data = r; + } +} + static void m_set_loc(MCEmitter* m, SrcLoc loc) { m->loc = loc; } /* CFI: buffered for .eh_frame / .debug_frame emission. 
v1 stores nothing @@ -246,6 +343,9 @@ MCEmitter* mc_new(Compiler* c, ObjBuilder* o) { base->c = c; base->obj = o; base->section_id = OBJ_SEC_NONE; + base->cur_func_sym = OBJ_SYM_NONE; + base->cur_func_section = 0; + base->cur_func_start = 0; base->set_section = m_set_section; base->pos = m_pos; @@ -259,6 +359,7 @@ MCEmitter* mc_new(Compiler* c, ObjBuilder* o) { base->emit_reloc = m_emit_reloc; base->emit_reloc_at = m_emit_reloc_at; base->emit_label_ref = m_emit_label_ref; + base->emit_label_data_reloc = m_emit_label_data_reloc; base->set_loc = m_set_loc; base->cfi_startproc = m_cfi_startproc; @@ -285,3 +386,18 @@ void mc_free(MCEmitter* m) { if (!m) return; /* Arena-backed; nothing to free. */ } + +void mc_begin_function(MCEmitter* m, ObjSymId sym, u32 section_id, + u32 start_offset) { + if (!m) return; + m->cur_func_sym = sym; + m->cur_func_section = section_id; + m->cur_func_start = start_offset; +} + +void mc_end_function(MCEmitter* m) { + if (!m) return; + m->cur_func_sym = OBJ_SYM_NONE; + m->cur_func_section = 0; + m->cur_func_start = 0; +} diff --git a/src/arch/rv64/alloc.c b/src/arch/rv64/alloc.c @@ -272,6 +272,37 @@ void rv_jump(CGTarget* t, Label l) { mc->emit_label_ref(mc, (MCLabel)l, R_RV_JAL, 4, 0); } +void rv_load_label_addr(CGTarget* t, Operand dst, Label l) { + /* AUIPC rd, %hi(L); ADDI rd, rd, %lo(L) — PC-relative pair fixed up + * via R_RV_INTRA_AUIPC_ADDI (width=8, addend=0 references the AUIPC + * site). */ + MCEmitter* mc = t->mc; + u32 rd; + if (dst.kind != OPK_REG) { + compiler_panic(t->c, impl_of(t)->loc, + "rv64: load_label_addr dst must be REG"); + } + rd = reg_num(dst); + rv64_emit32(mc, rv_auipc(rd, 0)); + rv64_emit32(mc, rv_addi(rd, rd, 0)); + mc->emit_label_ref(mc, (MCLabel)l, R_RV_INTRA_AUIPC_ADDI, 8, 0); +} + +void rv_indirect_branch(CGTarget* t, Operand addr, const Label* targets, + u32 ntargets) { + /* JALR x0, rd, 0 — register-indirect jump (discards return address). 
*/ + MCEmitter* mc = t->mc; + u32 rs1; + (void)targets; + (void)ntargets; + if (addr.kind != OPK_REG) { + compiler_panic(t->c, impl_of(t)->loc, + "rv64: indirect_branch expects REG operand"); + } + rs1 = reg_num(addr); + rv64_emit32(mc, rv_i(0, rs1, 0, RV_ZERO, RV_JALR)); +} + /* Force an integer Operand into a register; materializes IMM via scratch. */ u32 rv64_force_reg_int(CGTarget* t, Operand op, u32 scratch) { if (op.kind == OPK_REG) return reg_num(op); diff --git a/src/arch/rv64/arch.c b/src/arch/rv64/arch.c @@ -52,6 +52,30 @@ static int rv64_apply_label_fixup(Compiler* c, const ArchLabelFixup* fx) { word |= ((b >> 11) & 1u) << 20; word |= ((b >> 12) & 0xffu) << 12; break; + case R_RV_INTRA_AUIPC_ADDI: { + /* width=8: patch both the AUIPC at fx->offset and the ADDI at + * fx->offset+4. disp is the byte offset from the AUIPC PC to the + * target label. */ + u8 cur2[4]; + u32 word2; + i32 disp = (i32)fx->disp; + /* hi20 is the top 20 bits of (disp + 0x800) so the sign-extended + * 12-bit lo12 cancels out. */ + u32 hi20 = (u32)((disp + 0x800) >> 12) & 0xfffffu; + u32 lo12 = (u32)disp & 0xfffu; + if (fx->width != 8) return 1; + /* AUIPC: keep rd (bits 11:7) and opcode (bits 6:0); patch imm[31:12]. */ + word = (word & 0x00000fffu) | (hi20 << 12); + wr_u32_le(cur, word); + obj_patch(fx->obj, fx->sec_id, fx->offset, cur, 4); + buf_read(&s->bytes, fx->offset + 4, cur2, 4); + word2 = rd_u32_le(cur2); + /* ADDI: keep rs1/funct3/rd/opcode (bits 19:0); patch imm[11:0]. 
*/ + word2 = (word2 & 0x000fffffu) | (lo12 << 20); + wr_u32_le(cur2, word2); + obj_patch(fx->obj, fx->sec_id, fx->offset + 4, cur2, 4); + return 0; + } default: return 1; } diff --git a/src/arch/rv64/emit.c b/src/arch/rv64/emit.c @@ -146,6 +146,7 @@ static void rv_func_begin_init(CGTarget *t, const CGFuncDesc *fd) { a->fd = fd; a->func_start = mc->pos(mc); + mc_begin_function(mc, fd->sym, fd->text_section_id, a->func_start); a->next_param_int = 0; a->next_param_fp = 0; a->next_param_stack = 0; @@ -562,5 +563,6 @@ finish: debug_func_pc_range(t->debug, sec, a->func_start, end); mc->cfi_endproc(mc); + mc_end_function(mc); a->fd = NULL; } diff --git a/src/arch/rv64/internal.h b/src/arch/rv64/internal.h @@ -147,6 +147,9 @@ void rv_reload_reg(CGTarget *t, Operand dst, FrameSlot slot, MemAccess ma); Label rv_label_new(CGTarget *t); void rv_label_place(CGTarget *t, Label l); void rv_jump(CGTarget *t, Label l); +void rv_load_label_addr(CGTarget *t, Operand dst, Label l); +void rv_indirect_branch(CGTarget *t, Operand addr, const Label *targets, + u32 ntargets); u32 rv64_force_reg_int(CGTarget *t, Operand op, u32 scratch); void rv_cmp_branch(CGTarget *t, CmpOp op, Operand a_op, Operand b_op, Label l); void rv_cmp(CGTarget *t, CmpOp op, Operand dst, Operand a_op, Operand b_op); diff --git a/src/arch/rv64/ops.c b/src/arch/rv64/ops.c @@ -2208,6 +2208,8 @@ CGTarget* rv64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) { t->label_place = rv_label_place; t->jump = rv_jump; t->cmp_branch = rv_cmp_branch; + t->load_label_addr = rv_load_label_addr; + t->indirect_branch = rv_indirect_branch; t->scope_begin = rv_scope_begin; t->scope_else = rv_scope_else; diff --git a/src/arch/x64/alloc.c b/src/arch/x64/alloc.c @@ -273,6 +273,46 @@ void emit_jcc_label(MCEmitter* mc, u32 cc, MCLabel l) { void x_jump(CGTarget* t, Label l) { emit_jmp_label(t->mc, (MCLabel)l); } +void x_load_label_addr(CGTarget* t, Operand dst, Label l) { + /* lea %dst, [rip + disp32] + * REX.W + 0x8D /5 (mod=00 
r/m=101 = RIP-relative) + * The disp32 is fixed up at label_place via R_PC32 with addend -4 + * (because the PC is end-of-instruction). */ + MCEmitter* mc = t->mc; + u32 dr = dst.v.reg & 0xFu; + emit_rex(mc, 1, dr, 0, 0); + u8 op = 0x8D; + mc->emit_bytes(mc, &op, 1); + u8 mr = modrm(0u, (dr & 7u), 5u); + mc->emit_bytes(mc, &mr, 1); + emit_u32le(mc, 0); + mc->emit_label_ref(mc, (MCLabel)l, R_PC32, 4, -4); +} + +void x_indirect_branch(CGTarget* t, Operand addr, const Label* targets, + u32 ntargets) { + /* jmpq *%reg + * FF /4 with mod=11 r/m=reg */ + MCEmitter* mc = t->mc; + u32 reg; + (void)targets; + (void)ntargets; + if (addr.kind != OPK_REG) { + compiler_panic(t->c, mc->loc, + "x64: indirect_branch expects REG operand"); + } + reg = addr.v.reg & 0xFu; + /* REX.B if reg >= 8 (no REX.W needed for jmpq *) */ + if (reg & 8u) { + u8 rex = 0x41; + mc->emit_bytes(mc, &rex, 1); + } + u8 op = 0xFF; + mc->emit_bytes(mc, &op, 1); + u8 mr = modrm(3u, 4u /* sub-opcode */, (reg & 7u)); + mc->emit_bytes(mc, &mr, 1); +} + static u32 cmp_to_cc(CmpOp op) { switch (op) { case CMP_EQ: return X64_CC_E; diff --git a/src/arch/x64/emit.c b/src/arch/x64/emit.c @@ -521,6 +521,7 @@ static void x_func_begin_init(CGTarget *t, const CGFuncDesc *fd) { a->fd = fd; a->func_start = mc->pos(mc); + mc_begin_function(mc, fd->sym, fd->text_section_id, a->func_start); a->next_param_int = 0; a->next_param_fp = 0; a->next_param_stack = 0; @@ -797,5 +798,6 @@ finish: debug_func_pc_range(t->debug, a->fd->text_section_id, a->func_start, end); mc->cfi_endproc(mc); + mc_end_function(mc); a->fd = NULL; } diff --git a/src/arch/x64/internal.h b/src/arch/x64/internal.h @@ -214,6 +214,9 @@ void emit_jmp_label(MCEmitter *mc, MCLabel l); void emit_jcc_label(MCEmitter *mc, u32 cc, MCLabel l); void x_jump(CGTarget *t, Label l); void x_cmp_branch(CGTarget *t, CmpOp op, Operand a, Operand b, Label l); +void x_load_label_addr(CGTarget *t, Operand dst, Label l); +void x_indirect_branch(CGTarget *t, Operand addr, const 
Label *targets, + u32 ntargets); void x_cmp(CGTarget *t, CmpOp op, Operand dst, Operand a, Operand b); CGScope x_scope_begin(CGTarget *t, const CGScopeDesc *d); void x_scope_else(CGTarget *t, CGScope s); diff --git a/src/arch/x64/ops.c b/src/arch/x64/ops.c @@ -2335,6 +2335,8 @@ CGTarget* x64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) { t->label_place = x_label_place; t->jump = x_jump; t->cmp_branch = x_cmp_branch; + t->load_label_addr = x_load_label_addr; + t->indirect_branch = x_indirect_branch; t->scope_begin = x_scope_begin; t->scope_else = x_scope_else; diff --git a/src/cg/control.c b/src/cg/control.c @@ -72,53 +72,61 @@ void cfree_cg_branch_false(CfreeCg* g, CfreeCgLabel label) { api_branch_if(g, &v, 0, (Label)label); } +void cg_lower_switch_default(CGTarget* t, const CGSwitchDesc* d) { + /* Cmp-and-branch chain: one cmp_branch per case, then jump to + * default (or fall through if LABEL_NONE). Fast to emit and the + * default policy at -O0. Density-driven jump-table conversion runs + * later as an opt-level rewrite over IR_SWITCH; the structured + * shape survives in IR until then. + * + * d->hint is currently advisory only here — cg does not rewrite + * into a jump table at lowering time, so JUMP_TABLE and + * BRANCH_CHAIN both produce the chain. 
*/ + for (u32 i = 0; i < d->ncases; ++i) { + Operand imm = api_op_imm((i64)d->cases[i].value, d->selector_type); + t->cmp_branch(t, CMP_EQ, d->selector, imm, d->cases[i].label); + } + if (d->default_label != LABEL_NONE) { + t->jump(t, d->default_label); + } +} + void cfree_cg_switch(CfreeCg* g, CfreeCgSwitch sw) { ApiSValue selector; - CfreeCgTypeId ty; - Operand sel; + CGSwitchDesc desc; + Heap* h; + CGSwitchCase* cases = NULL; if (!g) return; if (g->sp == 0) return; api_local_const_control_boundary(g); selector = api_pop(g); - ty = resolve_type(g->c, sw.selector_type); - if (!ty) ty = api_sv_type(&selector); - sel = api_force_reg_unless_imm(g, &selector, ty); - if (g->target->switch_branch) { - /* Materialize parallel label[] / value[] arrays — the public-side - * CfreeCgSwitchCase keeps them paired per-entry for caller - * convenience, but the vtable wants them split so the target can - * scan values for density and pick chain vs jump table without - * per-element indirection. */ - Heap* h = g->c->ctx->heap; - Label* labels = NULL; - u64* values = NULL; - if (sw.ncases) { - labels = (Label*)h->alloc(h, sw.ncases * sizeof(Label), _Alignof(Label)); - values = (u64*)h->alloc(h, sw.ncases * sizeof(u64), _Alignof(u64)); - if (!labels || !values) { - compiler_panic(g->c, g->cur_loc, "cfree_cg_switch: out of memory"); - } - for (u32 i = 0; i < sw.ncases; ++i) { - labels[i] = (Label)sw.cases[i].label; - values[i] = sw.cases[i].value; - } - } - g->target->switch_branch(g->target, sel, labels, values, sw.ncases, - (Label)sw.default_label, (u8)sw.hint); - if (labels) h->free(h, labels, sw.ncases * sizeof(Label)); - if (values) h->free(h, values, sw.ncases * sizeof(u64)); - } else { - /* Default lowering: cmp-and-branch chain — same behaviour native - * arches have always had. Targets that can do better (real C - * `switch`, machine jump table) override `switch_branch`. 
*/ + memset(&desc, 0, sizeof desc); + desc.selector_type = resolve_type(g->c, sw.selector_type); + if (!desc.selector_type) desc.selector_type = api_sv_type(&selector); + desc.selector = + api_force_reg_unless_imm(g, &selector, desc.selector_type); + desc.default_label = (Label)sw.default_label; + desc.ncases = sw.ncases; + desc.hint = (u8)sw.hint; + if (sw.ncases) { + h = g->c->ctx->heap; + cases = (CGSwitchCase*)h->alloc(h, sw.ncases * sizeof(CGSwitchCase), + _Alignof(CGSwitchCase)); + if (!cases) compiler_panic(g->c, g->cur_loc, "cfree_cg_switch: out of memory"); for (u32 i = 0; i < sw.ncases; ++i) { - Operand imm = api_op_imm((i64)sw.cases[i].value, ty); - g->target->cmp_branch(g->target, CMP_EQ, sel, imm, - (Label)sw.cases[i].label); - } - if (sw.default_label != CFREE_CG_LABEL_NONE) { - g->target->jump(g->target, (Label)sw.default_label); + cases[i].value = sw.cases[i].value; + cases[i].label = (Label)sw.cases[i].label; } + desc.cases = cases; + } + if (g->target->switch_) { + g->target->switch_(g->target, &desc); + } else { + cg_lower_switch_default(g->target, &desc); + } + if (cases) { + h = g->c->ctx->heap; + h->free(h, cases, sw.ncases * sizeof(CGSwitchCase)); } api_release(g, &selector); } @@ -126,10 +134,15 @@ void cfree_cg_switch(CfreeCg* g, CfreeCgSwitch sw) { void cfree_cg_push_label_addr(CfreeCg* g, CfreeCgLabel label, CfreeCgTypeId ptr_type) { CfreeCgTypeId ty; + Reg r; + Operand dst; if (!g) return; ty = resolve_type(g->c, ptr_type); - if (!ty) return; - api_push(g, api_make_sv(api_op_imm((i64)label, ty), ty)); + if (!ty) ty = cg_type_ptr_to(g->c, builtin_id(CFREE_CG_BUILTIN_VOID)); + r = api_alloc_reg_or_spill(g, RC_INT, ty); + dst = api_op_reg(r, ty); + g->target->load_label_addr(g->target, dst, (Label)label); + api_push(g, api_make_sv(dst, ty)); } void cfree_cg_computed_goto(CfreeCg* g, const CfreeCgLabel* valid_targets, @@ -138,17 +151,18 @@ void cfree_cg_computed_goto(CfreeCg* g, const CfreeCgLabel* valid_targets, CfreeCgTypeId target_ty; 
Operand target_op; if (!g) return; + if (!valid_targets || ntargets == 0) { + compiler_panic(g->c, g->cur_loc, + "cfree_cg_computed_goto: valid_targets must be non-empty"); + return; + } api_local_const_control_boundary(g); target = api_pop(g); target_ty = api_sv_type(&target); target_op = api_force_reg(g, &target, target_ty); - for (uint32_t i = 0; i < ntargets; ++i) { - Operand imm = api_op_imm((i64)valid_targets[i], target_ty); - g->target->cmp_branch(g->target, CMP_EQ, target_op, imm, - (Label)valid_targets[i]); - } + g->target->indirect_branch(g->target, target_op, (const Label*)valid_targets, + ntargets); api_release(g, &target); - g->target->intrinsic(g->target, INTRIN_UNREACHABLE, NULL, 0, NULL, 0); } void cfree_cg_unreachable(CfreeCg* g) { diff --git a/src/cg/data.c b/src/cg/data.c @@ -351,19 +351,68 @@ void cfree_cg_data_addr(CfreeCg* g, CfreeCgSym target, int64_t addend, void cfree_cg_data_label_addr(CfreeCg* g, CfreeCgLabel target, int64_t addend, uint32_t width, uint32_t address_space) { u8 pad[8]; + RelocKind rk; + u32 data_offset; + MCLabel ml; (void)address_space; - if (!g || !width || width > sizeof(pad)) return; - memset(pad, 0, sizeof(pad)); - for (u32 i = 0; i < width; ++i) { - u32 shift = g->c->target.big_endian ? (width - 1u - i) * 8u : i * 8u; - pad[i] = (u8)(((uint64_t)target + (uint64_t)addend) >> shift); + if (!g) return; + if (!width || width > sizeof(pad)) { + compiler_panic(g->c, g->cur_loc, + "cfree_cg_data_label_addr: width must be 1..%u, got %u", + (unsigned)sizeof(pad), (unsigned)width); + return; + } + if (!g->mc) { + /* The C-source target has no MCEmitter and can't emit a relocation + * that resolves to an intra-function label address: GCC's `&&L` + * operator only works in function-local-static initializers, and + * the data path here writes to a file-scope C object. Silently + * writing zeros would produce a binary that jumps to NULL at + * runtime — fail loudly instead. 
Tests that intentionally exercise + * this can opt out via a `.cbackend.skip` file. */ + compiler_panic(g->c, g->cur_loc, + "cfree_cg_data_label_addr: --emit=c (C-source target) " + "does not support intra-function label addresses in " + "data sections (GCC's &&L is only valid in " + "function-local-static initializers)"); + return; } + /* Resolve the cfree_cg_label to a stable MCLabel. Under direct CG + * the two ids coincide; under opt the wrapper pre-allocated an + * MCLabel at w_label_new time and stashed it on the IR block. */ + if (g->target->cg_label_to_mc_label) { + ml = g->target->cg_label_to_mc_label(g->target, (Label)target); + } else { + ml = (MCLabel)target; + } + if (ml == MC_LABEL_NONE) { + compiler_panic(g->c, g->cur_loc, + "cfree_cg_data_label_addr: label has no MCLabel"); + return; + } + rk = api_data_reloc_kind(/*pcrel=*/0, width); + if (rk == R_NONE) { + compiler_panic(g->c, g->cur_loc, + "cfree_cg_data_label_addr: unsupported width %u", + (unsigned)width); + return; + } + memset(pad, 0, sizeof pad); if (g->data_tls_collect) { - api_data_tls_write(g, pad, width); + compiler_panic(g->c, g->cur_loc, + "cfree_cg_data_label_addr: TLS label-address data is not " + "supported"); return; } + data_offset = g->data_base + (u32)g->data_size; obj_write(g->obj, g->data_sec, pad, width); g->data_size += width; + /* The MCEmitter resolves this to obj_reloc(data_sec, data_offset, kind, + * cur_func_sym, label_offset - cur_func_start + addend) at label + * placement time. cur_func_sym/cur_func_start were set by the backend + * func_begin; at -O1 they're set when pass_emit replays the function. 
*/ + g->mc->emit_label_data_reloc(g->mc, g->data_sec, data_offset, ml, rk, width, + addend); } void cfree_cg_data_pcrel(CfreeCg* g, CfreeCgSym target, int64_t addend, diff --git a/src/cg/internal.h b/src/cg/internal.h @@ -266,6 +266,7 @@ void cfree_cg_push_label_addr(CfreeCg* g, CfreeCgLabel label, CfreeCgTypeId ptr_type); void cfree_cg_computed_goto(CfreeCg* g, const CfreeCgLabel* valid_targets, uint32_t ntargets); + void cfree_cg_unreachable(CfreeCg* g); CfreeCgScope api_scope_handle(u32 idx, u32 generation); ApiCgScope* api_scope_from_handle(CfreeCg* g, CfreeCgScope scope, diff --git a/src/obj/obj.h b/src/obj/obj.h @@ -115,6 +115,10 @@ typedef enum RelocKind { R_AARCH64_TSTBR14, R_AARCH64_LD_PREL_LO19, R_AARCH64_ADR_PREL_LO21, + /* MCEmitter-only function-local label address materialization. The fixup + * patches a fixed 16-byte sequence as either ADR+B+literal when in range, + * or LDR-literal+B+relocated-literal when the ADR range is exceeded. */ + R_AARCH64_INTRA_LABEL_ADDR, R_AARCH64_ADR_PREL_PG_HI21, R_AARCH64_ADR_PREL_PG_HI21_NC, R_AARCH64_ADD_ABS_LO12_NC, @@ -194,6 +198,12 @@ typedef enum RelocKind { R_RV_PCREL_HI20, R_RV_PCREL_LO12_I, R_RV_PCREL_LO12_S, + /* Intra-section label address materialization via an AUIPC+ADDI pair. + * Used only by MCEmitter intra-section label fixups (CGTarget + * load_label_addr). Width is 8 bytes, covering both instructions; the + * fixup site is the AUIPC and the disp is the label byte offset + * relative to the AUIPC site. 
*/ + R_RV_INTRA_AUIPC_ADDI, R_RV_GOT_HI20, R_RV_TPREL_HI20, R_RV_TPREL_LO12_I, diff --git a/src/opt/ir.c b/src/opt/ir.c @@ -93,9 +93,26 @@ u32 ir_block_new(Func* f) { b = &f->blocks[f->nblocks]; memset(b, 0, sizeof *b); b->id = f->nblocks; + b->succ = arena_zarray(f->arena, u32, 2); + b->succ_cap = 2; + b->mc_label = MC_LABEL_NONE; return f->nblocks++; } +void ir_block_set_nsucc(Func* f, u32 block, u32 n) { + Block* bl; + if (block >= f->nblocks) return; + bl = &f->blocks[block]; + if (n > bl->succ_cap) { + u32* nb = arena_zarray(f->arena, u32, n); + if (bl->succ && bl->nsucc) + memcpy(nb, bl->succ, sizeof(u32) * bl->nsucc); + bl->succ = nb; + bl->succ_cap = n; + } + bl->nsucc = n; +} + /* ---- emit order ---- */ void ir_note_emit(Func* f, u32 block) { diff --git a/src/opt/ir.h b/src/opt/ir.h @@ -58,6 +58,12 @@ typedef enum IROp { IR_CONDBR, /* opnds[0] cond REG; succ[0] = true, succ[1] = false. */ IR_CMP_BRANCH, /* fused. opnds = [a, b]; extra.imm = CmpOp; succ[0] = taken, succ[1] = fallthrough. */ + IR_SWITCH, /* multi-target structured switch. opnds[0] = selector; + extra.aux = IRSwitchAux; succ[0..ncases) = case + blocks, succ[ncases] = default block. */ + IR_INDIRECT_BRANCH, /* opnds[0] = addr REG; extra.aux = IRIndirectAux. + succ[0..nvalid) = the valid target blocks. */ + IR_LOAD_LABEL_ADDR, /* opnds[0] dst REG; extra.imm = target block id. */ IR_RET, /* extra.aux = IRRetAux* (NULL for void). */ IR_SCOPE_BEGIN, /* extra.aux = IRScopeAux. defs[0] = scope id Val. */ IR_SCOPE_ELSE, /* extra.imm = scope id (Val). 
*/ @@ -96,6 +102,26 @@ typedef struct IRTlsAux { i64 addend; } IRTlsAux; +typedef struct IRSwitchAuxCase { + u64 value; + u32 block; /* successor block id */ +} IRSwitchAuxCase; + +typedef struct IRSwitchAux { + CfreeCgTypeId selector_type; + IRSwitchAuxCase* cases; + u32 ncases; + u32 default_block; /* if no default, this is the post-switch fallthrough */ + u8 has_default; /* 1 if frontend supplied an explicit default */ + u8 hint; /* CfreeCgSwitchHint */ + u8 pad[2]; +} IRSwitchAux; + +typedef struct IRIndirectAux { + u32* targets; /* successor block ids; same as Block.succ[0..ntargets) */ + u32 ntargets; +} IRIndirectAux; + typedef struct IRAggAux { AggregateAccess access; } IRAggAux; @@ -263,11 +289,21 @@ typedef struct Block { u32 ninsts, cap; u32* preds; u32 npreds; - u32 succ[2]; - u8 nsucc; - u8 loop_depth; - u16 pad; + /* Variable-length successor list. ir_block_new arena-allocates a + * 2-slot array (large enough for ordinary terminators); ops with + * more successors (IR_SWITCH, IR_INDIRECT_BRANCH) reallocate via + * ir_block_set_nsucc before writing. nsucc names the prefix in use. */ + u32* succ; + u32 nsucc; + u32 succ_cap; + u32 loop_depth; u32 frequency; + /* MCLabel id pre-allocated by w_label_new for blocks created via + * cg_label_new. Stable from recording through pass_emit. Blocks + * created internally by opt (fallthroughs, scope-implicit blocks) + * leave this at MC_LABEL_NONE; pass_emit's ensure_label mints one + * on demand for those. */ + MCLabel mc_label; } Block; typedef enum OptAllocKind { @@ -383,6 +419,10 @@ void ir_ensure_val(Func*, Val, CfreeCgTypeId, u8 cls); Inst* ir_emit(Func*, u32 block, IROp); +/* Resize a block's successor array. Used by ops with >2 successors + * (IR_SWITCH, IR_INDIRECT_BRANCH). Always sets nsucc to n. */ +void ir_block_set_nsucc(Func*, u32 block, u32 n); + /* Append `block` to f->emit_order if not already present. Called by * the wrapper whenever cur transitions to a block. 
*/ void ir_note_emit(Func*, u32 block); diff --git a/src/opt/opt.c b/src/opt/opt.c @@ -534,7 +534,22 @@ static int w_resolve_reg_name(CGTarget* t, Sym name, Reg* out, static Label w_label_new(CGTarget* t) { OptImpl* o = impl_of(t); - return (Label)ir_block_new(o->f); + u32 block = ir_block_new(o->f); + /* Pre-allocate an MCLabel id so frontend code that needs a stable + * MCLabel before pass_emit replays (cfree_cg_data_label_addr in + * particular) has one. pass_emit places it through the wrapped + * target's label_place during replay. */ + if (o->target && o->target->mc) { + o->f->blocks[block].mc_label = o->target->mc->label_new(o->target->mc); + } + return (Label)block; +} + +static MCLabel w_cg_label_to_mc_label(CGTarget* t, Label l) { + OptImpl* o = impl_of(t); + u32 block = (u32)l; + if (block >= o->f->nblocks) return MC_LABEL_NONE; + return o->f->blocks[block].mc_label; } static void w_label_place(CGTarget* t, Label l) { @@ -580,6 +595,85 @@ static void w_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b, Label l) { set_cur(o, ft); } +static void w_switch_(CGTarget* t, const CGSwitchDesc* d) { + OptImpl* o = impl_of(t); + Inst* in = rec(o, IR_SWITCH); + IRSwitchAux* aux = arena_znew(o->f->arena, IRSwitchAux); + Operand sel = d->selector; + in->opnds = dup_opnds(o->f, &sel, 1); + in->nopnds = 1; + aux->selector_type = d->selector_type; + aux->ncases = d->ncases; + aux->hint = d->hint; + aux->cases = NULL; + if (d->ncases) { + aux->cases = arena_array(o->f->arena, IRSwitchAuxCase, d->ncases); + for (u32 i = 0; i < d->ncases; ++i) { + aux->cases[i].value = d->cases[i].value; + aux->cases[i].block = (u32)d->cases[i].label; + } + } + u32 cur = o->cur; + /* Default label may be LABEL_NONE meaning "fall through past the + * switch." Materialize a fresh post-switch block to land on in that + * case so the CFG still has a single block as default successor. 
*/ + u32 default_blk; + if (d->default_label != LABEL_NONE) { + aux->has_default = 1; + default_blk = (u32)d->default_label; + } else { + aux->has_default = 0; + default_blk = ir_block_new(o->f); + } + aux->default_block = default_blk; + in->extra.aux = aux; + + ir_block_set_nsucc(o->f, cur, d->ncases + 1u); + Block* cb = &o->f->blocks[cur]; + for (u32 i = 0; i < d->ncases; ++i) cb->succ[i] = (u32)d->cases[i].label; + cb->succ[d->ncases] = default_blk; + /* No-default fall-through: emit a fresh post-switch block as the + * continuation point. With an explicit default the next recorded + * instruction is unreachable until a label_place re-anchors cur. */ + if (!aux->has_default) { + set_cur(o, default_blk); + } else { + after_terminator(o); + } +} + +static void w_indirect_branch(CGTarget* t, Operand addr, + const Label* targets, u32 ntargets) { + OptImpl* o = impl_of(t); + Inst* in = rec(o, IR_INDIRECT_BRANCH); + IRIndirectAux* aux = arena_znew(o->f->arena, IRIndirectAux); + Operand a = addr; + in->opnds = dup_opnds(o->f, &a, 1); + in->nopnds = 1; + aux->ntargets = ntargets; + aux->targets = NULL; + if (ntargets) { + aux->targets = arena_array(o->f->arena, u32, ntargets); + for (u32 i = 0; i < ntargets; ++i) aux->targets[i] = (u32)targets[i]; + } + in->extra.aux = aux; + u32 cur = o->cur; + ir_block_set_nsucc(o->f, cur, ntargets); + Block* cb = &o->f->blocks[cur]; + for (u32 i = 0; i < ntargets; ++i) cb->succ[i] = (u32)targets[i]; + after_terminator(o); +} + +static void w_load_label_addr(CGTarget* t, Operand dst, Label l) { + OptImpl* o = impl_of(t); + ensure_operand(o->f, &dst); + Inst* in = rec(o, IR_LOAD_LABEL_ADDR); + in->opnds = dup_opnds(o->f, &dst, 1); + in->nopnds = 1; + in->extra.imm = (i64)(u32)l; + set_def(o->f, in, o->cur, (Val)dst.v.reg, dst.type); +} + /* ---- structured scopes ---- */ static u32 scope_register(Func* f, Inst* in) { @@ -1382,8 +1476,12 @@ CGTarget* opt_cgtarget_new(Compiler* c, CGTarget* target, int level) { t->label_new = 
w_label_new; t->label_place = w_label_place; + t->cg_label_to_mc_label = w_cg_label_to_mc_label; t->jump = w_jump; t->cmp_branch = w_cmp_branch; + t->switch_ = w_switch_; + t->indirect_branch = w_indirect_branch; + t->load_label_addr = w_load_label_addr; t->scope_begin = w_scope_begin; t->scope_else = w_scope_else; diff --git a/src/opt/pass_cfg.c b/src/opt/pass_cfg.c @@ -33,6 +33,8 @@ static int is_terminator(const Inst* in) { case IR_BR: case IR_CONDBR: case IR_CMP_BRANCH: + case IR_SWITCH: + case IR_INDIRECT_BRANCH: case IR_RET: case IR_BREAK_TO: case IR_CONTINUE_TO: @@ -103,8 +105,6 @@ static void prune_unreachable(Func* f, const u8* reachable) { bl->cap = 0; bl->preds = NULL; bl->npreds = 0; - bl->succ[0] = 0; - bl->succ[1] = 0; bl->nsucc = 0; } @@ -158,6 +158,10 @@ void opt_build_cfg(Func* f) { case IR_CMP_BRANCH: bl->nsucc = 2; break; + case IR_SWITCH: + case IR_INDIRECT_BRANCH: + /* nsucc was set by the recorder at emit time; trust it. */ + break; default: break; } diff --git a/src/opt/pass_combine.c b/src/opt/pass_combine.c @@ -132,6 +132,8 @@ static int inst_uses_phys_reg(const Inst* in, const Operand* r) { } case IR_CMP_BRANCH: case IR_CONDBR: + case IR_SWITCH: + case IR_INDIRECT_BRANCH: for (u32 i = 0; i < in->nopnds; ++i) n += count_operand_phys_uses(&in->opnds[i], r); break; @@ -199,6 +201,7 @@ static int inst_defines_phys_reg(const Inst* in, const Operand* r) { switch ((IROp)in->op) { case IR_LOAD_IMM: case IR_LOAD_CONST: + case IR_LOAD_LABEL_ADDR: case IR_COPY: case IR_LOAD: case IR_ADDR_OF: diff --git a/src/opt/pass_dce.c b/src/opt/pass_dce.c @@ -20,6 +20,8 @@ int opt_inst_has_side_effect(Func* f, const Inst* in) { case IR_BR: case IR_CONDBR: case IR_CMP_BRANCH: + case IR_SWITCH: + case IR_INDIRECT_BRANCH: case IR_RET: case IR_SCOPE_BEGIN: case IR_SCOPE_ELSE: diff --git a/src/opt/pass_emit.c b/src/opt/pass_emit.c @@ -428,7 +428,16 @@ static void replay_planned_call(ReplayCtx* r, const IRCallAux* aux) { static Label ensure_label(ReplayCtx* r, u32 b) 
{ if (b >= r->f->nblocks) return LABEL_NONE; if (r->label_map[b] == LABEL_NONE) { - r->label_map[b] = r->tgt->label_new(r->tgt); + /* If w_label_new pre-allocated an MCLabel during recording (so + * cfree_cg_data_label_addr could queue a deferred fixup against + * it), reuse it here so the place we emit lines up with the + * existing pending fixup list. */ + Block* bl = &r->f->blocks[b]; + if (bl->mc_label != MC_LABEL_NONE) { + r->label_map[b] = (Label)bl->mc_label; + } else { + r->label_map[b] = r->tgt->label_new(r->tgt); + } } return r->label_map[b]; } @@ -593,6 +602,54 @@ static void replay_inst(ReplayCtx* r, u32 b, Inst* in) { w->cmp_branch(w, (CmpOp)in->extra.imm, a, bo, taken); break; } + case IR_SWITCH: { + IRSwitchAux* aux = (IRSwitchAux*)in->extra.aux; + Operand sel = xlat_op(r, in->opnds[0]); + CGSwitchDesc d; + CGSwitchCase* cases = NULL; + memset(&d, 0, sizeof d); + d.selector = sel; + d.selector_type = aux->selector_type; + /* default_block is always a real successor block in the IR (the + * recorder synthesizes one for no-default switches). Replay must + * emit an explicit jump to it so fall-through layout assumptions + * don't depend on block placement. 
*/ + d.default_label = ensure_label(r, aux->default_block); + d.ncases = aux->ncases; + d.hint = aux->hint; + if (aux->ncases) { + cases = arena_array(r->f->arena, CGSwitchCase, aux->ncases); + for (u32 i = 0; i < aux->ncases; ++i) { + cases[i].value = aux->cases[i].value; + cases[i].label = ensure_label(r, aux->cases[i].block); + } + d.cases = cases; + } + if (w->switch_) { + w->switch_(w, &d); + } else { + cg_lower_switch_default(w, &d); + } + break; + } + case IR_INDIRECT_BRANCH: { + IRIndirectAux* aux = (IRIndirectAux*)in->extra.aux; + Operand addr = xlat_op(r, in->opnds[0]); + Label* labels = NULL; + if (aux->ntargets) { + labels = arena_array(r->f->arena, Label, aux->ntargets); + for (u32 i = 0; i < aux->ntargets; ++i) + labels[i] = ensure_label(r, aux->targets[i]); + } + w->indirect_branch(w, addr, labels, aux->ntargets); + break; + } + case IR_LOAD_LABEL_ADDR: { + Operand dst = xlat_op(r, in->opnds[0]); + Label l = ensure_label(r, (u32)in->extra.imm); + w->load_label_addr(w, dst, l); + break; + } case IR_RET: { IRRetAux* aux = (IRRetAux*)in->extra.aux; if (!aux || !aux->present) { diff --git a/src/opt/pass_hard_live.c b/src/opt/pass_hard_live.c @@ -140,8 +140,13 @@ void opt_hard_inst_use_def(Func* f, const Inst* in, OptHardRegSet* use, } case IR_CMP_BRANCH: case IR_CONDBR: + case IR_SWITCH: + case IR_INDIRECT_BRANCH: for (u32 i = 0; i < in->nopnds; ++i) hard_use_operand(use, &in->opnds[i]); break; + case IR_LOAD_LABEL_ADDR: + if (in->nopnds >= 1) hard_def_operand(def, &in->opnds[0]); + break; case IR_RET: { IRRetAux* aux = (IRRetAux*)in->extra.aux; if (aux && aux->present) hard_use_abivalue(use, &aux->val); diff --git a/test/toy/cases/119_static_labeladdr_data.cbackend.skip b/test/toy/cases/119_static_labeladdr_data.cbackend.skip @@ -0,0 +1 @@ +C target: toy's @[.static] lowers to a file-scope object, but GCC's &&L only works in function-local-static initializers diff --git a/test/toy/cases/123_spec_demo.cbackend.skip 
b/test/toy/cases/123_spec_demo.cbackend.skip @@ -0,0 +1 @@ +C target: toy's @[.static] lowers to a file-scope object, but GCC's &&L only works in function-local-static initializers