commit d2ab4e9a9375513bde35c2c4ecb85ed3b33cf6d7
parent fa5bef9f094356eb6e7054031eff88aa0a4c98b9
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 11 May 2026 15:15:00 -0700
refactor: split six >2KLOC files into focused modules
Each split is pure code motion — no semantic changes. Public headers
unchanged; new private internal headers (arch/<arch>/internal.h,
parse_priv.h, pp_priv.h) host cross-file forward decls.
- src/arch/{x64,rv64,aarch64}/ each split into emit.c (encoding +
prologue/epilogue), alloc.c (regpool + labels + control flow), and
ops.c (load/store/binop/call/atomics/intrinsics + vtable). Helpers
promoted across files are prefixed per arch (x64_*, rv64_*, aa64_*)
to avoid link-time collisions.
- src/parse/parse.c into parse_type.c, parse_expr.c, parse_init.c,
parse_stmt.c + residual TU driver/lex/scope.
- src/link/link_layout.c into link_resolve.c (archives + symbols + GC),
link_reloc_layout.c (vaddr binding + GOT/iPLT/stubs/relocs), and
residual link_layout.c (section placement).
- src/pp/pp.c into pp_expand.c (hideset + macros + expansion) and
pp_directive.c (#if eval + #include/#line/#pragma/#error/#embed) +
residual source stack + lifecycle.
Dead aa_panic() helper removed (all callers were replaced with real
implementations in prior commits).
Diffstat:
| D | src/arch/aarch64.c | | | 3457 | ------------------------------------------------------------------------------- |
| A | src/arch/aarch64/alloc.c | | | 318 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/arch/aarch64/emit.c | | | 546 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/arch/aarch64/internal.h | | | 312 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/arch/aarch64/ops.c | | | 1895 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| D | src/arch/rv64.c | | | 2765 | ------------------------------------------------------------------------------- |
| A | src/arch/rv64/alloc.c | | | 394 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/arch/rv64/emit.c | | | 332 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/arch/rv64/internal.h | | | 222 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/arch/rv64/ops.c | | | 1840 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| D | src/arch/x64.c | | | 3075 | ------------------------------------------------------------------------------- |
| A | src/arch/x64/alloc.c | | | 378 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/arch/x64/emit.c | | | 647 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/arch/x64/internal.h | | | 257 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/arch/x64/ops.c | | | 1916 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| M | src/link/link_internal.h | | | 81 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- |
| M | src/link/link_layout.c | | | 2528 | ++++--------------------------------------------------------------------------- |
| A | src/link/link_reloc_layout.c | | | 1236 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/link/link_resolve.c | | | 597 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| M | src/parse/parse.c | | | 5946 | ++----------------------------------------------------------------------------- |
| A | src/parse/parse_expr.c | | | 1795 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/parse/parse_init.c | | | 808 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/parse/parse_priv.h | | | 431 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/parse/parse_stmt.c | | | 689 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/parse/parse_type.c | | | 1121 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| M | src/pp/pp.c | | | 2460 | +------------------------------------------------------------------------------ |
| A | src/pp/pp_directive.c | | | 1252 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/pp/pp_expand.c | | | 1008 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/pp/pp_priv.h | | | 278 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
29 files changed, 18546 insertions(+), 20038 deletions(-)
diff --git a/src/arch/aarch64.c b/src/arch/aarch64.c
@@ -1,3457 +0,0 @@
-/* Minimal AArch64 CGTarget.
- *
- * Single-pass codegen for the cg test corpus (Groups A, B, C). Frame
- * layout uses a fixed-size prologue placeholder patched at func_end so
- * frame_size and the callee-save register count are knowable when the
- * prologue is finally written. FP-relative (x29) addressing is used for
- * local slots and incoming stack args so that per-slot offsets can be
- * assigned at frame_slot() time without depending on the eventual
- * frame_size or callee-save count. SP-relative addressing is used for
- * outgoing stack args.
- *
- * Frame layout (low SP -> high):
- * outgoing args (max_outgoing bytes, 16-aligned)
- * int reg saves (n_int_pairs * 16) -- x19/x20, x21/x22, ...
- * fp reg saves (n_fp_pairs * 16) -- d8/d9, d10/d11, ...
- * local slots (cum_off bytes)
- * x29, x30 save (16 bytes) -- x29 = sp + frame_size - 16
- *
- * Single-pass register allocator: a free-mask pool per class hands out
- * the lowest free index. INT pool covers x19..x28 (10 callee-saves);
- * FP pool covers v8..v23, with v8..v15 callee-saved and v16..v23
- * caller-saved scratch — lowest-bit-first allocation prefers callee-
- * saves. Only the prefix actually used (high-water mark) is saved by
- * the prologue. Width derives from Operand.type via type_is_64. CG
- * drives spill/reload through alloc_reg returning REG_NONE on
- * exhaustion plus the spill_reg/reload_reg vtable entries.
- *
- * Multi-function: each func_begin/func_end pair owns its own frame state
- * via the AAImpl fields, so the harness can build several functions in
- * one TU. */
-
-#include <string.h>
-
-#include "arch/aa64_asm.h"
-#include "arch/aa64_isa.h"
-#include "arch/aa64_regs.h"
-#include "arch/arch.h"
-#include "core/arena.h"
-#include "obj/obj.h"
-#include "type/type.h"
-
-/* ============================================================
- * Local encoding helpers (kept here, not in aa64_isa.h, while the
- * disassembler-shared table only needs the Group A/C subset).
- * ============================================================ */
-
-#define AA64_NOP 0xD503201Fu
-
-/* ADD/SUB immediate (aa64_add_imm / aa64_sub_imm) live in
- * arch/aa64_isa.h alongside the rest of the immediate-encoding family.
- * Rd/Rn = 31 means SP for these encodings (not ZR). */
-
-/* STP/LDP signed offset, X registers. The byte offset must be a multiple
- * of 8; it is shifted right by 3 and its low seven bits become the signed
- * imm7 field at bits 15..21 (range -512..504). */
-static inline u32 aa64_stp_x(u32 Rt, u32 Rt2, u32 Rn, i32 byte_off) {
-  const u32 imm7 = (u32)(byte_off >> 3) & 0x7fu;
-  const u32 regs = ((Rt2 & 0x1f) << 10) | ((Rn & 0x1f) << 5) | (Rt & 0x1f);
-  return 0xA9000000u | (imm7 << 15) | regs;
-}
-static inline u32 aa64_ldp_x(u32 Rt, u32 Rt2, u32 Rn, i32 byte_off) {
-  /* imm7 (bits 15..21) is byte_off >> 3 truncated to seven bits. */
-  u32 word = 0xA9400000u;
-  word |= ((u32)(byte_off >> 3) & 0x7fu) << 15;
-  word |= (Rt2 & 0x1f) << 10;
-  word |= (Rn & 0x1f) << 5;
-  word |= Rt & 0x1f;
-  return word;
-}
-/* STP/LDP signed offset, D registers (64-bit FP, scale 8). */
-static inline u32 aa64_stp_d(u32 Rt, u32 Rt2, u32 Rn, i32 byte_off) {
-  const i32 scaled = byte_off >> 3;
-  const u32 imm7 = ((u32)scaled & 0x7fu) << 15;
-  const u32 rt2 = (Rt2 & 0x1f) << 10;
-  const u32 rn = (Rn & 0x1f) << 5;
-  return 0x6D000000u | imm7 | rt2 | rn | (Rt & 0x1f);
-}
-static inline u32 aa64_ldp_d(u32 Rt, u32 Rt2, u32 Rn, i32 byte_off) {
-  /* byte_off >> 3, truncated to seven bits, fills imm7 at bits 15..21. */
-  u32 insn = 0x6D400000u | (Rt & 0x1f);
-  insn |= (Rn & 0x1f) << 5;
-  insn |= (Rt2 & 0x1f) << 10;
-  insn |= ((u32)(byte_off >> 3) & 0x7fu) << 15;
-  return insn;
-}
-
-/* LDUR / STUR (general regs, unscaled simm9 in -256..255).
- * size: 0=B, 1=H, 2=W, 3=X; it is placed at bits 30..31. */
-static inline u32 aa64_stur(u32 size, u32 Rt, u32 Rn, i32 simm9) {
-  const u32 imm9 = (u32)simm9 & 0x1ffu; /* low nine bits, two's complement */
-  return 0x38000000u | (size << 30) | (imm9 << 12) | ((Rn & 0x1f) << 5) |
-         (Rt & 0x1f);
-}
-static inline u32 aa64_ldur(u32 size, u32 Rt, u32 Rn, i32 simm9) {
-  /* Same fields as the unscaled store; the base constant has bit 22 set. */
-  u32 word = 0x38400000u | (size << 30);
-  word |= ((u32)simm9 & 0x1ffu) << 12;
-  word |= (Rn & 0x1f) << 5;
-  return word | (Rt & 0x1f);
-}
-/* LDUR/STUR for SIMD & FP registers (V=1). size: 2=S (32-bit), 3=D (64-bit). */
-static inline u32 aa64_stur_fp(u32 size, u32 Rt, u32 Rn, i32 simm9) {
-  const u32 imm9 = ((u32)simm9 & 0x1ffu) << 12;
-  const u32 base_rt = ((Rn & 0x1f) << 5) | (Rt & 0x1f);
-  return 0x3C000000u | (size << 30) | imm9 | base_rt;
-}
-static inline u32 aa64_ldur_fp(u32 size, u32 Rt, u32 Rn, i32 simm9) {
-  /* simm9 is truncated to nine bits and placed at bits 12..20. */
-  u32 word = 0x3C400000u;
-  word |= size << 30;
-  word |= ((u32)simm9 & 0x1ffu) << 12;
-  word |= (Rn & 0x1f) << 5;
-  word |= Rt & 0x1f;
-  return word;
-}
-
-/* STR/LDR scaled (unsigned imm12). byte_off must be a multiple of (1<<size);
- * the scaled value is masked to twelve bits and placed at bits 10..21. */
-static inline u32 aa64_str_uimm(u32 size, u32 Rt, u32 Rn, u32 byte_off) {
-  const u32 imm12 = (byte_off >> size) & 0xfffu;
-  return 0x39000000u | (size << 30) | (imm12 << 10) | ((Rn & 0x1f) << 5) |
-         (Rt & 0x1f);
-}
-static inline u32 aa64_ldr_uimm(u32 size, u32 Rt, u32 Rn, u32 byte_off) {
-  /* imm12 = byte_off scaled down by the access size. */
-  u32 word = 0x39400000u | (size << 30);
-  word |= ((byte_off >> size) & 0xfffu) << 10;
-  word |= (Rn & 0x1f) << 5;
-  return word | (Rt & 0x1f);
-}
-/* STR (SIMD & FP, unsigned offset). size: 2=S (32), 3=D (64). */
-static inline u32 aa64_str_fp_uimm(u32 size, u32 Rt, u32 Rn, u32 byte_off) {
-  const u32 imm12 = ((byte_off >> size) & 0xfffu) << 10;
-  const u32 base_rt = ((Rn & 0x1f) << 5) | (Rt & 0x1f);
-  return 0x3D000000u | (size << 30) | imm12 | base_rt;
-}
-
-/* MRS Xt, TPIDR_EL0 — read AArch64 user thread pointer. */
-static inline u32 aa64_mrs_tpidr_el0(u32 Rt) {
-  const u32 rt = Rt & 0x1fu;
-  return 0xD53BD040u | rt;
-}
-/* Branch (unconditional, 26-bit imm). Emitted with imm26=0 when paired
- * with a JUMP26/CALL26 relocation; the patcher fills in imm26. */
-static inline u32 aa64_b_base(void) {
-  return 0x14000000u; /* B with imm26 = 0 */
-}
-static inline u32 aa64_bl_base(void) {
-  return 0x94000000u; /* BL with imm26 = 0 */
-}
-
-/* ADRP base (Rd in low 5 bits). imm bits filled by relocation. */
-static inline u32 aa64_adrp_base(u32 Rd) {
-  const u32 rd = Rd & 0x1f;
-  return 0x90000000u | rd;
-}
-
-/* LDR (unsigned offset) for SIMD & FP, used after ADRP for FP literals.
- * size 2 => S (32-bit). imm12 patched by linker. */
-static inline u32 aa64_ldr_fp_uimm(u32 size, u32 Rt, u32 Rn, u32 byte_off) {
-  const u32 imm12 = (byte_off >> size) & 0xfffu;
-  u32 word = 0x3D400000u | (size << 30);
-  word |= imm12 << 10;
-  word |= (Rn & 0x1f) << 5;
-  return word | (Rt & 0x1f);
-}
-
-/* FMOV (scalar register). type: 0=single, 1=double. */
-static inline u32 aa64_fmov_reg(u32 type, u32 Rd, u32 Rn) {
-  const u32 ftype = (type & 3) << 22;
-  return 0x1E204000u | ftype | ((Rn & 0x1f) << 5) | (Rd & 0x1f);
-}
-
-/* SUBS immediate (used to encode CMP Xn, #imm via SUBS ZR, Xn, #imm). */
-static inline u32 aa64_subs_imm(u32 sf, u32 Rd, u32 Rn, u32 imm12) {
-  u32 word = 0x71000000u | (sf << 31);
-  word |= (imm12 & 0xfff) << 10;
-  word |= (Rn & 0x1f) << 5;
-  return word | (Rd & 0x1f);
-}
-
-/* CSET Wd/Xd, EQ — alias of CSINC Rd, ZR, ZR, NE (inverted EQ). */
-static inline u32 aa64_cset_eq(u32 sf, u32 Rd) {
-  const u32 zr = 31u;
-  const u32 cond_ne = 0x1u;
-  return 0x1A800400u | (sf << 31) | (zr << 16) | (cond_ne << 12) | (zr << 5) |
-         (Rd & 0x1f);
-}
-
-/* FCVTZS (scalar fp -> integer, round toward zero, signed).
- * sf: 0=W, 1=X. type: 0=S, 1=D. */
-static inline u32 aa64_fcvtzs(u32 sf, u32 type, u32 Rd, u32 Rn) {
-  const u32 width = sf << 31;
-  const u32 ftype = (type & 3) << 22;
-  return 0x1E380000u | width | ftype | ((Rn & 0x1f) << 5) | (Rd & 0x1f);
-}
-static inline u32 aa64_fcvtzu(u32 sf, u32 type, u32 Rd, u32 Rn) {
-  /* sf selects the integer width (bit 31); type the FP precision (22..23). */
-  u32 word = 0x1E390000u | (sf << 31);
-  word |= (type & 3) << 22;
-  word |= (Rn & 0x1f) << 5;
-  return word | (Rd & 0x1f);
-}
-static inline u32 aa64_scvtf(u32 sf, u32 type, u32 Rd, u32 Rn) {
-  /* sf at bit 31, type at bits 22..23, Rn at 5..9, Rd at 0..4. */
-  const u32 ftype = (type & 3) << 22;
-  const u32 regs = ((Rn & 0x1f) << 5) | (Rd & 0x1f);
-  return 0x1E220000u | (sf << 31) | ftype | regs;
-}
-static inline u32 aa64_ucvtf(u32 sf, u32 type, u32 Rd, u32 Rn) {
-  /* Same field layout as the other int<->fp conversions in this family. */
-  u32 word = 0x1E230000u;
-  word |= sf << 31;
-  word |= (type & 3) << 22;
-  word |= (Rn & 0x1f) << 5;
-  word |= Rd & 0x1f;
-  return word;
-}
-
-/* FCVT — between FP precisions. S→D widens; D→S narrows. */
-static inline u32 aa64_fcvt_d_s(u32 Rd, u32 Rn) {
-  const u32 regs = ((Rn & 0x1f) << 5) | (Rd & 0x1f);
-  return 0x1E22C000u | regs;
-}
-static inline u32 aa64_fcvt_s_d(u32 Rd, u32 Rn) {
-  /* Rn is the double source, Rd the single destination. */
-  u32 word = 0x1E624000u;
-  word |= (Rn & 0x1f) << 5;
-  return word | (Rd & 0x1f);
-}
-
-/* FMOV between FP and GPR (BITCAST). This one: GPR→FP, single. */
-static inline u32 aa64_fmov_s_w(u32 Rd, u32 Rn) {
-  const u32 src = (Rn & 0x1f) << 5;
-  const u32 dst = Rd & 0x1f;
-  return 0x1E270000u | src | dst;
-}
-/* FP→GPR, single. */
-static inline u32 aa64_fmov_w_s(u32 Rd, u32 Rn) {
-  u32 word = 0x1E260000u;
-  word |= (Rn & 0x1f) << 5;
-  return word | (Rd & 0x1f);
-}
-/* GPR→FP, double. */
-static inline u32 aa64_fmov_d_x(u32 Rd, u32 Rn) {
-  const u32 regs = ((Rn & 0x1f) << 5) | (Rd & 0x1f);
-  return 0x9E670000u | regs;
-}
-/* FP→GPR, double. */
-static inline u32 aa64_fmov_x_d(u32 Rd, u32 Rn) {
-  const u32 src = (Rn & 0x1f) << 5;
-  return 0x9E660000u | src | (Rd & 0x1f);
-}
-
-/* SUB (extended register), 64-bit, UXTX, shift 0. Unlike SUB shifted-reg
- * (where Rd=31 means ZR), this form treats Rd/Rn=31 as SP — needed to
- * decrement SP by a register amount during alloca. */
-static inline u32 aa64_sub_extreg_x_uxtx(u32 Rd, u32 Rn, u32 Rm) {
-  u32 word = 0xCB206000u;
-  word |= (Rm & 0x1f) << 16;
-  word |= (Rn & 0x1f) << 5;
-  return word | (Rd & 0x1f);
-}
-
-/* SUBS shifted register (Rd=ZR encodes CMP). */
-static inline u32 aa64_subs_reg(u32 sf, u32 Rd, u32 Rn, u32 Rm) {
-  const u32 rm = (Rm & 0x1f) << 16;
-  const u32 rn = (Rn & 0x1f) << 5;
-  return 0x6B000000u | (sf << 31) | rm | rn | (Rd & 0x1f);
-}
-
-/* B.cond — imm19 at bits 5..23 left as zero; patched by linker / MCEmitter. */
-static inline u32 aa64_b_cond(u32 cond) {
-  const u32 cc = cond & 0xfu; /* condition code lives in bits 0..3 */
-  return 0x54000000u | cc;
-}
-
-/* CSINC Rd, Rn, Rm, cond (CSEL family with op2=01). CSET Rd, cond
- * is CSINC Rd, ZR, ZR, !cond. */
-static inline u32 aa64_csinc(u32 sf, u32 Rd, u32 Rn, u32 Rm, u32 cond) {
-  u32 word = 0x1A800400u | (sf << 31);
-  word |= (Rm & 0x1f) << 16;
-  word |= (cond & 0xfu) << 12;
-  word |= (Rn & 0x1f) << 5;
-  return word | (Rd & 0x1f);
-}
-static inline u32 aa64_cset(u32 sf, u32 Rd, u32 cond) {
-  /* CSINC Rd, ZR, ZR with the condition's low bit flipped (its inverse). */
-  const u32 zr = 31u;
-  const u32 inverted = cond ^ 1u;
-  return aa64_csinc(sf, Rd, zr, zr, inverted);
-}
-
-/* FADD / FSUB / FMUL / FDIV (scalar). type: 0=S (float), 1=D (double). */
-static inline u32 aa64_fadd(u32 type, u32 Rd, u32 Rn, u32 Rm) {
-  const u32 ftype = (type & 3) << 22;
-  const u32 regs = ((Rm & 0x1f) << 16) | ((Rn & 0x1f) << 5) | (Rd & 0x1f);
-  return 0x1E202800u | ftype | regs;
-}
-static inline u32 aa64_fsub(u32 type, u32 Rd, u32 Rn, u32 Rm) {
-  /* type at bits 22..23, Rm at 16..20, Rn at 5..9, Rd at 0..4. */
-  u32 word = 0x1E203800u | ((type & 3) << 22);
-  word |= (Rm & 0x1f) << 16;
-  word |= (Rn & 0x1f) << 5;
-  return word | (Rd & 0x1f);
-}
-static inline u32 aa64_fmul(u32 type, u32 Rd, u32 Rn, u32 Rm) {
-  /* Scalar multiply; operand fields match the other scalar FP binops. */
-  const u32 rm = (Rm & 0x1f) << 16;
-  const u32 rn = (Rn & 0x1f) << 5;
-  return 0x1E200800u | ((type & 3) << 22) | rm | rn | (Rd & 0x1f);
-}
-static inline u32 aa64_fdiv(u32 type, u32 Rd, u32 Rn, u32 Rm) {
-  /* Scalar divide: Rd = Rn / Rm in the precision selected by type. */
-  u32 word = 0x1E201800u;
-  word |= (type & 3) << 22;
-  word |= (Rm & 0x1f) << 16;
-  word |= (Rn & 0x1f) << 5;
-  word |= Rd & 0x1f;
-  return word;
-}
-
-/* SBFM / UBFM / BFM (bitfield move family).
- * sf opc(2) 100110 N immr(6) imms(6) Rn(5) Rd(5)
- * opc: 00=SBFM, 01=BFM, 10=UBFM. N must equal sf. */
-static inline u32 aa64_sbfm(u32 sf, u32 Rd, u32 Rn, u32 immr, u32 imms) {
-  const u32 sf_n = (sf << 31) | (sf << 22); /* N mirrors sf */
-  const u32 imm = ((immr & 0x3fu) << 16) | ((imms & 0x3fu) << 10);
-  return 0x13000000u | sf_n | imm | ((Rn & 0x1f) << 5) | (Rd & 0x1f);
-}
-static inline u32 aa64_ubfm(u32 sf, u32 Rd, u32 Rn, u32 immr, u32 imms) {
-  /* sf is written to both bit 31 and the N bit (22). */
-  u32 word = 0x53000000u | (sf << 31) | (sf << 22);
-  word |= (immr & 0x3fu) << 16;
-  word |= (imms & 0x3fu) << 10;
-  word |= (Rn & 0x1f) << 5;
-  return word | (Rd & 0x1f);
-}
-static inline u32 aa64_bfm(u32 sf, u32 Rd, u32 Rn, u32 immr, u32 imms) {
-  /* sf is written to both bit 31 and the N bit (22). */
-  const u32 sf_n = (sf << 31) | (sf << 22);
-  const u32 rot = (immr & 0x3fu) << 16;
-  const u32 len = (imms & 0x3fu) << 10;
-  return 0x33000000u | sf_n | rot | len | ((Rn & 0x1f) << 5) | (Rd & 0x1f);
-}
-
-/* ============================================================
- * Per-class register pool (free-mask + high-water mark).
- *
- * The mask uses bit i for the i-th register in the class's contiguous
- * range, so allocation is `__builtin_ctz` over the free mask and
- * deallocation is bit-set. `hwm` records the highest-index-+1 ever
- * allocated, which the prologue/epilogue uses to size the callee-save
- * area. 32-bit masks suffice for every aarch64/x86_64/RISC-V class.
- *
- * regpool_init marks the low `nregs` bits free and zeroes `hwm`;
- * regpool_alloc hands out the lowest set bit as `base + index` (or
- * REG_NONE when the mask is empty); regpool_free sets the bit again and
- * reports out-of-range registers and double frees through its return
- * value. `hwm` only ever grows between two regpool_init calls.
- * ============================================================ */
-
-typedef struct RegPool {
- u32 free; /* bit i set ⇔ regs[base + i] is free */
- u32 hwm; /* highest-index-+1 ever allocated */
- u8 base; /* first physical reg in the class */
- u8 nregs; /* count; bits [nregs..32) are always 0 */
- u8 pad[2];
-} RegPool;
-
-/* Reset `p` to cover `nregs` registers starting at physical register
- * `base`, all of them free and with no high-water mark. */
-static void regpool_init(RegPool* p, u8 base, u8 nregs) {
-  u32 all_free = 0xFFFFFFFFu;
-  /* 1u << 32 would be undefined, so a full 32-entry pool keeps the
-   * all-ones mask. */
-  if (nregs < 32u) all_free = (1u << nregs) - 1u;
-
-  p->free = all_free;
-  p->hwm = 0;
-  p->base = base;
-  p->nregs = nregs;
-}
-
-/* Take the lowest-indexed free register out of the pool. Returns REG_NONE
- * when nothing is free; otherwise bumps the high-water mark if needed. */
-static Reg regpool_alloc(RegPool* p) {
-  const u32 avail = p->free;
-  if (!avail) return (Reg)REG_NONE;
-
-  const u32 idx = (u32)__builtin_ctz(avail);
-  const u32 used = idx + 1u;
-  p->free = avail & ~(1u << idx);
-  if (p->hwm < used) p->hwm = used;
-  return (Reg)(p->base + idx);
-}
-
-/* Give `r` back to the pool.
- * Returns 1 when the register was released, 0 when `r` does not belong to
- * this pool's range, and -1 when it was already free (double free; the
- * caller is expected to panic). */
-static int regpool_free(RegPool* p, Reg r) {
-  const u32 rn = (u32)r;
-  const u32 first = p->base;
-  const u32 limit = (u32)(p->base + p->nregs);
-  if (rn < first || rn >= limit) return 0;
-
-  const u32 bit = 1u << (rn - first);
-  if ((p->free & bit) != 0) return -1;
-  p->free |= bit;
-  return 1;
-}
-
-/* ============================================================
- * AAImpl
- * ============================================================ */
-
-#define AA_PROLOGUE_WORDS \
- 12u /* sub sp + stp/add fp + 5 int + 4 fp = 12; an sret spill or a 2-insn sub on top of that overflows and panics in func_end */
-
-typedef struct AASlot { /* one frame slot, located relative to x29 */
- u32 off; /* bytes below fp; address = x29 - off */
- u32 size; /* NOTE(review): presumably the slot size in bytes — confirm in aa_frame_slot */
- u32 align; /* NOTE(review): presumably the requested alignment in bytes — confirm */
- u8 kind; /* FrameSlotKind */
- u8 pad[3];
-} AASlot;
-
-typedef struct AAScope { /* one entry of AAImpl's structured-scope stack */
- u8 kind; /* ScopeKind */
- u8 has_else; /* NOTE(review): presumably set once an else arm is opened — confirm in the scope ops */
- u8 pad[2];
- MCLabel else_label; /* SCOPE_IF: false branch target / end-of-then */
- MCLabel end_label; /* SCOPE_IF: join point past the whole if/else */
- Label break_label; /* SCOPE_LOOP/BLOCK: explicit break target */
- Label continue_label; /* SCOPE_LOOP: explicit continue target */
-} AAScope;
-
-typedef struct AAImpl { /* per-target state; `base` must stay first because impl_of() casts CGTarget* back to AAImpl* */
- CGTarget base;
- SrcLoc loc; /* location passed to this backend's compiler_panic calls */
- const CGFuncDesc* fd; /* set by func_begin, reset to NULL by func_end */
-
- /* Function emission. */
- u32 func_start; /* mc->pos() at function entry, after alignment */
- u32 prologue_pos; /* start of the NOP placeholder patched at func_end */
- MCLabel epilogue_label; /* placed at the first epilogue instruction */
-
- /* Frame layout (in bytes; final frame_size computed at func_end). */
- AASlot* slots;
- u32 nslots;
- u32 slots_cap;
- u32 cum_off; /* total bytes consumed by local slots */
- u32 max_outgoing; /* max stack arg bytes for any call */
-
- /* Param incoming tracking — set by func_begin from ABIFuncInfo. */
- u32 next_param_int; /* x0..x7 consumed so far */
- u32 next_param_fp; /* v0..v7 consumed so far */
- u32 next_param_stack; /* offset into caller's stack arg area */
- u8 has_sret; /* sret pointer arrived in x8 */
- FrameSlot sret_ptr_slot; /* hidden slot holding incoming x8 */
-
- /* Reg allocator pools. Bit i set in `free` means the i-th register in
- * the class's contiguous range (base..base+nregs-1) is available. The
- * high-water mark `hwm` is the largest index+1 ever allocated for the
- * class — used by the prologue to decide how many callee-saves to push.
- *
- * INT pool: base = 19, nregs = 10 (x19..x28).
- * FP pool : base = 8, nregs = 16 (v8..v23). The first 8 (v8..v15) are
- * AAPCS64 callee-saves; v16..v23 are caller-saved scratch handed out
- * after the callee-saved range fills. Allocation is lowest-bit-first
- * so callee-saves are still preferred. */
- RegPool int_pool;
- RegPool fp_pool;
-
- /* Structured-scope stack. Entries are not popped — IDs returned to
- * the caller are stable indices into this array for the lifetime
- * of the function. nscopes is reset at func_begin. */
- AAScope* scopes;
- u32 nscopes;
- u32 scopes_cap;
-
- /* alloca: each call emits an `ADD result, SP, #0` placeholder; at
- * func_end the imm12 is patched with the final max_outgoing. Tracks
- * (instruction pos, dst reg) for each placeholder. has_alloca also
- * triggers SP-from-FP restoration in the epilogue. */
- u8 has_alloca;
- struct AAAllocaPatch {
- u32 pos;
- u32 dst_reg;
- }* add_patches;
- u32 nadd_patches;
- u32 add_patches_cap;
-
- /* Variadic — AAPCS64 register save areas reserved at function entry.
- * gp_save_slot holds 8*8=64 bytes (x0..x7); fp_save_slot holds 8*16=128
- * bytes (v0..v7 with 16-byte stride). Saves are emitted in func_begin
- * after the prologue placeholder so FP is already valid when they run. */
- u8 is_variadic;
- FrameSlot gp_save_slot;
- FrameSlot fp_save_slot;
-} AAImpl;
-
-/* Recover the backend state from the generic target pointer. Valid because
- * AAImpl's first member is its CGTarget `base`. */
-static AAImpl* impl_of(CGTarget* t) {
-  return (AAImpl*)t;
-}
-
-/* Forward decls used before definition. */
-static FrameSlot aa_frame_slot(CGTarget* t, const FrameSlotDesc* d);
-static AASlot* slot_get(AAImpl* a, FrameSlot fs);
-static u32 force_reg_int(CGTarget* t, Operand op, u32 sf, u32 scratch);
-static void aa_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma);
-static void aa_store(CGTarget* t, Operand addr, Operand src, MemAccess ma);
-static void aa_free_reg(CGTarget* t, Reg r, RegClass cls);
-
-/* ---- helpers ---- */
-
-/* 1 for the kinds handled at 64-bit width (long / long long and their
- * unsigned forms, pointers, double); 0 for everything else and for NULL. */
-static int type_is_64(const Type* t) {
-  if (t == NULL) return 0;
-  return t->kind == TY_LONG || t->kind == TY_ULONG || t->kind == TY_LLONG ||
-         t->kind == TY_ULLONG || t->kind == TY_PTR || t->kind == TY_DOUBLE;
-}
-
-/* 1 when `t` is double or long double; 0 otherwise (including NULL). */
-static int type_is_fp_double(const Type* t) {
-  if (!t) return 0;
-  return t->kind == TY_DOUBLE || t->kind == TY_LDOUBLE;
-}
-
-/* 1 for char, signed char, short, int, long and long long; 0 for every
- * other kind and for a NULL type. Note that plain char counts as signed. */
-static int type_is_signed(const Type* t) {
-  if (t == NULL) return 0;
-  return t->kind == TY_CHAR || t->kind == TY_SCHAR || t->kind == TY_SHORT ||
-         t->kind == TY_INT || t->kind == TY_LONG || t->kind == TY_LLONG;
-}
-
-/* Byte width used for loads/stores of `t`: 1, 2, 4 or 8. A NULL type is
- * treated as 4 bytes; kinds not listed below fall back to 8. */
-static u32 type_byte_size(const Type* t) {
-  if (!t) return 4;
-  switch (t->kind) {
-    case TY_CHAR:
-    case TY_SCHAR:
-    case TY_UCHAR:
-    case TY_BOOL:
-      return 1;
-    case TY_SHORT:
-    case TY_USHORT:
-      return 2;
-    case TY_INT:
-    case TY_UINT:
-    case TY_FLOAT:
-      return 4;
-    default:
-      /* long, long long (signed and unsigned), pointers, double, and any
-       * remaining kind. */
-      return 8;
-  }
-}
-
-/* Encode size index for STUR/LDUR (0=B,1=H,2=W,3=X). Widths other than
- * 1, 2 and 4 bytes map to the X form. */
-static u32 size_idx_for_bytes(u32 nbytes) {
-  if (nbytes == 1) return 0;
-  if (nbytes == 2) return 1;
-  if (nbytes == 4) return 2;
-  return 3;
-}
-
-/* Low five bits of the operand's register: the field value used in
- * instruction encodings. */
-static u32 reg_num(Operand op) {
-  return op.v.reg & 0x1fu;
-}
-
-/* Single new producer-side dependency from the backend on Debug. Per
- * doc/DWARF.md §3.2 the only Debug call the aarch64 backend makes is
- * debug_emit_row, fed (text_section, offset_at_emit_start, pending_loc).
- * The forward decl of `Debug` lives in arch/arch.h; we declare the
- * function here so the backend doesn't need to include debug/debug.h. */
-extern void debug_emit_row(Debug*, ObjSecId text_section, u32 offset, SrcLoc);
-
-/* Append one little-endian instruction word to the current section and,
- * when a Debug sink is attached, record a line-table row for it. */
-static void emit32(MCEmitter* mc, u32 word) {
-  /* Sample the offset first so the row points at the instruction start. */
-  const u32 start = obj_pos(mc->obj, mc->section_id);
-  u8 bytes[4];
-  for (u32 i = 0; i < 4; ++i) bytes[i] = (u8)((word >> (8u * i)) & 0xff);
-  mc->emit_bytes(mc, bytes, 4);
-
-  if (!mc->debug) return;
-  /* (section, offset, pending_loc) row. Per §3.1 Class 2: granularity is
-   * per-instruction; Debug deduplicates identical consecutive rows so a
-   * multi-instruction CG op with a single set_loc is cheap. The pending
-   * loc lives on MCEmitter (set by m_set_loc) so emit32 can read it
-   * without reaching into the per-arch impl. */
-  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
-}
-
-/* Overwrite the four bytes at `ofs` in section `sec_id` with `word`,
- * stored little-endian. */
-static void patch32(ObjBuilder* obj, u32 sec_id, u32 ofs, u32 word) {
-  u8 bytes[4];
-  for (u32 i = 0; i < 4; ++i) bytes[i] = (u8)((word >> (8u * i)) & 0xff);
-  obj_patch(obj, sec_id, ofs, bytes, 4);
-}
-
-/* Abort compilation with "aarch64: <what> not implemented", reported at
- * the backend's current source location. */
-static _Noreturn void aa_panic(CGTarget* t, const char* what) {
-  compiler_panic(t->c, impl_of(t)->loc, "aarch64: %s not implemented", what);
-}
-
-/* ---- AArch64 immediate encoding helpers ---- */
-
-/* Materialize an immediate into Rd using MOVZ/MOVN/MOVK. Used both for
- * the public load_imm() and internally for synthesizing immediates.
- * With sf == 0 only the low 32 bits of `imm` are considered (two
- * halfwords); with sf != 0 all four halfwords are. Strategy, in order:
- * a single MOVZ, a single MOVN, then MOVZ followed by MOVKs. */
-static void emit_load_imm(MCEmitter* mc, u32 sf, u32 Rd, i64 imm) {
-  const u32 nhalf = sf ? 4u : 2u;
-  const u64 value = sf ? (u64)imm : ((u64)imm & 0xffffffffu);
-  const u64 flipped = sf ? ~value : ((~value) & 0xffffffffu);
-
-  /* Exactly one non-zero halfword: one MOVZ does it. */
-  for (u32 hw = 0; hw < nhalf; ++hw) {
-    const u64 lane = (u64)0xffffu << (hw * 16);
-    if ((value & lane) != 0 && (value & ~lane) == 0) {
-      emit32(mc, aa64_movz(sf, Rd, (u32)((value >> (hw * 16)) & 0xffffu), hw));
-      return;
-    }
-  }
-
-  /* Complement has at most one non-zero halfword: one MOVN does it. */
-  for (u32 hw = 0; hw < nhalf; ++hw) {
-    const u64 lane = (u64)0xffffu << (hw * 16);
-    if ((flipped & ~lane) == 0) {
-      emit32(mc,
-             aa64_movn(sf, Rd, (u32)((flipped >> (hw * 16)) & 0xffffu), hw));
-      return;
-    }
-  }
-
-  /* General case: MOVZ the lowest non-zero halfword, then MOVK each later
-   * non-zero one. Zero halfwords are skipped entirely. */
-  int started = 0;
-  for (u32 hw = 0; hw < nhalf; ++hw) {
-    const u32 chunk = (u32)((value >> (hw * 16)) & 0xffffu);
-    if (chunk == 0) continue;
-    if (started) {
-      emit32(mc, aa64_movk(sf, Rd, chunk, hw));
-    } else {
-      emit32(mc, aa64_movz(sf, Rd, chunk, hw));
-      started = 1;
-    }
-  }
-  /* Nothing emitted means every halfword was zero. */
-  if (!started) emit32(mc, aa64_movz(sf, Rd, 0, 0));
-}
-
-/* Emit `add sp, sp, #imm` using one or two ADD-immediate instructions:
- * a plain imm12, an imm12 shifted left by 12, or both. Only the low 24
- * bits of `imm` are representable this way; higher bits are dropped. */
-static void emit_sp_add(MCEmitter* mc, u32 imm) {
-  const u32 lo12 = imm & 0xfff;
-  const u32 hi = imm >> 12;
-
-  if (imm <= 0xfff) {
-    emit32(mc, aa64_add_imm(1, 31, 31, imm, 0));
-    return;
-  }
-  if (lo12 == 0 && hi <= 0xfff) {
-    emit32(mc, aa64_add_imm(1, 31, 31, hi, 1));
-    return;
-  }
-  emit32(mc, aa64_add_imm(1, 31, 31, hi & 0xfff, 1));
-  emit32(mc, aa64_add_imm(1, 31, 31, lo12, 0));
-}
-
-/* ---- function lifecycle ---- */
-
-/* Start emitting a function: select and align the text section, reset all
- * per-function state, reserve the NOP prologue placeholder that func_end
- * patches, and set up the hidden sret / variadic save slots. */
-static void aa_func_begin(CGTarget* t, const CGFuncDesc* fd) {
-  AAImpl* const impl = impl_of(t);
-  MCEmitter* const mc = t->mc;
-
-  mc->set_section(mc, fd->text_section_id);
-  mc->emit_align(mc, 4, 0);
-
-  /* Per-function state. */
-  impl->fd = fd;
-  impl->func_start = mc->pos(mc);
-  impl->next_param_int = 0;
-  impl->next_param_fp = 0;
-  impl->next_param_stack = 0;
-  impl->has_sret = (fd->abi && fd->abi->has_sret) ? 1 : 0;
-  impl->is_variadic = (fd->abi && fd->abi->variadic) ? 1 : 0;
-  impl->cum_off = 0;
-  impl->max_outgoing = 0;
-  impl->nslots = 0;
-  impl->nscopes = 0;
-  impl->has_alloca = 0;
-  impl->nadd_patches = 0;
-  impl->sret_ptr_slot = FRAME_SLOT_NONE;
-  impl->gp_save_slot = FRAME_SLOT_NONE;
-  impl->fp_save_slot = FRAME_SLOT_NONE;
-  regpool_init(&impl->int_pool, /*base=*/19u, /*nregs=*/10u); /* x19..x28 */
-  regpool_init(&impl->fp_pool, /*base=*/8u, /*nregs=*/16u);   /* v8..v23 */
-  impl->epilogue_label = mc->label_new(mc);
-
-  mc->cfi_startproc(mc);
-
-  /* Fixed-size, NOP-filled prologue placeholder. Its prefix is rewritten
-   * at func_end once frame_size and the callee-save count are known. */
-  impl->prologue_pos = mc->pos(mc);
-  {
-    u32 remaining = AA_PROLOGUE_WORDS;
-    while (remaining-- > 0) emit32(mc, AA64_NOP);
-  }
-
-  /* Indirect return: x8 carries the destination pointer on entry. Give it
-   * a hidden slot so the body may use x8 as scratch and ret can still
-   * find the destination. */
-  if (impl->has_sret) {
-    FrameSlotDesc sret_desc = {
-        .type = NULL,
-        .name = 0,
-        .loc = (SrcLoc){0, 0, 0},
-        .size = 8,
-        .align = 8,
-        .kind = FS_SPILL,
-        .flags = 0,
-    };
-    impl->sret_ptr_slot = aa_frame_slot(t, &sret_desc);
-  }
-
-  if (!impl->is_variadic) return;
-
-  /* Variadic: reserve the GP and FP register save areas and store x0..x7
-   * and d0..d7 into them. The stores sit after the prologue placeholder,
-   * so FP is valid when they run, and before any user code can clobber
-   * the argument registers. */
-  FrameSlotDesc gp_desc = {
-      .type = NULL,
-      .name = 0,
-      .loc = (SrcLoc){0, 0, 0},
-      .size = 64,
-      .align = 8,
-      .kind = FS_SPILL,
-      .flags = 0,
-  };
-  impl->gp_save_slot = aa_frame_slot(t, &gp_desc);
-  FrameSlotDesc fp_desc = {
-      .type = NULL,
-      .name = 0,
-      .loc = (SrcLoc){0, 0, 0},
-      .size = 128,
-      .align = 16,
-      .kind = FS_SPILL,
-      .flags = 0,
-  };
-  impl->fp_save_slot = aa_frame_slot(t, &fp_desc);
-
-  AASlot* gp_area = slot_get(impl, impl->gp_save_slot);
-  AASlot* fp_area = slot_get(impl, impl->fp_save_slot);
-  for (u32 r = 0; r < 8; ++r) {
-    /* 8-byte stride for the integer argument registers. */
-    emit32(mc, aa64_stur(3, r, 29, -(i32)gp_area->off + (i32)r * 8));
-  }
-  for (u32 r = 0; r < 8; ++r) {
-    /* 16-byte stride for the FP argument registers (D part stored). */
-    emit32(mc, aa64_stur_fp(3, r, 29, -(i32)fp_area->off + (i32)r * 16));
-  }
-}
-
-/* Finish the current function: compute the frame layout, emit the
- * epilogue, back-patch the NOP prologue placeholder reserved by
- * func_begin, patch alloca placeholders, and define the function symbol.
- *
- * Layout, as byte offsets from the post-prologue SP:
- *   [0, int_save_off)            outgoing stack args (max_outgoing)
- *   [int_save_off, fp_save_off)  x19.. pairs actually used
- *   [fp_save_off, locals_off)    d8..d15 pairs actually used
- *   [locals_off, fp_lr_off)      local slots (plus rounding)
- *   [fp_lr_off, frame_size)      saved x29/x30; x29 points here
- *
- * STP/LDP signed-offset forms only reach byte offsets up to 504 (signed
- * imm7 scaled by 8). The x29/x30 pair sits at the top of the frame, so
- * once the frame exceeds that reach it is saved/restored through a
- * computed base instead of `[sp, #fp_lr_off]`, which would silently wrap
- * to a negative offset. Frames within reach emit exactly the same code as
- * before. Callee-save pair offsets beyond the reach, and frames beyond
- * the 24-bit ADD/SUB immediate range, panic rather than emit a truncated
- * encoding. */
-static void aa_func_end(CGTarget* t) {
-  AAImpl* a = impl_of(t);
-  MCEmitter* mc = t->mc;
-  const u32 pair_off_max = 504u; /* largest STP/LDP signed byte offset */
-  const u32 scratch = 16u;       /* x16 (IP0): AAPCS64 scratch, dead at entry */
-
-  /* Compute callee-save layout. Only v8..v15 are callee-saved; the
-   * caller-saved v16..v23 are handed out by alloc_reg too but never
-   * appear in prologue saves. */
-  u32 n_int_pairs = (a->int_pool.hwm + 1) / 2; /* round up */
-  u32 used_fp_cs = a->fp_pool.hwm > 8 ? 8u : a->fp_pool.hwm;
-  u32 n_fp_pairs = (used_fp_cs + 1) / 2;
-
-  u32 int_save_off = a->max_outgoing;
-  u32 fp_save_off = int_save_off + n_int_pairs * 16;
-  u32 locals_off = fp_save_off + n_fp_pairs * 16;
-  u32 fp_lr_off = locals_off + a->cum_off;
-  u32 frame_size = fp_lr_off + 16;
-  /* round to 16. */
-  frame_size = (frame_size + 15u) & ~15u;
-  fp_lr_off = frame_size - 16;
-
-  /* Non-zero when the fp/lr pair is out of STP/LDP reach from SP. */
-  const int far_fp_lr = fp_lr_off > pair_off_max;
-
-  if (frame_size > 0xffffffu) {
-    compiler_panic(t->c, a->loc,
-                   "aarch64: frame_size %u out of 24-bit immediate range",
-                   frame_size);
-  }
-  /* The highest callee-save pair sits 16 bytes below the locals area. */
-  if (n_int_pairs + n_fp_pairs != 0 && locals_off - 16u > pair_off_max) {
-    compiler_panic(t->c, a->loc,
-                   "aarch64: callee-save offset %u out of STP/LDP range",
-                   locals_off - 16u);
-  }
-
-  /* Emit epilogue at current pos, then place label. The label we emit
-   * must point at the first instruction of the epilogue so `b epilogue`
-   * branches land here. */
-  mc->label_place(mc, a->epilogue_label);
-
-  /* If the body called alloca, SP may sit below the locals area.
-   * Restore SP from FP before reloading callee-saves, since those use
-   * SP-relative offsets. */
-  if (a->has_alloca) {
-    if (fp_lr_off <= 0xfff) {
-      emit32(mc, aa64_sub_imm(1, /*Rd=SP*/ 31, /*Rn=*/29, fp_lr_off, 0));
-    } else {
-      compiler_panic(t->c, a->loc,
-                     "aarch64: has_alloca + fp_lr_off %u out of imm12 range",
-                     fp_lr_off);
-    }
-  }
-
-  /* Restore FP saves, then INT saves, then fp/lr, then add sp + ret. */
-  for (i32 i = (i32)n_fp_pairs - 1; i >= 0; --i) {
-    u32 r0 = 8u + (u32)i * 2u;
-    u32 r1 = r0 + 1u;
-    emit32(mc, aa64_ldp_d(r0, r1, 31, (i32)(fp_save_off + (u32)i * 16u)));
-  }
-  for (i32 i = (i32)n_int_pairs - 1; i >= 0; --i) {
-    u32 r0 = 19u + (u32)i * 2u;
-    u32 r1 = r0 + 1u;
-    emit32(mc, aa64_ldp_x(r0, r1, 31, (i32)(int_save_off + (u32)i * 16u)));
-  }
-  if (!far_fp_lr) {
-    emit32(mc, aa64_ldp_x(29, 30, 31, (i32)fp_lr_off));
-    emit_sp_add(mc, frame_size);
-  } else {
-    /* Step SP up to the fp/lr pair first (everything below it has already
-     * been reloaded), load the pair at offset 0, then drop the last 16
-     * bytes. fp_lr_off is a multiple of 16, so SP stays aligned. */
-    emit_sp_add(mc, fp_lr_off);
-    emit32(mc, aa64_ldp_x(29, 30, 31, 0));
-    emit_sp_add(mc, 16u);
-  }
-  emit32(mc, aa64_ret(AA64_LR));
-
-  /* Now patch prologue placeholder. */
-  u32 pos = a->prologue_pos;
-  ObjBuilder* obj = t->obj;
-  u32 sec = a->fd->text_section_id;
-
-  u32 words[AA_PROLOGUE_WORDS];
-  for (u32 i = 0; i < AA_PROLOGUE_WORDS; ++i) words[i] = AA64_NOP;
-  u32 wi = 0;
-
-  /* sub sp, sp, #frame_size — may take 2 insns if > 4095. */
-  if (frame_size <= 0xfff) {
-    words[wi++] = aa64_sub_imm(1, 31, 31, frame_size, 0);
-  } else if ((frame_size & 0xfff) == 0) {
-    words[wi++] = aa64_sub_imm(1, 31, 31, frame_size >> 12, 1);
-  } else {
-    words[wi++] = aa64_sub_imm(1, 31, 31, (frame_size >> 12) & 0xfff, 1);
-    words[wi++] = aa64_sub_imm(1, 31, 31, frame_size & 0xfff, 0);
-  }
-  if (!far_fp_lr) {
-    /* stp x29, x30, [sp, #fp_lr_off]; add x29, sp, #fp_lr_off */
-    words[wi++] = aa64_stp_x(29, 30, 31, (i32)fp_lr_off);
-    words[wi++] = aa64_add_imm(1, 29, 31, fp_lr_off, 0);
-  } else {
-    /* scratch = sp + fp_lr_off; stp x29, x30, [scratch]; x29 = scratch.
-     * x29 cannot be the base directly: its old value is what gets saved. */
-    if (fp_lr_off <= 0xfff) {
-      words[wi++] = aa64_add_imm(1, scratch, 31, fp_lr_off, 0);
-    } else {
-      words[wi++] = aa64_add_imm(1, scratch, 31, fp_lr_off >> 12, 1);
-      words[wi++] = aa64_add_imm(1, scratch, scratch, fp_lr_off & 0xfff, 0);
-    }
-    words[wi++] = aa64_stp_x(29, 30, scratch, 0);
-    words[wi++] = aa64_add_imm(1, 29, scratch, 0, 0);
-  }
-  /* At most 6 words are used up to here, so the unchecked stores above
-   * stay inside the 12-word placeholder. */
-
-  /* If sret, save incoming x8 (caller's destination pointer). */
-  if (a->has_sret && a->sret_ptr_slot != FRAME_SLOT_NONE) {
-    AASlot* s = slot_get(a, a->sret_ptr_slot);
-    if (s) {
-      if (wi >= AA_PROLOGUE_WORDS) goto overflow;
-      words[wi++] = aa64_stur(3, 8, 29, -(i32)s->off);
-    }
-  }
-  /* INT pair saves. */
-  for (u32 i = 0; i < n_int_pairs; ++i) {
-    u32 r0 = 19u + i * 2u;
-    u32 r1 = r0 + 1u;
-    if (wi >= AA_PROLOGUE_WORDS) goto overflow;
-    words[wi++] = aa64_stp_x(r0, r1, 31, (i32)(int_save_off + i * 16u));
-  }
-  /* FP pair saves. */
-  for (u32 i = 0; i < n_fp_pairs; ++i) {
-    u32 r0 = 8u + i * 2u;
-    u32 r1 = r0 + 1u;
-    if (wi >= AA_PROLOGUE_WORDS) goto overflow;
-    words[wi++] = aa64_stp_d(r0, r1, 31, (i32)(fp_save_off + i * 16u));
-  }
-  if (0) {
-  overflow:
-    compiler_panic(
-        t->c, a->loc,
-        "aarch64: prologue placeholder too small (used %u of %u words)", wi,
-        AA_PROLOGUE_WORDS);
-  }
-
-  for (u32 i = 0; i < AA_PROLOGUE_WORDS; ++i) {
-    patch32(obj, sec, pos + i * 4u, words[i]);
-  }
-
-  /* Patch each alloca's `ADD dst, SP, #0` placeholder with the final
-   * max_outgoing offset, now that the high-water mark is known. */
-  if (a->max_outgoing > 0xfff) {
-    compiler_panic(
-        t->c, a->loc,
-        "aarch64: max_outgoing %u out of imm12 range for alloca patch",
-        a->max_outgoing);
-  }
-  for (u32 i = 0; i < a->nadd_patches; ++i) {
-    u32 dr = a->add_patches[i].dst_reg;
-    u32 word = aa64_add_imm(1, dr, /*Rn=SP*/ 31, a->max_outgoing, 0);
-    patch32(obj, sec, a->add_patches[i].pos, word);
-  }
-
-  /* Define the function symbol. */
-  u32 end = mc->pos(mc);
-  obj_symbol_define(obj, a->fd->sym, sec, (u64)a->func_start,
-                    (u64)(end - a->func_start));
-
-  mc->cfi_endproc(mc);
-  a->fd = NULL;
-}
-
-/* ---- registers / frame ---- */
-
- /* Hand out a free register of class `cls` from the matching pool.
-  * `ty` is accepted for interface compatibility and ignored. Any class
-  * other than RC_INT / RC_FP is a fatal compiler error.
-  *
-  * Lowest-bit-first allocation hands out callee-saves before caller-
-  * saves on the FP side (v8..v15 then v16..v23) — short-lived
-  * materializations (e.g. j06 building 9 FP arg regs with no
-  * intervening call) thus reach into the caller-saved range. */
- static Reg aa_alloc_reg(CGTarget* t, RegClass cls, const Type* ty) {
-   AAImpl* impl = impl_of(t);
-   (void)ty;
-   switch (cls) {
-     case RC_INT:
-       return regpool_alloc(&impl->int_pool);
-     case RC_FP:
-       return regpool_alloc(&impl->fp_pool);
-     default:
-       break;
-   }
-   compiler_panic(t->c, impl->loc, "aarch64 alloc_reg: class %d unimpl",
-                  (int)cls);
- }
-
- /* Return register `r` to the pool for class `cls`. Panics when the class
-  * has no pool, or when regpool_free reports anything other than success
-  * (1): -1 is reported as "already free", every other value as "not in
-  * pool". */
- static void aa_free_reg(CGTarget* t, Reg r, RegClass cls) {
-   AAImpl* impl = impl_of(t);
-   RegPool* pool = NULL;
-   if (cls == RC_INT) {
-     pool = &impl->int_pool;
-   } else if (cls == RC_FP) {
-     pool = &impl->fp_pool;
-   } else {
-     compiler_panic(t->c, impl->loc, "aarch64 free_reg: class %d unimpl",
-                    (int)cls);
-   }
-
-   const char* pool_name = (cls == RC_FP) ? "fp" : "int";
-   int status = regpool_free(pool, r);
-   if (status == 1) return;
-   if (status == -1) {
-     compiler_panic(t->c, impl->loc,
-                    "aarch64 free_reg: reg %u already free in %s pool",
-                    (unsigned)r, pool_name);
-   }
-   compiler_panic(t->c, impl->loc, "aarch64 free_reg: reg %u not in %s pool",
-                  (unsigned)r, pool_name);
- }
-
- /* Reserve a new frame slot described by `d` and return its 1-based handle.
-  * A zero `d->size` defaults to 8 bytes and a zero `d->align` to 1. The
-  * recorded `off` is the running offset after adding the slot's size and
-  * rounding up to `align`; a->cum_off is advanced to that value.
-  * NOTE(review): the mask-based rounding below is only correct when
-  * `align` is a power of two — confirm callers guarantee that. */
- static FrameSlot aa_frame_slot(CGTarget* t, const FrameSlotDesc* d) {
- AAImpl* a = impl_of(t);
- /* Slot table full: allocate a doubled buffer (8 entries the first time)
-  * from the arena and copy the existing entries over. */
- if (a->nslots == a->slots_cap) {
- u32 ncap = a->slots_cap ? a->slots_cap * 2 : 8;
- AASlot* nbuf = arena_array(t->c->tu, AASlot, ncap);
- if (a->slots) memcpy(nbuf, a->slots, sizeof(AASlot) * a->nslots);
- a->slots = nbuf;
- a->slots_cap = ncap;
- }
- u32 size = d->size ? d->size : 8;
- u32 align = d->align ? d->align : 1;
- u32 next = a->cum_off + size;
- /* Round up so that slot start (= fp - off) is align-aligned. fp is
- * 16-aligned, so requiring off aligned to `align` suffices. */
- u32 mask = align - 1;
- next = (next + mask) & ~mask;
-
- AASlot* s = &a->slots[a->nslots];
- s->off = next;
- s->size = size;
- s->align = align;
- s->kind = d->kind;
-
- a->cum_off = next;
- a->nslots++;
- return (FrameSlot)(a->nslots); /* 1-based; FRAME_SLOT_NONE == 0 */
- }
-
- /* Map a 1-based frame-slot handle to its AASlot record. Returns NULL for
-  * FRAME_SLOT_NONE or a handle beyond the slots allocated so far. */
- static AASlot* slot_get(AAImpl* a, FrameSlot fs) {
-   int in_range = (fs != FRAME_SLOT_NONE) && !(fs > a->nslots);
-   return in_range ? &a->slots[fs - 1] : NULL;
- }
-
-/* ---- param: store incoming arg(s) into the home slot ---- */
-
- /* Store one incoming parameter into its home frame slot `p->slot`.
-  * Argument registers and caller stack slots are consumed through the
-  * running cursors a->next_param_int, a->next_param_fp and
-  * a->next_param_stack, so calls must arrive in parameter order.
-  * ABI_ARG_IGNORE emits nothing; ABI_ARG_INDIRECT copies the pointed-to
-  * bytes into the slot; anything else is stored part by part. Panics on
-  * a bad slot or on an ABI class other than INT / FP. */
- static void aa_param(CGTarget* t, const CGParamDesc* p) {
- AAImpl* a = impl_of(t);
- AASlot* s = slot_get(a, p->slot);
- if (!s) {
- compiler_panic(t->c, a->loc, "aarch64 param: bad slot");
- }
- const ABIArgInfo* ai = p->abi;
-
- if (ai->kind == ABI_ARG_IGNORE) return;
- if (ai->kind == ABI_ARG_INDIRECT) {
- /* Caller passes a pointer to a copy. Materialize that pointer
- * into a scratch reg, then memcpy `s->size` bytes from there
- * into the slot — so subsequent LOCAL_op(slot) reads/writes the
- * struct contents directly, not the pointer. */
- u32 ptr_reg;
- if (a->next_param_int < 8) {
- /* Pointer arrived in the next integer argument register. */
- ptr_reg = a->next_param_int++;
- } else {
- /* Pointer was passed on the stack: load it from [x29 + 16 + offset]
-  * into scratch x9. */
- u32 caller_off = a->next_param_stack;
- a->next_param_stack += 8;
- emit32(t->mc, aa64_ldur(3, 9, 29, (i32)(16 + caller_off)));
- ptr_reg = 9;
- }
- /* Unrolled copy through scratch x10 in 8/4/2/1-byte steps.
-  * NOTE(review): the displacements are passed to LDUR/STUR unchecked;
-  * confirm large aggregates or deep frames cannot exceed the encodable
-  * range. */
- u32 nbytes = s->size;
- u32 i = 0;
- while (i + 8 <= nbytes) {
- emit32(t->mc, aa64_ldur(3, 10, ptr_reg, (i32)i));
- emit32(t->mc, aa64_stur(3, 10, 29, -(i32)s->off + (i32)i));
- i += 8;
- }
- while (i + 4 <= nbytes) {
- emit32(t->mc, aa64_ldur(2, 10, ptr_reg, (i32)i));
- emit32(t->mc, aa64_stur(2, 10, 29, -(i32)s->off + (i32)i));
- i += 4;
- }
- while (i + 2 <= nbytes) {
- emit32(t->mc, aa64_ldur(1, 10, ptr_reg, (i32)i));
- emit32(t->mc, aa64_stur(1, 10, 29, -(i32)s->off + (i32)i));
- i += 2;
- }
- while (i < nbytes) {
- emit32(t->mc, aa64_ldur(0, 10, ptr_reg, (i32)i));
- emit32(t->mc, aa64_stur(0, 10, 29, -(i32)s->off + (i32)i));
- i += 1;
- }
- return;
- }
- /* DIRECT: place each part. */
- for (u16 i = 0; i < ai->nparts; ++i) {
- const ABIArgPart* pt = &ai->parts[i];
- u32 part_off = pt->src_offset;
- u32 sz = pt->size;
- u32 sidx = size_idx_for_bytes(sz);
-
- if (pt->cls == ABI_CLASS_INT) {
- if (a->next_param_int < 8) {
- /* Register-passed: store the argument register straight to the slot. */
- u32 reg = a->next_param_int++;
- emit32(t->mc, aa64_stur(sidx, reg, 29, -(i32)s->off + (i32)part_off));
- } else {
- /* Each stack-passed slot is 8 bytes regardless of part size. */
- u32 caller_off = a->next_param_stack;
- a->next_param_stack += 8;
- emit32(t->mc, aa64_ldur(sidx, 9, 29, (i32)(16 + caller_off)));
- emit32(t->mc, aa64_stur(sidx, 9, 29, -(i32)s->off + (i32)part_off));
- }
- } else if (pt->cls == ABI_CLASS_FP) {
- if (a->next_param_fp < 8) {
- u32 reg = a->next_param_fp++;
- emit32(t->mc,
- aa64_stur_fp(sidx, reg, 29, -(i32)s->off + (i32)part_off));
- } else {
- /* Stack-passed FP part: bounce through FP register 0. This branch is
-  * only reached once all eight FP argument registers are consumed. */
- u32 caller_off = a->next_param_stack;
- a->next_param_stack += 8;
- emit32(t->mc, aa64_ldur_fp(sidx, 0, 29, (i32)(16 + caller_off)));
- emit32(t->mc, aa64_stur_fp(sidx, 0, 29, -(i32)s->off + (i32)part_off));
- }
- } else {
- compiler_panic(t->c, a->loc, "aarch64 param: ABI class %d unimpl",
- (int)pt->cls);
- }
- }
- }
-
- /* Clobber-set query: not implemented for this backend. Both `c` and `n`
-  * are ignored and the call goes straight to aa_panic, so no value is
-  * returned on this path (presumably aa_panic does not return — confirm). */
- static const Reg* aa_clobbers(CGTarget* t, RegClass c, u32* n) {
- (void)c;
- (void)n;
- aa_panic(t, "clobbers");
- }
-
- /* Resolve an interned register name to a backend register and class.
-  * Returns 0 on success and 1 when the name is missing, too long for the
-  * local buffer, unknown to aa64_register_index, or maps outside the
-  * x0..x30 (DWARF 0..30) and v0..v31 (DWARF 64..95) ranges. `out` and
-  * `cls_out` may each be NULL and are written only on success. */
- static int aa_resolve_reg_name(CGTarget* t, Sym name, Reg* out,
-                                RegClass* cls_out) {
-   size_t name_len = 0;
-   const char* raw = pool_str(t->c->global, name, &name_len);
-   if (!raw || name_len == 0) return 1;
-
-   /* pool_str does not guarantee NUL-termination; copy into a small buffer. */
-   char cname[8];
-   if (name_len >= sizeof cname) return 1;
-   memcpy(cname, raw, name_len);
-   cname[name_len] = '\0';
-
-   u32 dwarf;
-   if (aa64_register_index(cname, &dwarf) != 0) return 1;
-
-   Reg reg;
-   RegClass rc;
-   if (dwarf <= 30u) { /* x0..x30 */
-     reg = (Reg)dwarf;
-     rc = RC_INT;
-   } else if (dwarf >= 64u && dwarf <= 95u) { /* v0..v31 */
-     reg = (Reg)(dwarf - 64u);
-     rc = RC_FP;
-   } else {
-     /* sp/pc and others — not allocatable, treat as unresolvable. */
-     return 1;
-   }
-   if (out) *out = reg;
-   if (cls_out) *cls_out = rc;
-   return 0;
- }
- /* Spill: store register operand `src` into frame slot `slot` using access
-  * `ma`, then release the register back to its pool. Panics when `src`
-  * is not an OPK_REG operand. */
- static void aa_spill_reg(CGTarget* t, Operand src, FrameSlot slot,
- MemAccess ma) {
- AAImpl* a = impl_of(t);
- if (src.kind != OPK_REG) {
- compiler_panic(t->c, a->loc, "aarch64 spill_reg: src is not OPK_REG");
- }
- /* Build an OPK_LOCAL address operand naming the slot and reuse aa_store. */
- Operand addr;
- memset(&addr, 0, sizeof addr);
- addr.kind = OPK_LOCAL;
- addr.cls = RC_INT;
- addr.type = ma.type;
- addr.v.frame_slot = slot;
- aa_store(t, addr, src, ma);
- aa_free_reg(t, src.v.reg, src.cls);
- }
-
- /* Reload: load frame slot `slot` into register operand `dst` using access
-  * `ma`. Panics when `dst` is not an OPK_REG operand. Unlike the spill
-  * path, no register is allocated or freed here. */
- static void aa_reload_reg(CGTarget* t, Operand dst, FrameSlot slot,
- MemAccess ma) {
- AAImpl* a = impl_of(t);
- if (dst.kind != OPK_REG) {
- compiler_panic(t->c, a->loc, "aarch64 reload_reg: dst is not OPK_REG");
- }
- /* Build an OPK_LOCAL address operand naming the slot and reuse aa_load. */
- Operand addr;
- memset(&addr, 0, sizeof addr);
- addr.kind = OPK_LOCAL;
- addr.cls = RC_INT;
- addr.type = ma.type;
- addr.v.frame_slot = slot;
- aa_load(t, dst, addr, ma);
- }
-
-/* ---- labels / control flow ----
- *
- * Label is a transparent wrapper around MCLabel — the MCEmitter already
- * tracks placement and applies pending fixups. Jumps emit a B with
- * imm26=0 paired with R_AARCH64_JUMP26; conditional branches emit a
- * B.cond with imm19=0 paired with R_AARCH64_CONDBR19. */
-
- /* Create a fresh label by delegating to the MCEmitter. */
- static Label aa_label_new(CGTarget* t) {
-   MCEmitter* mc = t->mc;
-   return (Label)mc->label_new(mc);
- }
-
- /* Bind label `l` at the emitter's current position. */
- static void aa_label_place(CGTarget* t, Label l) {
-   MCEmitter* mc = t->mc;
-   mc->label_place(mc, (MCLabel)l);
- }
-
- /* Unconditional branch to `l`: emit the base B word, then attach a
-  * R_AARCH64_JUMP26 label reference so the emitter can fix it up. */
- static void aa_jump(CGTarget* t, Label l) {
-   emit32(t->mc, aa64_b_base());
-   t->mc->emit_label_ref(t->mc, (MCLabel)l, R_AARCH64_JUMP26, 4, 0);
- }
-
- /* Translate a CmpOp into the AArch64 condition-code field. A "true"
-  * boolean (i1) result means the branch is taken / the flag is set to 1.
-  * Ops without an explicit case fall back to EQ; FP compares route
-  * through FCMP and are not yet exercised here. */
- static u32 cmp_to_cond(CmpOp op) {
-   u32 cond = 0x0u; /* EQ, also the fallback */
-   switch (op) {
-     case CMP_EQ:   cond = 0x0u; break; /* EQ */
-     case CMP_NE:   cond = 0x1u; break; /* NE */
-     case CMP_LT_U: cond = 0x3u; break; /* CC/LO */
-     case CMP_LE_U: cond = 0x9u; break; /* LS */
-     case CMP_GT_U: cond = 0x8u; break; /* HI */
-     case CMP_GE_U: cond = 0x2u; break; /* CS/HS */
-     case CMP_LT_S: cond = 0xbu; break; /* LT */
-     case CMP_LE_S: cond = 0xdu; break; /* LE */
-     case CMP_GT_S: cond = 0xcu; break; /* GT */
-     case CMP_GE_S: cond = 0xau; break; /* GE */
-     default:
-       break;
-   }
-   return cond;
- }
-
- /* Emit CMP a, b (= SUBS ZR, a, b). Uses the 12-bit-imm form when `b` is
- * an OPK_IMM that fits; otherwise materializes through scratch x9/x10
- * and uses the shifted-register form. CMP is not commutative across the
- * condition codes, so an IMM-on-LHS still materializes (the caller has
- * to swap the cond if it wants to swap the operands). Width comes from
- * `a`; signedness lives in the cond. */
- static void emit_cmp_ab(CGTarget* t, Operand a_op, Operand b_op) {
- MCEmitter* mc = t->mc;
- u32 sf = type_is_64(a_op.type) ? 1u : 0u;
- /* Immediate form: only when the RHS is an immediate and the LHS is not. */
- if (b_op.kind == OPK_IMM && a_op.kind != OPK_IMM) {
- u32 imm12, sh;
- if (aa64_addsub_imm_fits(b_op.v.imm, &imm12, &sh)) {
- u32 rn = force_reg_int(t, a_op, sf, 9);
- emit32(mc, aa64_subs_imm12(sf, /*Rd=ZR*/ 31u, rn, imm12, sh));
- return;
- }
- }
- /* Register form. If the LHS was materialized into x9, the RHS scratch
-  * moves to x10 so the two values do not collide. */
- u32 rn = force_reg_int(t, a_op, sf, 9);
- u32 rm = force_reg_int(t, b_op, sf, (rn == 9) ? 10u : 9u);
- emit32(mc, aa64_subs_reg(sf, /*Rd=ZR*/ 31u, rn, rm));
- }
-
- /* Compare `a` with `b` and branch to `l` when `op` holds: CMP followed
-  * by a B.cond word paired with a R_AARCH64_CONDBR19 label reference. */
- static void aa_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b,
-                           Label l) {
-   const u32 cond = cmp_to_cond(op);
-   emit_cmp_ab(t, a, b);
-   emit32(t->mc, aa64_b_cond(cond));
-   t->mc->emit_label_ref(t->mc, (MCLabel)l, R_AARCH64_CONDBR19, 4, 0);
- }
-
- /* Materialize the boolean result of `a op b` into `dst`: CMP followed
-  * by CSET on the mapped condition. The CSET width follows dst's type. */
- static void aa_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a, Operand b) {
-   const u32 cond = cmp_to_cond(op);
-   emit_cmp_ab(t, a, b);
-   const u32 dst_is_64 = type_is_64(dst.type) ? 1u : 0u;
-   emit32(t->mc, aa64_cset(dst_is_64, reg_num(dst), cond));
- }
-
- /* ---- structured scopes (SCOPE_IF emits code; SCOPE_LOOP / SCOPE_BLOCK are bookkeeping only) ---- */
-
- /* Open a structured scope described by `d` and return its 1-based handle.
-  * SCOPE_IF allocates else/end labels and emits a test of `d->cond`
-  * against zero with a branch to the else label. SCOPE_LOOP and
-  * SCOPE_BLOCK only record the caller's break/continue labels. Any other
-  * kind is a fatal compiler error. */
- static CGScope aa_scope_begin(CGTarget* t, const CGScopeDesc* d) {
- AAImpl* a = impl_of(t);
- /* Grow the scope table (doubling, starting at 4) when it is full. */
- if (a->nscopes == a->scopes_cap) {
- u32 ncap = a->scopes_cap ? a->scopes_cap * 2u : 4u;
- AAScope* nb = arena_array(t->c->tu, AAScope, ncap);
- if (a->scopes) memcpy(nb, a->scopes, sizeof(AAScope) * a->nscopes);
- a->scopes = nb;
- a->scopes_cap = ncap;
- }
- AAScope* sc = &a->scopes[a->nscopes];
- sc->kind = (u8)d->kind;
- sc->has_else = 0;
- sc->else_label = 0;
- sc->end_label = 0;
- sc->break_label = d->break_label;
- sc->continue_label = d->continue_label;
-
- if (d->kind == SCOPE_IF) {
- sc->else_label = t->mc->label_new(t->mc);
- sc->end_label = t->mc->label_new(t->mc);
- /* Test cond against zero, branch to else_label on EQ (false). */
- u32 sf = type_is_64(d->cond.type) ? 1u : 0u;
- u32 rn = force_reg_int(t, d->cond, sf, 9);
- emit32(t->mc, aa64_subs_imm(sf, /*Rd=ZR*/ 31u, rn, 0));
- emit32(t->mc, aa64_b_cond(0x0u /*EQ*/));
- t->mc->emit_label_ref(t->mc, sc->else_label, R_AARCH64_CONDBR19, 4, 0);
- } else if (d->kind == SCOPE_LOOP || d->kind == SCOPE_BLOCK) {
- /* Structured loop/block: bookkeep only. The caller drives
- * label_place + jump itself; break_to/continue_to forward to the
- * recorded labels. No instructions emitted here. */
- } else {
- compiler_panic(t->c, a->loc,
- "aarch64 scope_begin: kind %d not yet implemented",
- (int)d->kind);
- }
-
- /* Commit the entry only after it has been fully set up. */
- a->nscopes++;
- return (CGScope)a->nscopes; /* 1-based */
- }
-
- /* Switch scope `s` from its then-arm to its else-arm: jump over the else
-  * body to end_label, place else_label here, and record that an else
-  * exists. Panics on an invalid scope handle. */
- static void aa_scope_else(CGTarget* t, CGScope s) {
-   AAImpl* impl = impl_of(t);
-   int valid = (s != CG_SCOPE_NONE) && !(s > impl->nscopes);
-   if (!valid) {
-     compiler_panic(t->c, impl->loc, "aarch64 scope_else: bad scope %u",
-                    (unsigned)s);
-   }
-   AAScope* scope = impl->scopes + (s - 1);
-   MCEmitter* mc = t->mc;
-
-   /* Then-arm falls out here: branch past the else body. */
-   emit32(mc, aa64_b_base());
-   mc->emit_label_ref(mc, scope->end_label, R_AARCH64_JUMP26, 4, 0);
-
-   /* The else-arm starts at this position. */
-   mc->label_place(mc, scope->else_label);
-   scope->has_else = 1;
- }
-
- /* Close scope `s`. For SCOPE_IF this places the end label, and also the
-  * else label when no else-arm was opened. Other scope kinds emit nothing
-  * here: the caller has already placed the break label. Panics on an
-  * invalid scope handle. */
- static void aa_scope_end(CGTarget* t, CGScope s) {
-   AAImpl* impl = impl_of(t);
-   int valid = (s != CG_SCOPE_NONE) && !(s > impl->nscopes);
-   if (!valid) {
-     compiler_panic(t->c, impl->loc, "aarch64 scope_end: bad scope %u",
-                    (unsigned)s);
-   }
-   AAScope* scope = impl->scopes + (s - 1);
-   if (scope->kind != SCOPE_IF) return;
-
-   MCEmitter* mc = t->mc;
-   if (!scope->has_else) {
-     /* Without an else body the false branch lands right here. */
-     mc->label_place(mc, scope->else_label);
-   }
-   mc->label_place(mc, scope->end_label);
- }
-
- /* Jump to the break label recorded for scope `s`. Panics on an invalid
-  * scope handle. */
- static void aa_break_to(CGTarget* t, CGScope s) {
-   AAImpl* impl = impl_of(t);
-   int valid = (s != CG_SCOPE_NONE) && !(s > impl->nscopes);
-   if (!valid) {
-     compiler_panic(t->c, impl->loc, "aarch64 break_to: bad scope %u",
-                    (unsigned)s);
-   }
-   aa_jump(t, impl->scopes[s - 1].break_label);
- }
-
- /* Jump to the continue label recorded for scope `s`. Panics on an invalid
-  * scope handle. */
- static void aa_continue_to(CGTarget* t, CGScope s) {
-   AAImpl* impl = impl_of(t);
-   int valid = (s != CG_SCOPE_NONE) && !(s > impl->nscopes);
-   if (!valid) {
-     compiler_panic(t->c, impl->loc, "aarch64 continue_to: bad scope %u",
-                    (unsigned)s);
-   }
-   aa_jump(t, impl->scopes[s - 1].continue_label);
- }
-
-/* ---- data movement ---- */
-
- /* Load the constant `imm` into register operand `dst`; the operation is
-  * 64-bit when dst's type is 64-bit, 32-bit otherwise. */
- static void aa_load_imm(CGTarget* t, Operand dst, i64 imm) {
-   const u32 is_64 = type_is_64(dst.type) ? 1u : 0u;
-   emit_load_imm(t->mc, is_64, reg_num(dst), imm);
- }
-
- /* load_const: emit ADRP + LDR Sd, [Xt, #:lo12:sym] against a fresh
- * symbol in .rodata. Used by b08 to materialize a float bit pattern.
- *
- * Only RC_FP destinations are accepted; anything else panics. The
- * literal bytes are appended to .rodata under a generated local symbol
- * named ".LCFP<n>", then the emitter is switched back to the section it
- * was in and the load is emitted through scratch x9.
- * NOTE(review): a literal of 8 bytes uses the 64-bit load/reloc and every
- * other size is treated as 32-bit — confirm callers only pass 4 or 8. */
- static void aa_load_const(CGTarget* t, Operand dst, ConstBytes cb) {
- AAImpl* a = impl_of(t);
- if (dst.cls != RC_FP) {
- compiler_panic(t->c, a->loc, "aarch64 load_const: only FP supported in v1");
- }
-
- /* Find or create .rodata. obj_align_to bumps the section's recorded
- * align as a side effect of placement, so we pass 1 here. */
- Sym ro_name = pool_intern_cstr(t->c->global, ".rodata");
- ObjSecId ro = obj_section(t->obj, ro_name, SEC_RODATA, SF_ALLOC, 1u);
-
- /* Temporarily redirect the emitter to .rodata; a zero cb.align falls
-  * back to 4-byte alignment. */
- u32 cur_section = t->mc->section_id;
- t->mc->set_section(t->mc, ro);
- u32 ro_off = obj_align_to(t->obj, ro, cb.align ? cb.align : 4);
- t->mc->emit_bytes(t->mc, cb.bytes, cb.size);
-
- /* Local symbol pointing at the literal. */
- char namebuf[64];
- /* Function-local static counter: one sequence shared by every call. */
- static u32 lit_seq = 0;
- int len = 0;
- {
- /* Hand-rolled formatting of prefix + decimal sequence number: digits
-  * are produced least-significant first into tmp, then appended in
-  * reverse. */
- const char* prefix = ".LCFP";
- for (; prefix[len]; ++len) namebuf[len] = prefix[len];
- u32 v = lit_seq++;
- char tmp[16];
- int tn = 0;
- if (v == 0)
- tmp[tn++] = '0';
- else {
- while (v) {
- tmp[tn++] = '0' + (char)(v % 10);
- v /= 10;
- }
- }
- for (int i = tn - 1; i >= 0; --i) namebuf[len++] = tmp[i];
- namebuf[len] = 0;
- }
- Sym sname = pool_intern_cstr(t->c->global, namebuf);
- ObjSymId sym = obj_symbol(t->obj, sname, SB_LOCAL, SK_OBJ, ro, (u64)ro_off,
- (u64)cb.size);
-
- /* Back to the section that was active on entry. */
- t->mc->set_section(t->mc, cur_section);
-
- /* ADRP X9, sym ; LDR Sd, [X9, #:lo12:sym] */
- u32 adrp_pos = t->mc->pos(t->mc);
- emit32(t->mc, aa64_adrp_base(9));
- t->mc->emit_reloc_at(t->mc, cur_section, adrp_pos, R_AARCH64_ADR_PREL_PG_HI21,
- sym, 0, 0, 0);
-
- u32 ldr_pos = t->mc->pos(t->mc);
- u32 sidx = (cb.size == 8) ? 3u : 2u;
- emit32(t->mc, aa64_ldr_fp_uimm(sidx, reg_num(dst), 9, 0));
- RelocKind lo12 = (cb.size == 8) ? R_AARCH64_LDST64_ABS_LO12_NC
- : R_AARCH64_LDST32_ABS_LO12_NC;
- t->mc->emit_reloc_at(t->mc, cur_section, ldr_pos, lo12, sym, 0, 0, 0);
- }
-
- /* Register-to-register copy. If either operand is RC_FP an FMOV is
-  * emitted (double when dst's type is double, single otherwise);
-  * otherwise an integer MOV sized by dst's type.
-  * NOTE(review): the FP branch is also taken when only one side is RC_FP;
-  * confirm aa64_fmov_reg is the right encoding for a mixed-class copy or
-  * that callers never request one. */
- static void aa_copy(CGTarget* t, Operand dst, Operand src) {
- if (dst.cls == RC_FP || src.cls == RC_FP) {
- u32 type = type_is_fp_double(dst.type) ? 1u : 0u;
- emit32(t->mc, aa64_fmov_reg(type, reg_num(dst), reg_num(src)));
- return;
- }
- u32 sf = type_is_64(dst.type) ? 1u : 0u;
- emit32(t->mc, aa64_mov_reg(sf, reg_num(dst), reg_num(src)));
- }
-
-/* ---- load / store / addr_of ---- */
-
- /* Pick the LO12_NC relocation for an LDR/STR (immediate, unsigned offset)
-  * that accesses `nbytes` bytes. Sizes other than 1, 2 and 4 (including 8)
-  * map to the 64-bit variant. */
- static RelocKind ldst_lo12_reloc_for(u32 nbytes) {
-   if (nbytes == 1) return R_AARCH64_LDST8_ABS_LO12_NC;
-   if (nbytes == 2) return R_AARCH64_LDST16_ABS_LO12_NC;
-   if (nbytes == 4) return R_AARCH64_LDST32_ABS_LO12_NC;
-   return R_AARCH64_LDST64_ABS_LO12_NC;
- }
-
-/* Forward decl: addend fixup after a GOT load lands here when the
- * addend doesn't fit in a single imm12. Defined just below. */
-static void emit_addr_adjust(MCEmitter* mc, u32 Rd, u32 base, i32 off);
-
- /* True when the symbol must be reached via a GOT indirection slot at
- * this site: an undefined external on a target format that binds extern
- * data through __got / non-lazy pointers (Mach-O today). The policy
- * lives behind obj_format_extern_via_got so the backend never names a
- * specific OS/format.
- *
- * The "is undefined" test keys on section_id == OBJ_SEC_NONE — the
- * canonical marker per obj.h. SK_UNDEF as a kind is reserved for
- * symbols whose kind isn't known yet; the decl pass mints externs
- * with their intended SK_OBJ / SK_FUNC kind plus OBJ_SEC_NONE.
- *
- * This wrapper itself only forwards to obj_symbol_extern_via_got.
- * NOTE(review): the text above names obj_format_extern_via_got, which is
- * not what is called here — presumably the symbol-level helper consults
- * it; confirm and align the names. */
- static int use_got_for_sym(CGTarget* t, ObjSymId sym) {
- return obj_symbol_extern_via_got(t->c, t->obj, sym);
- }
-
- /* Emit `ADRP dst, sym@GOTPAGE ; LDR Xdst, [dst, #sym@GOTPAGEOFF]`,
- * leaving the runtime address of `sym` in `dst_reg`. Addends are
- * deliberately omitted from the GOT relocs — most loaders disallow
- * nonzero addends on GOT-load fixups — so callers add any displacement
- * with a follow-on ADD/LDUR/STUR.
- *
- * `dst_reg` doubles as the ADRP destination and the LDR base, so no
- * extra scratch register is touched. Both relocations are recorded
- * against the emitter's current section. */
- static void emit_got_load_addr(CGTarget* t, u32 dst_reg, ObjSymId sym) {
- MCEmitter* mc = t->mc;
- u32 sec = mc->section_id;
- /* Capture each instruction's position before emitting it so the reloc
-  * points at the instruction itself. */
- u32 adrp_pos = mc->pos(mc);
- emit32(mc, aa64_adrp_base(dst_reg));
- mc->emit_reloc_at(mc, sec, adrp_pos, R_AARCH64_ADR_GOT_PAGE, sym, 0, 0, 0);
- u32 ldr_pos = mc->pos(mc);
- emit32(mc, aa64_ldr_uimm(/*size=*/3, dst_reg, dst_reg, 0));
- mc->emit_reloc_at(mc, sec, ldr_pos, R_AARCH64_LD64_GOT_LO12_NC, sym, 0, 0, 0);
- }
-
- /* Put the address sym+addend into `dst_reg`. Symbols that do not need a
-  * GOT slot use ADRP + ADD with the addend carried by both relocations
-  * (PG_HI21 / ADD_ABS_LO12_NC). Symbols that do need one are loaded from
-  * the GOT and any nonzero addend is applied afterwards with plain
-  * arithmetic. */
- static void emit_global_addr(CGTarget* t, u32 dst_reg, ObjSymId sym,
-                              i64 addend) {
-   MCEmitter* mc = t->mc;
-
-   if (!use_got_for_sym(t, sym)) {
-     const u32 section = mc->section_id;
-
-     const u32 page_at = mc->pos(mc);
-     emit32(mc, aa64_adrp_base(dst_reg));
-     mc->emit_reloc_at(mc, section, page_at, R_AARCH64_ADR_PREL_PG_HI21, sym,
-                       addend, 0, 0);
-
-     const u32 low_at = mc->pos(mc);
-     emit32(mc, aa64_add_imm(1, dst_reg, dst_reg, 0, 0));
-     mc->emit_reloc_at(mc, section, low_at, R_AARCH64_ADD_ABS_LO12_NC, sym,
-                       addend, 0, 0);
-     return;
-   }
-
-   emit_got_load_addr(t, dst_reg, sym);
-   if (addend != 0) {
-     emit_addr_adjust(mc, dst_reg, dst_reg, (i32)addend);
-   }
- }
-
- /* Emit code that puts (base + off) into Rd.
-  *   off == 0          : MOV Rd, base
-  *   |off| <= 0xfff    : one ADD/SUB immediate
-  *   |off| <  (1 << 24): ADD/SUB of the high 12 bits (shifted by 12), then
-  *                       of the low 12 bits; a zero half is skipped
-  *   otherwise         : load `off` into Rd, then ADD Rd, base, Rd
-  * NOTE(review): the last form writes Rd before reading base, so it looks
-  * unsafe when Rd == base (callers in this file do pass the same register
-  * for both) — confirm such offsets cannot occur there.
-  * NOTE(review): `-off` is evaluated on an i32, which overflows for the
-  * most negative value. */
- static void emit_addr_adjust(MCEmitter* mc, u32 Rd, u32 base, i32 off) {
- if (off == 0) {
- emit32(mc, aa64_mov_reg(1, Rd, base));
- return;
- }
- u32 abs_off = (off < 0) ? (u32)(-off) : (u32)off;
- /* Single imm12. */
- if (abs_off <= 0xfff) {
- if (off < 0)
- emit32(mc, aa64_sub_imm(1, Rd, base, abs_off, 0));
- else
- emit32(mc, aa64_add_imm(1, Rd, base, abs_off, 0));
- return;
- }
- /* Two-shift form: hi12 + lo12 (when low is zero, hi only). */
- if ((abs_off >> 24) == 0) {
- u32 hi = (abs_off >> 12) & 0xfff;
- u32 lo = abs_off & 0xfff;
- /* The second instruction reads Rd when the first one already produced
-  * a partial result there, and base otherwise. */
- if (off < 0) {
- if (hi) emit32(mc, aa64_sub_imm(1, Rd, base, hi, 1));
- if (lo) emit32(mc, aa64_sub_imm(1, Rd, hi ? Rd : base, lo, 0));
- } else {
- if (hi) emit32(mc, aa64_add_imm(1, Rd, base, hi, 1));
- if (lo) emit32(mc, aa64_add_imm(1, Rd, hi ? Rd : base, lo, 0));
- }
- return;
- }
- /* Generic: load constant into Rd, then add. */
- emit_load_imm(mc, 1, Rd, off);
- emit32(mc, aa64_add(1, Rd, base, Rd));
- }
-
- /* Resolve an address operand (LOCAL, INDIRECT or GLOBAL) into (base_reg,
- * signed offset) via a possibly-temporary base register. Returns the
- * base reg and writes the offset to *out_off.
- * Offsets outside the STUR/LDUR -256..255 window are folded into tmp_reg:
- * the function then stores 0 in *out_off and returns tmp_reg. GLOBAL
- * operands are always materialized into tmp_reg. Any other operand kind
- * is a fatal compiler error. */
- static u32 addr_base(CGTarget* t, Operand addr, i32* out_off, u32 tmp_reg) {
- AAImpl* a = impl_of(t);
- if (addr.kind == OPK_LOCAL) {
- AASlot* s = slot_get(a, addr.v.frame_slot);
- if (!s) compiler_panic(t->c, a->loc, "aarch64 addr_base: bad slot");
- /* Locals live below the frame pointer: address = x29 - s->off. */
- i32 off = -(i32)s->off;
- if (off >= -256 && off <= 255) {
- *out_off = off;
- return 29; /* x29 = fp */
- }
- /* Out of STUR range — synthesize the address into tmp_reg. */
- emit_addr_adjust(t->mc, tmp_reg, 29, off);
- *out_off = 0;
- return tmp_reg;
- }
- if (addr.kind == OPK_INDIRECT) {
- i32 off = addr.v.ind.ofs;
- /* Only the low five bits of the base field are used as the register
-  * number. */
- u32 base = addr.v.ind.base & 0x1f;
- if (off >= -256 && off <= 255) {
- *out_off = off;
- return base;
- }
- emit_addr_adjust(t->mc, tmp_reg, base, off);
- *out_off = 0;
- return tmp_reg;
- }
- if (addr.kind == OPK_GLOBAL) {
- emit_global_addr(t, tmp_reg, addr.v.global.sym, addr.v.global.addend);
- *out_off = 0;
- return tmp_reg;
- }
- compiler_panic(t->c, a->loc, "aarch64 addr_base: unsupported kind %d",
- (int)addr.kind);
- }
-
- /* Load `sz` bytes from `addr` into register operand `dst`, where `sz` is
-  * ma.size or, when that is zero, the byte size of addr's type. FP
-  * destinations use the FP load encodings. Scratch x9 is used whenever
-  * an address has to be materialized. */
- static void aa_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) {
-   u32 sz = ma.size ? ma.size : type_byte_size(addr.type);
-   u32 sidx = size_idx_for_bytes(sz);
-
-   /* OPK_GLOBAL: ADRP scratch, sym ; LDR Wd, [scratch, #:lo12:sym].
-    * The LO12_NC reloc requires the scaled-offset LDR encoding, not LDUR.
-    *
-    * Extern-via-GOT path: ADRP scratch, sym@GOTPAGE ;
-    *   LDR Xscratch, [scratch, #:gotoff:sym] ; LDUR Wd, [scratch, #addend]
-    * The GOT load returns the symbol's runtime address; we then read the
-    * value at +addend with a plain LDUR (no reloc, addend baked in). */
-   if (addr.kind == OPK_GLOBAL) {
-     MCEmitter* mc = t->mc;
-     u32 sec = mc->section_id;
-     ObjSymId sym = addr.v.global.sym;
-     i64 add = addr.v.global.addend;
-     if (use_got_for_sym(t, sym)) {
-       emit_got_load_addr(t, /*dst=*/9, sym);
-       /* LDUR only encodes a signed 9-bit displacement. Fold a larger
-        * addend into x9 first instead of mis-encoding it. Only the
-        * ADD/SUB-immediate forms of emit_addr_adjust are safe with
-        * Rd == base, so anything needing 24 bits or more is rejected. */
-       if (add < -256 || add > 255) {
-         if (add <= -(i64)0x1000000 || add >= (i64)0x1000000) {
-           compiler_panic(t->c, impl_of(t)->loc,
-                          "aarch64 load: GOT addend out of range");
-         }
-         emit_addr_adjust(mc, 9, 9, (i32)add);
-         add = 0;
-       }
-       if (dst.cls == RC_FP) {
-         emit32(mc, aa64_ldur_fp(sidx, reg_num(dst), 9, (i32)add));
-       } else {
-         emit32(mc, aa64_ldur(sidx, reg_num(dst), 9, (i32)add));
-       }
-       return;
-     }
-     u32 adrp_pos = mc->pos(mc);
-     emit32(mc, aa64_adrp_base(/*Rd=*/9));
-     mc->emit_reloc_at(mc, sec, adrp_pos, R_AARCH64_ADR_PREL_PG_HI21, sym, add,
-                       0, 0);
-     u32 ld_pos = mc->pos(mc);
-     if (dst.cls == RC_FP) {
-       emit32(mc, aa64_ldr_fp_uimm(sidx, reg_num(dst), 9, 0));
-     } else {
-       emit32(mc, aa64_ldr_uimm(sidx, reg_num(dst), 9, 0));
-     }
-     mc->emit_reloc_at(mc, sec, ld_pos, ldst_lo12_reloc_for(sz), sym, add, 0, 0);
-     return;
-   }
-
-   /* LOCAL / INDIRECT: addr_base yields a base register plus an offset
-    * that already fits LDUR. */
-   i32 off;
-   u32 base = addr_base(t, addr, &off, 9);
-   if (dst.cls == RC_FP) {
-     emit32(t->mc, aa64_ldur_fp(sidx, reg_num(dst), base, off));
-   } else {
-     emit32(t->mc, aa64_ldur(sidx, reg_num(dst), base, off));
-   }
- }
-
- /* Store `src` (register or immediate) to `addr`, writing `sz` bytes where
-  * `sz` is ma.size or, when that is zero, the byte size of addr's type.
-  * Immediates are materialized in scratch x9, in which case address
-  * synthesis uses x10 instead of x9. */
- static void aa_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) {
-   u32 sz = ma.size ? ma.size : type_byte_size(addr.type);
-   u32 sidx = size_idx_for_bytes(sz);
-
-   /* OPK_GLOBAL: ADRP scratch, sym ; STR Wt, [scratch, #:lo12:sym].
-    * For OPK_IMM source, materialize the value first into x9, then use
-    * x10 for the global base so the two scratches don't collide.
-    *
-    * Extern-via-GOT path: load the symbol's runtime address into the
-    * base scratch via emit_got_load_addr, then STUR with addend baked
-    * into the imm9 (no reloc on the store). */
-   if (addr.kind == OPK_GLOBAL) {
-     MCEmitter* mc = t->mc;
-     u32 sec = mc->section_id;
-     ObjSymId sym = addr.v.global.sym;
-     i64 add = addr.v.global.addend;
-
-     u32 src_reg;
-     u32 src_is_fp = 0;
-     if (src.kind == OPK_IMM) {
-       u32 sf = (sz == 8) ? 1u : 0u;
-       emit_load_imm(mc, sf, /*Rd=*/9, src.v.imm);
-       src_reg = 9;
-     } else if (src.cls == RC_FP) {
-       src_reg = reg_num(src);
-       src_is_fp = 1;
-     } else {
-       src_reg = reg_num(src);
-     }
-     u32 base = (src.kind == OPK_IMM) ? 10u : 9u;
-     if (use_got_for_sym(t, sym)) {
-       emit_got_load_addr(t, base, sym);
-       /* STUR only encodes a signed 9-bit displacement. Fold a larger
-        * addend into the base scratch first instead of mis-encoding it.
-        * Only the ADD/SUB-immediate forms of emit_addr_adjust are safe
-        * with Rd == base, so anything needing 24 bits or more is
-        * rejected. */
-       if (add < -256 || add > 255) {
-         if (add <= -(i64)0x1000000 || add >= (i64)0x1000000) {
-           compiler_panic(t->c, impl_of(t)->loc,
-                          "aarch64 store: GOT addend out of range");
-         }
-         emit_addr_adjust(mc, base, base, (i32)add);
-         add = 0;
-       }
-       if (src_is_fp) {
-         emit32(mc, aa64_stur_fp(sidx, src_reg, base, (i32)add));
-       } else {
-         emit32(mc, aa64_stur(sidx, src_reg, base, (i32)add));
-       }
-       return;
-     }
-     u32 adrp_pos = mc->pos(mc);
-     emit32(mc, aa64_adrp_base(base));
-     mc->emit_reloc_at(mc, sec, adrp_pos, R_AARCH64_ADR_PREL_PG_HI21, sym, add,
-                       0, 0);
-     u32 st_pos = mc->pos(mc);
-     if (src_is_fp) {
-       emit32(mc, aa64_str_fp_uimm(sidx, src_reg, base, 0));
-     } else {
-       emit32(mc, aa64_str_uimm(sidx, src_reg, base, 0));
-     }
-     mc->emit_reloc_at(mc, sec, st_pos, ldst_lo12_reloc_for(sz), sym, add, 0, 0);
-     return;
-   }
-
-   i32 off;
-   /* For OPK_IMM source we need x9 to materialize the value, so the
-    * address synthesis (addr_base fallback) lands in x10. Otherwise x9
-    * is free. */
-   u32 addr_tmp = (src.kind == OPK_IMM) ? 10u : 9u;
-   u32 base = addr_base(t, addr, &off, addr_tmp);
-
-   if (src.kind == OPK_IMM) {
-     /* Materialize through a scratch register. Use x9 (caller-saved). */
-     u32 sf = (sz == 8) ? 1u : 0u;
-     emit_load_imm(t->mc, sf, 9, src.v.imm);
-     emit32(t->mc, aa64_stur(sidx, 9, base, off));
-     return;
-   }
-   if (src.cls == RC_FP) {
-     emit32(t->mc, aa64_stur_fp(sidx, reg_num(src), base, off));
-   } else {
-     emit32(t->mc, aa64_stur(sidx, reg_num(src), base, off));
-   }
- }
-
- /* Materialize the address of lvalue `lv` into register operand `dst`.
-  *   OPK_LOCAL    : x29 - slot offset
-  *   OPK_INDIRECT : base register + constant offset
-  *   OPK_GLOBAL   : symbol + addend, via a GOT slot when required
-  * Panics on a bad slot or on an indirect offset that needs 24 bits or
-  * more; any other operand kind goes to aa_panic. */
- static void aa_addr_of(CGTarget* t, Operand dst, Operand lv) {
-   AAImpl* a = impl_of(t);
-   if (lv.kind == OPK_LOCAL) {
-     AASlot* s = slot_get(a, lv.v.frame_slot);
-     if (!s) compiler_panic(t->c, a->loc, "aarch64 addr_of: bad slot");
-     /* dst = x29 - off. emit_addr_adjust emits the same single SUB for
-      * offsets up to 0xfff and a correct multi-instruction sequence for
-      * larger frames, where one imm12 cannot hold the offset. */
-     emit_addr_adjust(t->mc, reg_num(dst), 29, -(i32)s->off);
-     return;
-   }
-   if (lv.kind == OPK_INDIRECT) {
-     i32 ofs = lv.v.ind.ofs;
-     u32 base = lv.v.ind.base & 0x1f;
-     /* Widen before negating so the most negative i32 cannot overflow. */
-     i64 mag = (ofs < 0) ? -(i64)ofs : (i64)ofs;
-     /* Only the MOV / ADD-SUB immediate forms are used here: they stay
-      * correct when dst and base are the same register. */
-     if (mag >= (i64)0x1000000) {
-       compiler_panic(t->c, a->loc,
-                      "aarch64 addr_of: indirect offset %d unsupported", ofs);
-     }
-     emit_addr_adjust(t->mc, reg_num(dst), base, ofs);
-     return;
-   }
-   if (lv.kind == OPK_GLOBAL) {
-     /* ADRP Xd, sym ; ADD Xd, Xd, #:lo12:sym (with addend baked into both
-      * relocations). Used to materialize a function or data pointer.
-      *
-      * Extern-via-GOT path: load the address from the GOT slot and then
-      * apply the addend with a plain ADD/SUB (GOT relocs disallow
-      * addends). emit_global_addr implements exactly this sequence. */
-     emit_global_addr(t, reg_num(dst), lv.v.global.sym, lv.v.global.addend);
-     return;
-   }
-   aa_panic(t, "addr_of");
- }
-
- /* AArch64 TLS Local-Exec materialization.
- * mrs xtmp, tpidr_el0
- * add xdst, xtmp, #:tprel_hi12:sym, lsl #12
- * add xdst, xdst, #:tprel_lo12_nc:sym
- * The two ADDs carry HI12 / LO12_NC TLSLE relocations; the linker fills in
- * the per-target TP-relative offset (image offset + AARCH64_TCB_SIZE).
- *
- * xtmp is scratch x9. Both ADDs are emitted with a zero immediate and
- * `addend` is passed on each relocation. */
- static void aa_tls_addr_of(CGTarget* t, Operand dst, ObjSymId sym, i64 addend) {
- MCEmitter* mc = t->mc;
- u32 sec = mc->section_id;
- u32 rd = reg_num(dst);
-
- /* Read thread pointer into x9 (scratch). */
- emit32(mc, aa64_mrs_tpidr_el0(/*Rt=*/9));
-
- /* add xdst, x9, #:tprel_hi12:sym, lsl #12 */
- u32 hi_pos = mc->pos(mc);
- emit32(mc, aa64_add_imm(/*sf=*/1, rd, /*Rn=*/9, /*imm12=*/0, /*sh=*/1));
- mc->emit_reloc_at(mc, sec, hi_pos, R_AARCH64_TLSLE_ADD_TPREL_HI12, sym,
- addend, 0, 0);
-
- /* add xdst, xdst, #:tprel_lo12_nc:sym */
- u32 lo_pos = mc->pos(mc);
- emit32(mc, aa64_add_imm(/*sf=*/1, rd, /*Rn=*/rd, /*imm12=*/0, /*sh=*/0));
- mc->emit_reloc_at(mc, sec, lo_pos, R_AARCH64_TLSLE_ADD_TPREL_LO12_NC, sym,
- addend, 0, 0);
- }
-
- /* Resolve a dst/src address operand for the aggregate ops below.
-  * Accepts OPK_REG (already a pointer, returned as-is) and OPK_LOCAL
-  * (= fp - off), which is materialized into `scratch`. Returns the
-  * register holding the address; any other operand kind or a bad slot
-  * is a fatal compiler error. */
- static u32 agg_addr_reg(CGTarget* t, Operand op, u32 scratch) {
-   if (op.kind == OPK_REG) return reg_num(op);
-   if (op.kind == OPK_LOCAL) {
-     AAImpl* a = impl_of(t);
-     AASlot* s = slot_get(a, op.v.frame_slot);
-     if (!s) compiler_panic(t->c, a->loc, "aarch64 agg: bad slot");
-     /* emit_addr_adjust emits the same single SUB for offsets up to 0xfff
-      * and a correct multi-instruction sequence for larger frames, where
-      * one imm12 cannot hold the offset. */
-     emit_addr_adjust(t->mc, scratch, 29, -(i32)s->off);
-     return scratch;
-   }
-   compiler_panic(t->c, impl_of(t)->loc,
-                  "aarch64 agg: address kind %d unsupported", (int)op.kind);
- }
-
- /* Copy agg.size bytes from `src_addr` to `dst_addr` with an unrolled
-  * sequence of load/store pairs through scratch x12, using the widest
-  * step (8, 4, 2, then 1 bytes) that still fits the remainder. The
-  * destination address uses scratch x9 when it must be materialized; the
-  * source uses x10, or x11 when the destination register is x10. */
- static void aa_copy_bytes(CGTarget* t, Operand dst_addr, Operand src_addr,
-                           AggregateAccess agg) {
-   /* Step widths paired with the matching LDUR/STUR size index. */
-   static const struct {
-     u32 width;
-     u32 sidx;
-   } steps[] = {{8u, 3u}, {4u, 2u}, {2u, 1u}};
-
-   MCEmitter* mc = t->mc;
-   u32 dst_reg = agg_addr_reg(t, dst_addr, 9);
-   u32 src_reg = agg_addr_reg(t, src_addr, (dst_reg == 10) ? 11u : 10u);
-   u32 total = agg.size;
-   u32 done = 0;
-
-   /* Unscaled LDUR/STUR keeps this independent of `agg.align`. */
-   for (u32 k = 0; k < sizeof steps / sizeof steps[0]; ++k) {
-     while (done + steps[k].width <= total) {
-       emit32(mc, aa64_ldur(steps[k].sidx, 12, src_reg, (i32)done));
-       emit32(mc, aa64_stur(steps[k].sidx, 12, dst_reg, (i32)done));
-       done += steps[k].width;
-     }
-   }
-   /* Trailing single bytes. */
-   while (done < total) {
-     emit32(mc, aa64_ldur(0, 12, src_reg, (i32)done));
-     emit32(mc, aa64_stur(0, 12, dst_reg, (i32)done));
-     done += 1;
-   }
- }
-
- /* Fill agg.size bytes at `dst_addr` with the low 8 bits of the immediate
-  * `byte_value`, using an unrolled sequence of 8/4/2/1-byte stores. A
-  * zero fill stores the zero register (31) directly; any other value is
-  * first broadcast into scratch x12. Non-immediate byte values are a
-  * fatal compiler error. */
- static void aa_set_bytes(CGTarget* t, Operand dst_addr, Operand byte_value,
-                          AggregateAccess agg) {
-   /* Step widths paired with the matching STUR size index. */
-   static const struct {
-     u32 width;
-     u32 sidx;
-   } steps[] = {{8u, 3u}, {4u, 2u}, {2u, 1u}};
-
-   MCEmitter* mc = t->mc;
-   u32 dst_reg = agg_addr_reg(t, dst_addr, 9);
-
-   if (byte_value.kind != OPK_IMM) {
-     compiler_panic(t->c, impl_of(t)->loc,
-                    "aarch64 set_bytes: REG byte not yet supported");
-   }
-   u32 fill = (u32)(byte_value.v.imm & 0xffu);
-
-   /* Pick the register every store below reads from. */
-   u32 value_reg = 31u; /* XZR/WZR: no broadcast register needed for zero */
-   if (fill != 0) {
-     /* Replicate the byte across all eight lanes of x12. */
-     u64 pattern = (u64)fill * 0x0101010101010101ull;
-     emit_load_imm(mc, /*sf=*/1u, /*Rd=*/12u, (i64)pattern);
-     value_reg = 12u;
-   }
-
-   u32 total = agg.size;
-   u32 done = 0;
-   for (u32 k = 0; k < sizeof steps / sizeof steps[0]; ++k) {
-     while (done + steps[k].width <= total) {
-       emit32(mc, aa64_stur(steps[k].sidx, value_reg, dst_reg, (i32)done));
-       done += steps[k].width;
-     }
-   }
-   /* Trailing single bytes. */
-   while (done < total) {
-     emit32(mc, aa64_stur(0, value_reg, dst_reg, (i32)done));
-     done += 1;
-   }
- }
-
- /* Load a bit-field described by `bf` from the record at `record_addr`
-  * into `dst`. The storage unit size defaults to 4 bytes and the field
-  * width to 1 bit when the descriptor leaves them zero. The operation is
-  * 64-bit only when the storage unit is 8 bytes.
-  * NOTE(review): nothing here checks that bit_offset + bit_width fits the
-  * register size — confirm the front end guarantees it. */
- static void aa_bitfield_load(CGTarget* t, Operand dst, Operand record_addr,
- BitFieldAccess bf) {
- MCEmitter* mc = t->mc;
- u32 base = agg_addr_reg(t, record_addr, 9);
- u32 storage_bytes = bf.storage.size ? bf.storage.size : 4u;
- u32 sf = (storage_bytes == 8u) ? 1u : 0u;
- u32 sidx = size_idx_for_bytes(storage_bytes);
- u32 rd = reg_num(dst);
-
- /* Load the entire storage unit, then extract bf.bit_width bits at
- * bf.bit_offset. UBFX (zero-extend) or SBFX (sign-extend) per the
- * field's signedness. */
- emit32(mc, aa64_ldur(sidx, rd, base, (i32)bf.storage_offset));
- u32 lsb = bf.bit_offset;
- u32 width = bf.bit_width ? bf.bit_width : 1u;
- /* The extract is expressed as SBFM/UBFM with immr = lsb and imms = index
-  * of the field's top bit. */
- u32 imms = lsb + width - 1u;
- if (bf.signed_) {
- emit32(mc, aa64_sbfm(sf, rd, rd, lsb, imms));
- } else {
- emit32(mc, aa64_ubfm(sf, rd, rd, lsb, imms));
- }
- }
-
- /* Store `src` (register or immediate) into the bit-field described by
-  * `bf` inside the record at `record_addr`: load the storage unit,
-  * insert the field, store the unit back. The storage unit size defaults
-  * to 4 bytes and the field width to 1 bit when the descriptor leaves
-  * them zero. Other source operand kinds are a fatal compiler error. */
- static void aa_bitfield_store(CGTarget* t, Operand record_addr, Operand src,
- BitFieldAccess bf) {
- MCEmitter* mc = t->mc;
- u32 base = agg_addr_reg(t, record_addr, 9);
- u32 storage_bytes = bf.storage.size ? bf.storage.size : 4u;
- u32 sf = (storage_bytes == 8u) ? 1u : 0u;
- u32 sidx = size_idx_for_bytes(storage_bytes);
-
- /* Read-modify-write through scratch registers x10 (storage) and x11
- * (the source value). */
- emit32(mc, aa64_ldur(sidx, /*Rt=*/10u, base, (i32)bf.storage_offset));
-
- u32 src_reg;
- if (src.kind == OPK_IMM) {
- emit_load_imm(mc, sf, /*Rd=*/11u, src.v.imm);
- src_reg = 11u;
- } else if (src.kind == OPK_REG) {
- /* Register sources are used in place; x11 stays untouched. */
- src_reg = reg_num(src);
- } else {
- compiler_panic(t->c, impl_of(t)->loc,
- "aarch64 bitfield_store: src kind %d unsupported",
- (int)src.kind);
- }
-
- /* BFI Rd, Rn, #lsb, #width — insert width bits of Rn[0..width-1]
- * starting at bit lsb of Rd. Encoded as BFM with
- * immr = (RegSize - lsb) mod RegSize, imms = width - 1. */
- u32 reg_size = sf ? 64u : 32u;
- u32 lsb = bf.bit_offset;
- u32 width = bf.bit_width ? bf.bit_width : 1u;
- u32 immr = (reg_size - lsb) % reg_size;
- u32 imms = width - 1u;
- emit32(mc, aa64_bfm(sf, /*Rd=*/10u, src_reg, immr, imms));
-
- /* Write the merged storage unit back to where it was read from. */
- emit32(mc, aa64_stur(sidx, /*Rt=*/10u, base, (i32)bf.storage_offset));
- }
-
-/* ---- arithmetic ---- */
-
-/* Force an Operand into a register, materializing immediates via x9.
- * Returns the register number to use as Rn/Rm. */
-static u32 force_reg_int(CGTarget* t, Operand op, u32 sf, u32 scratch) {
- if (op.kind == OPK_REG) return reg_num(op);
- if (op.kind == OPK_IMM) {
- emit_load_imm(t->mc, sf, scratch, op.v.imm);
- return scratch;
- }
- compiler_panic(t->c, impl_of(t)->loc,
- "aarch64 binop: operand kind %d unsupported", (int)op.kind);
-}
-
-static void aa_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op,
- Operand b_op) {
- MCEmitter* mc = t->mc;
-
- /* FP binops route through scalar FADD/FSUB/FMUL/FDIV. */
- if (op == BO_FADD || op == BO_FSUB || op == BO_FMUL || op == BO_FDIV) {
- if (a_op.kind != OPK_REG || b_op.kind != OPK_REG || dst.cls != RC_FP) {
- compiler_panic(t->c, impl_of(t)->loc,
- "aarch64 binop: FP op requires REG operands");
- }
- u32 type = type_is_fp_double(dst.type) ? 1u : 0u;
- u32 rd = reg_num(dst);
- u32 rn = reg_num(a_op);
- u32 rm = reg_num(b_op);
- u32 w;
- switch (op) {
- case BO_FADD:
- w = aa64_fadd(type, rd, rn, rm);
- break;
- case BO_FSUB:
- w = aa64_fsub(type, rd, rn, rm);
- break;
- case BO_FMUL:
- w = aa64_fmul(type, rd, rn, rm);
- break;
- case BO_FDIV:
- w = aa64_fdiv(type, rd, rn, rm);
- break;
- default:
- w = 0;
- break; /* unreachable */
- }
- emit32(mc, w);
- return;
- }
-
- u32 sf = type_is_64(dst.type) ? 1u : 0u;
- u32 rd = reg_num(dst);
-
- /* Imm-form fast paths. For commutative ops (ADD/AND/OR/XOR), if the
- * LHS is the IMM swap to canonicalize (REG, IMM) and try to encode.
- * For SUB we don't swap — `SUB imm, reg` has no encoding without
- * materializing. Shifts take the imm as the count and require RHS-IMM
- * by definition. Anything that doesn't fit the encoding falls through
- * to force_reg_int + the shifted-register form, preserving the old
- * behavior. */
- u32 word;
- switch (op) {
- case BO_IADD:
- case BO_AND:
- case BO_OR:
- case BO_XOR: {
- if (a_op.kind == OPK_IMM && b_op.kind != OPK_IMM) {
- Operand t_op = a_op; a_op = b_op; b_op = t_op;
- }
- break;
- }
- default: break;
- }
-
- /* Try the imm-form before materializing. Each case sets `word` and
- * jumps to emit; misses fall through to the reg path below. */
- if (b_op.kind == OPK_IMM && a_op.kind != OPK_IMM) {
- u32 rn_reg = reg_num(a_op);
- i64 imm = b_op.v.imm;
- u32 imm12, sh, N, immr, imms;
- switch (op) {
- case BO_IADD:
- if (aa64_addsub_imm_fits(imm, &imm12, &sh)) {
- emit32(mc, aa64_add_imm(sf, rd, rn_reg, imm12, sh));
- return;
- }
- break;
- case BO_ISUB:
- if (aa64_addsub_imm_fits(imm, &imm12, &sh)) {
- emit32(mc, aa64_sub_imm(sf, rd, rn_reg, imm12, sh));
- return;
- }
- break;
- case BO_AND:
- if (aa64_logimm_encode((u64)imm, sf, &N, &immr, &imms)) {
- emit32(mc, aa64_and_imm(sf, rd, rn_reg, N, immr, imms));
- return;
- }
- break;
- case BO_OR:
- if (aa64_logimm_encode((u64)imm, sf, &N, &immr, &imms)) {
- emit32(mc, aa64_orr_imm(sf, rd, rn_reg, N, immr, imms));
- return;
- }
- break;
- case BO_XOR:
- if (aa64_logimm_encode((u64)imm, sf, &N, &immr, &imms)) {
- emit32(mc, aa64_eor_imm(sf, rd, rn_reg, N, immr, imms));
- return;
- }
- break;
- case BO_SHL: {
- /* C shifts by ≥ width are UB but we don't exploit it; mask the
- * count to width-1 to match the variable-shift behavior. */
- u32 width = sf ? 64u : 32u;
- u32 sh_amt = (u32)((u64)imm & (width - 1u));
- if (aa64_lsl_imm_fields(sh_amt, sf, &immr, &imms)) {
- emit32(mc, aa64_ubfm(sf, rd, rn_reg, immr, imms));
- return;
- }
- break;
- }
- case BO_SHR_U: {
- u32 width = sf ? 64u : 32u;
- u32 sh_amt = (u32)((u64)imm & (width - 1u));
- if (aa64_lsr_imm_fields(sh_amt, sf, &immr, &imms)) {
- emit32(mc, aa64_ubfm(sf, rd, rn_reg, immr, imms));
- return;
- }
- break;
- }
- case BO_SHR_S: {
- u32 width = sf ? 64u : 32u;
- u32 sh_amt = (u32)((u64)imm & (width - 1u));
- if (aa64_asr_imm_fields(sh_amt, sf, &immr, &imms)) {
- emit32(mc, aa64_sbfm(sf, rd, rn_reg, immr, imms));
- return;
- }
- break;
- }
- default: break;
- }
- }
-
- u32 rn = force_reg_int(t, a_op, sf, 9);
- u32 rm = force_reg_int(t, b_op, sf, (rn == 9) ? 10 : 9);
-
- switch (op) {
- case BO_IADD:
- word = aa64_add(sf, rd, rn, rm);
- break;
- case BO_ISUB:
- word = aa64_sub(sf, rd, rn, rm);
- break;
- case BO_IMUL:
- word = aa64_mul(sf, rd, rn, rm);
- break;
- case BO_AND:
- word = aa64_and(sf, rd, rn, rm);
- break;
- case BO_OR:
- word = aa64_orr(sf, rd, rn, rm);
- break;
- case BO_XOR:
- word = aa64_eor(sf, rd, rn, rm);
- break;
- case BO_SHL:
- word = aa64_lslv(sf, rd, rn, rm);
- break;
- case BO_SHR_U:
- word = aa64_lsrv(sf, rd, rn, rm);
- break;
- case BO_SHR_S:
- word = aa64_asrv(sf, rd, rn, rm);
- break;
- case BO_UDIV:
- word = aa64_udiv(sf, rd, rn, rm);
- break;
- case BO_SDIV:
- word = aa64_sdiv(sf, rd, rn, rm);
- break;
- /* rem = a - (a/b)*b → SDIV/UDIV into x11, then MSUB rd, x11, b, a. */
- case BO_SREM:
- emit32(mc, aa64_sdiv(sf, 11, rn, rm));
- word = aa64_msub(sf, rd, 11, rm, rn);
- break;
- case BO_UREM:
- emit32(mc, aa64_udiv(sf, 11, rn, rm));
- word = aa64_msub(sf, rd, 11, rm, rn);
- break;
- case BO_FADD:
- case BO_FSUB:
- case BO_FMUL:
- case BO_FDIV:
- default:
- compiler_panic(t->c, impl_of(t)->loc, "aarch64 binop: op %d unimpl",
- (int)op);
- }
- emit32(mc, word);
-}
-
-static void aa_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) {
- MCEmitter* mc = t->mc;
- u32 sf = type_is_64(dst.type) ? 1u : 0u;
- u32 rd = reg_num(dst);
- /* OPK_IMM is legal per the CGTarget contract (arch.h); force_reg_int
- * materializes into x9 when the operand isn't already a register.
- * cg folds literal unops upstream (cg_fold_unop), so the IMM path
- * here is only reached from opt's emit when the IR carries an
- * unfolded literal — still a contract case we must honor. */
- u32 rn = force_reg_int(t, a_op, sf, 9);
- u32 word;
-
- switch (op) {
- case UO_NEG:
- word = aa64_neg(sf, rd, rn);
- break;
- case UO_BNOT:
- word = aa64_mvn(sf, rd, rn);
- break;
- case UO_NOT:
- /* !x → cmp Xn, #0 ; cset Xd, EQ */
- emit32(mc, aa64_subs_imm(sf, /*ZR=*/31, rn, 0));
- word = aa64_cset_eq(sf, rd);
- break;
- default:
- compiler_panic(t->c, impl_of(t)->loc, "aarch64 unop: op %d unimpl",
- (int)op);
- }
- emit32(mc, word);
-}
-
-static void aa_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) {
- AAImpl* a = impl_of(t);
- MCEmitter* mc = t->mc;
- u32 rd = reg_num(dst);
- u32 rn = reg_num(src);
-
- switch (k) {
- case CV_SEXT: {
- if (src.cls != RC_INT || dst.cls != RC_INT) {
- compiler_panic(t->c, a->loc, "aarch64 convert SEXT: bad classes");
- }
- u32 src_bits = type_byte_size(src.type) * 8u;
- u32 sf_dst = type_is_64(dst.type) ? 1u : 0u;
- emit32(mc, aa64_sbfm(sf_dst, rd, rn, /*immr=*/0, /*imms=*/src_bits - 1u));
- return;
- }
- case CV_ZEXT: {
- if (src.cls != RC_INT || dst.cls != RC_INT) {
- compiler_panic(t->c, a->loc, "aarch64 convert ZEXT: bad classes");
- }
- u32 src_bits = type_byte_size(src.type) * 8u;
- if (src_bits == 32u) {
- /* MOV Wd, Wn auto-zero-extends into the X register. */
- emit32(mc, aa64_mov_reg(0, rd, rn));
- } else {
- emit32(mc, aa64_ubfm(0, rd, rn, /*immr=*/0, /*imms=*/src_bits - 1u));
- }
- return;
- }
- case CV_TRUNC: {
- /* Reading the W view of any X register zeros the upper 32 bits.
- * For narrower truncations the consumer (store / ret) selects
- * the byte width — leaving extra high bits is harmless. */
- emit32(mc, aa64_mov_reg(0, rd, rn));
- return;
- }
- case CV_ITOF_S: {
- u32 sf_src = type_is_64(src.type) ? 1u : 0u;
- u32 type = type_is_fp_double(dst.type) ? 1u : 0u;
- emit32(mc, aa64_scvtf(sf_src, type, rd, rn));
- return;
- }
- case CV_ITOF_U: {
- u32 sf_src = type_is_64(src.type) ? 1u : 0u;
- u32 type = type_is_fp_double(dst.type) ? 1u : 0u;
- emit32(mc, aa64_ucvtf(sf_src, type, rd, rn));
- return;
- }
- case CV_FTOI_S: {
- if (src.cls != RC_FP || dst.cls != RC_INT) {
- compiler_panic(t->c, a->loc, "aarch64 convert FTOI_S: bad classes");
- }
- u32 sf = type_is_64(dst.type) ? 1u : 0u;
- u32 type = type_is_fp_double(src.type) ? 1u : 0u;
- emit32(mc, aa64_fcvtzs(sf, type, rd, rn));
- return;
- }
- case CV_FTOI_U: {
- if (src.cls != RC_FP || dst.cls != RC_INT) {
- compiler_panic(t->c, a->loc, "aarch64 convert FTOI_U: bad classes");
- }
- u32 sf = type_is_64(dst.type) ? 1u : 0u;
- u32 type = type_is_fp_double(src.type) ? 1u : 0u;
- emit32(mc, aa64_fcvtzu(sf, type, rd, rn));
- return;
- }
- case CV_FEXT: {
- /* float (S) → double (D). */
- emit32(mc, aa64_fcvt_d_s(rd, rn));
- return;
- }
- case CV_FTRUNC: {
- /* double (D) → float (S). */
- emit32(mc, aa64_fcvt_s_d(rd, rn));
- return;
- }
- case CV_BITCAST: {
- /* Same-size cross-class reinterpret (i32↔f32, i64↔f64). */
- if (src.cls == RC_INT && dst.cls == RC_FP) {
- u32 sz = type_byte_size(dst.type);
- emit32(mc, sz == 8 ? aa64_fmov_d_x(rd, rn) : aa64_fmov_s_w(rd, rn));
- } else if (src.cls == RC_FP && dst.cls == RC_INT) {
- u32 sz = type_byte_size(src.type);
- emit32(mc, sz == 8 ? aa64_fmov_x_d(rd, rn) : aa64_fmov_w_s(rd, rn));
- } else {
- compiler_panic(t->c, a->loc,
- "aarch64 convert BITCAST: same-class not yet supported");
- }
- return;
- }
- default:
- compiler_panic(t->c, a->loc, "aarch64 convert kind %d unimpl", (int)k);
- }
-}
-
-/* ---- calls / return ---- */
-
-/* Materialize a CGABIValue into the outgoing argument slots: register
- * arguments go to x0..x7 / v0..v7; overflow goes to [sp, #stack_off].
- * For BYVAL/INDIRECT the caller's `storage` is the address of the source
- * data; we either load chunks into the next register pair (DIRECT
- * aggregate) or pass the address itself (INDIRECT). */
-static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi,
- const CGABIValue* av, u32* next_int, u32* next_fp,
- u32* stack_off) {
- AAImpl* a = impl_of(t);
- /* Synthesize a one-part DIRECT ABIArgInfo for var args (av->abi is NULL
- * past the fixed-param count). AAPCS64 routes var args through the same
- * register/stack rules as fixed scalars, so this matches what
- * abi_func_info would have produced.
- *
- * Apple ARM64 diverges: variadic args go on the stack only. The
- * ABIFuncInfo.vararg_on_stack trait carries that policy out of the
- * backend — we bump the next-int / next-fp cursors past the register
- * pool so the part below falls through to stack placement. */
- ABIArgInfo va_ai;
- ABIArgPart va_pt;
- const ABIArgInfo* ai = av->abi;
- if (!ai) {
- u32 sz = type_byte_size(av->type);
- memset(&va_ai, 0, sizeof va_ai);
- memset(&va_pt, 0, sizeof va_pt);
- va_ai.kind = ABI_ARG_DIRECT;
- va_ai.parts = &va_pt;
- va_ai.nparts = 1;
- va_pt.cls = (av->storage.cls == RC_FP) ? ABI_CLASS_FP : ABI_CLASS_INT;
- va_pt.size = sz;
- va_pt.align = sz;
- va_pt.src_offset = 0;
- ai = &va_ai;
- if (fi && fi->vararg_on_stack) {
- *next_int = 8;
- *next_fp = 8;
- }
- }
- if (ai->kind == ABI_ARG_IGNORE) return;
-
- if (ai->kind == ABI_ARG_INDIRECT) {
- /* Pass the address of the storage. storage is OPK_LOCAL holding
- * the byval source. */
- u32 dst_reg;
- int to_stack = (*next_int >= 8);
- if (!to_stack)
- dst_reg = (*next_int)++;
- else
- dst_reg = 9;
- if (av->storage.kind == OPK_LOCAL) {
- AASlot* s = slot_get(a, av->storage.v.frame_slot);
- if (!s) compiler_panic(t->c, a->loc, "aarch64 call: bad byval slot");
- emit32(t->mc, aa64_sub_imm(1, dst_reg, 29, s->off, 0));
- } else if (av->storage.kind == OPK_INDIRECT) {
- /* BYVAL from an indirect lvalue: pass the address `base + ind.ofs`
- * itself in the arg register. */
- emit_addr_adjust(t->mc, dst_reg, av->storage.v.ind.base & 0x1f,
- av->storage.v.ind.ofs);
- } else {
- compiler_panic(t->c, a->loc,
- "aarch64 call: INDIRECT arg storage kind %d unsupported",
- (int)av->storage.kind);
- }
- if (to_stack) {
- emit32(t->mc, aa64_str_uimm(3, dst_reg, 31, *stack_off));
- *stack_off += 8;
- }
- return;
- }
-
- /* DIRECT — possibly multiple parts. */
- for (u16 i = 0; i < ai->nparts; ++i) {
- const ABIArgPart* pt = &ai->parts[i];
- u32 sz = pt->size;
- u32 sidx = size_idx_for_bytes(sz);
-
- if (pt->cls == ABI_CLASS_INT) {
- int to_stack = (*next_int >= 8);
- u32 dst_reg = to_stack ? 9u : (*next_int)++;
- /* Source bits for this part. */
- switch (av->storage.kind) {
- case OPK_IMM: {
- u32 sf = (sz == 8) ? 1u : 0u;
- emit_load_imm(t->mc, sf, dst_reg, av->storage.v.imm);
- break;
- }
- case OPK_REG: {
- u32 sf = (sz == 8) ? 1u : 0u;
- emit32(t->mc, aa64_mov_reg(sf, dst_reg, reg_num(av->storage)));
- break;
- }
- case OPK_LOCAL: {
- /* BYVAL aggregate carried in registers: load chunks from
- * the source local's address + part->src_offset. */
- AASlot* s = slot_get(a, av->storage.v.frame_slot);
- if (!s) compiler_panic(t->c, a->loc, "aarch64 call: bad arg slot");
- i32 off = -(i32)s->off + (i32)pt->src_offset;
- emit32(t->mc, aa64_ldur(sidx, dst_reg, 29, off));
- break;
- }
- case OPK_INDIRECT: {
- /* Source is `[base + ind.ofs]`. Load each part from
- * `[base, ind.ofs + part->src_offset]`. The cg layer hands out
- * INDIRECT base regs from the callee-saved pool (x19..x28),
- * which is disjoint from arg regs (x0..x7) and the x9 scratch,
- * so the base survives every iteration of the part loop. */
- Operand src;
- memset(&src, 0, sizeof src);
- src.kind = OPK_INDIRECT;
- src.v.ind.base = av->storage.v.ind.base;
- src.v.ind.ofs = av->storage.v.ind.ofs + (i32)pt->src_offset;
- i32 off;
- u32 base = addr_base(t, src, &off, /*tmp=*/9);
- emit32(t->mc, aa64_ldur(sidx, dst_reg, base, off));
- break;
- }
- default:
- compiler_panic(t->c, a->loc,
- "aarch64 call: arg storage kind %d unsupported",
- (int)av->storage.kind);
- }
- if (to_stack) {
- emit32(t->mc, aa64_str_uimm(3, dst_reg, 31, *stack_off));
- *stack_off += 8;
- }
- } else if (pt->cls == ABI_CLASS_FP) {
- int to_stack = (*next_fp >= 8);
- if (!to_stack) {
- u32 dst_reg = (*next_fp)++;
- switch (av->storage.kind) {
- case OPK_REG: {
- u32 type = (sz == 8) ? 1u : 0u;
- emit32(t->mc, aa64_fmov_reg(type, dst_reg, reg_num(av->storage)));
- break;
- }
- case OPK_INDIRECT: {
- Operand src;
- memset(&src, 0, sizeof src);
- src.kind = OPK_INDIRECT;
- src.v.ind.base = av->storage.v.ind.base;
- src.v.ind.ofs = av->storage.v.ind.ofs + (i32)pt->src_offset;
- i32 off;
- u32 base = addr_base(t, src, &off, /*tmp=*/9);
- emit32(t->mc, aa64_ldur_fp(sidx, dst_reg, base, off));
- break;
- }
- default:
- compiler_panic(t->c, a->loc,
- "aarch64 call: FP arg storage kind %d unsupported",
- (int)av->storage.kind);
- }
- } else {
- /* Store source FP reg directly into the stack slot — going
- * through v0/v1 would corrupt args already placed in the
- * register save area. */
- switch (av->storage.kind) {
- case OPK_REG:
- emit32(t->mc, aa64_stur_fp(sidx, reg_num(av->storage), 31,
- (i32)*stack_off));
- break;
- case OPK_INDIRECT: {
- /* No direct mem-to-mem on aa64: route through a caller-saved
- * scratch FP reg (v16) to avoid clobbering v0..v7 already
- * loaded with earlier FP args. */
- Operand src;
- memset(&src, 0, sizeof src);
- src.kind = OPK_INDIRECT;
- src.v.ind.base = av->storage.v.ind.base;
- src.v.ind.ofs = av->storage.v.ind.ofs + (i32)pt->src_offset;
- i32 off;
- u32 base = addr_base(t, src, &off, /*tmp=*/9);
- emit32(t->mc, aa64_ldur_fp(sidx, /*Vt=*/16u, base, off));
- emit32(t->mc, aa64_stur_fp(sidx, /*Vt=*/16u, 31, (i32)*stack_off));
- break;
- }
- default:
- compiler_panic(
- t->c, a->loc,
- "aarch64 call: FP stack-arg storage kind %d unsupported",
- (int)av->storage.kind);
- }
- *stack_off += 8;
- }
- } else {
- compiler_panic(t->c, a->loc, "aarch64 call: ABI class %d unimpl",
- (int)pt->cls);
- }
- }
-}
-
-static void aa_call(CGTarget* t, const CGCallDesc* d) {
- AAImpl* a = impl_of(t);
- MCEmitter* mc = t->mc;
-
- /* Pass 1: classify args, set up argument registers/stack. */
- u32 next_int = 0, next_fp = 0, stack_off = 0;
-
- /* sret: caller passes destination pointer in x8. */
- if (d->abi && d->abi->has_sret) {
- if (d->ret.storage.kind != OPK_LOCAL) {
- compiler_panic(t->c, a->loc,
- "aarch64 call: sret destination must be LOCAL");
- }
- AASlot* s = slot_get(a, d->ret.storage.v.frame_slot);
- if (!s) compiler_panic(t->c, a->loc, "aarch64 call: bad sret slot");
- emit32(mc, aa64_sub_imm(1, 8, 29, s->off, 0));
- }
-
- for (u32 i = 0; i < d->nargs; ++i) {
- emit_arg_value(t, d->abi, &d->args[i], &next_int, &next_fp, &stack_off);
- }
-
- /* Track outgoing-arg high-water mark, 16-aligned. */
- u32 needed = (stack_off + 15u) & ~15u;
- if (needed > a->max_outgoing) a->max_outgoing = needed;
-
- /* Direct (BL <sym>) vs. indirect (BLR Xn). */
- if (d->callee.kind == OPK_GLOBAL) {
- u32 bl_pos = mc->pos(mc);
- emit32(mc, aa64_bl_base());
- mc->emit_reloc_at(mc, mc->section_id, bl_pos, R_AARCH64_CALL26,
- d->callee.v.global.sym, d->callee.v.global.addend, 0, 0);
- } else if (d->callee.kind == OPK_REG) {
- emit32(mc, aa64_blr(reg_num(d->callee)));
- } else {
- compiler_panic(t->c, a->loc, "aarch64 call: callee kind %d unsupported",
- (int)d->callee.kind);
- }
-
- /* Receive return value. */
- const ABIArgInfo* ri = &d->abi->ret;
- if (ri->kind == ABI_ARG_IGNORE || ri->kind == ABI_ARG_INDIRECT) {
- /* Nothing to copy — sret was placed directly into the dst slot. */
- return;
- }
- if (ri->nparts == 0) return;
-
- Operand rs = d->ret.storage;
- /* Walk parts; INT parts come from x0, x1, ...; FP parts from v0, v1, .... */
- u32 next_int_ret = 0, next_fp_ret = 0;
- for (u16 i = 0; i < ri->nparts; ++i) {
- const ABIArgPart* p = &ri->parts[i];
- u32 src_reg;
- if (p->cls == ABI_CLASS_INT) {
- src_reg = next_int_ret++;
- } else if (p->cls == ABI_CLASS_FP) {
- src_reg = next_fp_ret++;
- } else {
- compiler_panic(t->c, a->loc, "aarch64 call: ret part cls %d unimpl",
- (int)p->cls);
- }
-
- if (rs.kind == OPK_REG) {
- if (ri->nparts != 1) {
- compiler_panic(t->c, a->loc,
- "aarch64 call: REG ret_storage with %u parts",
- (unsigned)ri->nparts);
- }
- if (p->cls == ABI_CLASS_INT) {
- u32 sf = (p->size == 8) ? 1u : 0u;
- emit32(mc, aa64_mov_reg(sf, reg_num(rs), src_reg));
- } else {
- u32 type = (p->size == 8) ? 1u : 0u;
- emit32(mc, aa64_fmov_reg(type, reg_num(rs), src_reg));
- }
- } else if (rs.kind == OPK_LOCAL || rs.kind == OPK_INDIRECT) {
- u32 base_reg;
- i32 base_off;
- if (rs.kind == OPK_LOCAL) {
- AASlot* s = slot_get(a, rs.v.frame_slot);
- if (!s) compiler_panic(t->c, a->loc, "aarch64 call: bad ret slot");
- base_reg = 29;
- base_off = -(i32)s->off;
- } else {
- base_reg = rs.v.ind.base & 0x1f;
- base_off = rs.v.ind.ofs;
- }
- u32 sidx = size_idx_for_bytes(p->size);
- i32 off = base_off + (i32)p->src_offset;
- if (p->cls == ABI_CLASS_INT) {
- emit32(mc, aa64_stur(sidx, src_reg, base_reg, off));
- } else {
- emit32(mc, aa64_stur_fp(sidx, src_reg, base_reg, off));
- }
- } else if (rs.kind == OPK_IMM && rs.type && rs.type->kind == TY_VOID) {
- /* Void return placeholder — nothing to do. */
- } else {
- compiler_panic(t->c, a->loc,
- "aarch64 call: ret_storage kind %d unsupported",
- (int)rs.kind);
- }
- }
-}
-
-/* Materialize the return value, then branch to the function epilogue. */
-static void aa_ret(CGTarget* t, const CGABIValue* val) {
- AAImpl* a = impl_of(t);
- MCEmitter* mc = t->mc;
-
- if (val) {
- const ABIArgInfo* ri = val->abi;
- if (ri && ri->kind == ABI_ARG_INDIRECT) {
- /* sret: caller passed the destination pointer in x8 at entry,
- * which we spilled into sret_ptr_slot. Reload x8 from there,
- * then memcpy the source storage into [x8]. */
- if (val->storage.kind == OPK_LOCAL) {
- AASlot* s = slot_get(a, val->storage.v.frame_slot);
- if (!s) compiler_panic(t->c, a->loc, "aarch64 ret: bad sret slot");
- if (a->sret_ptr_slot != FRAME_SLOT_NONE) {
- AASlot* sp = slot_get(a, a->sret_ptr_slot);
- if (sp) emit32(mc, aa64_ldur(3, 8, 29, -(i32)sp->off));
- }
- u32 nbytes = s->size;
- u32 i = 0;
- while (i + 8 <= nbytes) {
- emit32(mc, aa64_ldur(3, 9, 29, -(i32)s->off + (i32)i));
- emit32(mc, aa64_str_uimm(3, 9, 8, i));
- i += 8;
- }
- while (i + 4 <= nbytes) {
- emit32(mc, aa64_ldur(2, 9, 29, -(i32)s->off + (i32)i));
- emit32(mc, aa64_str_uimm(2, 9, 8, i));
- i += 4;
- }
- while (i + 2 <= nbytes) {
- emit32(mc, aa64_ldur(1, 9, 29, -(i32)s->off + (i32)i));
- emit32(mc, aa64_str_uimm(1, 9, 8, i));
- i += 2;
- }
- while (i < nbytes) {
- emit32(mc, aa64_ldur(0, 9, 29, -(i32)s->off + (i32)i));
- emit32(mc, aa64_str_uimm(0, 9, 8, i));
- i += 1;
- }
- } else if (val->storage.kind == OPK_INDIRECT) {
- /* sret memcpy from `[base + ind.ofs]` into [x8]. cg populates
- * `val->size` with the aggregate byte count. */
- u32 nbytes = val->size;
- if (!nbytes) {
- compiler_panic(t->c, a->loc,
- "aarch64 ret indirect: missing aggregate size");
- }
- if (a->sret_ptr_slot != FRAME_SLOT_NONE) {
- AASlot* sp = slot_get(a, a->sret_ptr_slot);
- if (sp) emit32(mc, aa64_ldur(3, 8, 29, -(i32)sp->off));
- }
- u32 base_reg = val->storage.v.ind.base & 0x1f;
- i32 base_off = val->storage.v.ind.ofs;
- u32 i = 0;
- while (i + 8 <= nbytes) {
- emit32(mc, aa64_ldur(3, 9, base_reg, base_off + (i32)i));
- emit32(mc, aa64_str_uimm(3, 9, 8, i));
- i += 8;
- }
- while (i + 4 <= nbytes) {
- emit32(mc, aa64_ldur(2, 9, base_reg, base_off + (i32)i));
- emit32(mc, aa64_str_uimm(2, 9, 8, i));
- i += 4;
- }
- while (i + 2 <= nbytes) {
- emit32(mc, aa64_ldur(1, 9, base_reg, base_off + (i32)i));
- emit32(mc, aa64_str_uimm(1, 9, 8, i));
- i += 2;
- }
- while (i < nbytes) {
- emit32(mc, aa64_ldur(0, 9, base_reg, base_off + (i32)i));
- emit32(mc, aa64_str_uimm(0, 9, 8, i));
- i += 1;
- }
- } else {
- compiler_panic(t->c, a->loc,
- "aarch64 ret indirect: storage kind %d unsupported",
- (int)val->storage.kind);
- }
- } else if (val->storage.kind == OPK_REG) {
- if (val->storage.cls == RC_FP) {
- u32 type = type_is_fp_double(val->storage.type) ? 1u : 0u;
- emit32(mc, aa64_fmov_reg(type, /*Rd=*/0, reg_num(val->storage)));
- } else {
- u32 sf = type_is_64(val->storage.type) ? 1u : 0u;
- emit32(mc, aa64_mov_reg(sf, /*Rd=*/0, reg_num(val->storage)));
- }
- } else if (val->storage.kind == OPK_IMM) {
- u32 sf = type_is_64(val->storage.type) ? 1u : 0u;
- emit_load_imm(mc, sf, /*Rd=*/0, val->storage.v.imm);
- } else if (val->storage.kind == OPK_LOCAL ||
- val->storage.kind == OPK_INDIRECT) {
- /* DIRECT return whose source is a local or an indirect lvalue:
- * load each part into x0/x1 (or v0/v1) per the ABI classification.
- * cg hands out INDIRECT base regs from x19..x28, disjoint from the
- * x0/x1 (v0/v1) return regs, so the base survives the part loop. */
- u32 base_reg;
- i32 base_off;
- if (val->storage.kind == OPK_LOCAL) {
- AASlot* s = slot_get(a, val->storage.v.frame_slot);
- if (!s) compiler_panic(t->c, a->loc, "aarch64 ret: bad local slot");
- base_reg = 29; /* fp */
- base_off = -(i32)s->off;
- } else {
- base_reg = val->storage.v.ind.base & 0x1f;
- base_off = val->storage.v.ind.ofs;
- }
- const ABIArgInfo* ri = val->abi;
- for (u16 i = 0; i < (ri ? ri->nparts : 0); ++i) {
- const ABIArgPart* pt = &ri->parts[i];
- u32 sidx = size_idx_for_bytes(pt->size);
- i32 off = base_off + (i32)pt->src_offset;
- if (pt->cls == ABI_CLASS_INT) {
- emit32(mc, aa64_ldur(sidx, /*Rt=*/i, base_reg, off));
- } else if (pt->cls == ABI_CLASS_FP) {
- emit32(mc, aa64_ldur_fp(sidx, /*Rt=*/i, base_reg, off));
- } else {
- compiler_panic(t->c, a->loc, "aarch64 ret: ret part cls %d unimpl",
- (int)pt->cls);
- }
- }
- }
- }
- /* Branch to the epilogue. mc->emit_label_ref records a fixup that
- * resolves to a JUMP26-encoded displacement when the label is placed. */
- u32 bpos = mc->pos(mc);
- emit32(mc, aa64_b_base());
- mc->emit_label_ref(mc, a->epilogue_label, R_AARCH64_JUMP26, 4, 0);
- (void)bpos;
-}
-
-/* Dynamic stack allocation. Layout: outgoing-args (max_outgoing bytes,
- * 16-aligned) sit at the bottom of SP; the alloca block goes immediately
- * above. After lowering SP by an aligned size, the new block's address is
- * (SP + max_outgoing). max_outgoing is only known at func_end, so each
- * alloca emits a placeholder `ADD dst, SP, #0` and registers a patch site;
- * func_end rewrites the imm12 with the final max_outgoing. */
-static void aa_alloca_(CGTarget* t, Operand d, Operand sz, u32 align) {
- AAImpl* a = impl_of(t);
- MCEmitter* mc = t->mc;
-
- if (d.kind != OPK_REG) {
- compiler_panic(t->c, a->loc, "aarch64 alloca: dst must be REG");
- }
- /* SP is 16-aligned and we lower it by an aligned amount, so result
- * inherits 16-byte alignment. Larger requests would need an
- * additional mask on the result; reject so we notice. */
- if (align > 16) {
- compiler_panic(t->c, a->loc,
- "aarch64 alloca: align %u > 16 not yet supported", align);
- }
-
- if (sz.kind == OPK_IMM) {
- i64 v = sz.v.imm;
- if (v < 0) {
- compiler_panic(t->c, a->loc, "aarch64 alloca: negative size");
- }
- u64 aligned = ((u64)v + 15u) & ~(u64)15u;
- if (aligned == 0) aligned = 16; /* keep SP changing */
- if (aligned > 0xfffu) {
- compiler_panic(t->c, a->loc,
- "aarch64 alloca: const size %llu too large for v1",
- (unsigned long long)aligned);
- }
- emit32(mc, aa64_sub_imm(1, /*Rd=SP*/ 31, /*Rn=SP*/ 31, (u32)aligned, 0));
- } else if (sz.kind == OPK_REG) {
- /* Round size up to a 16-byte multiple, then `sub sp, sp, x9`
- * (extended-register form so Rd/Rn=SP work). */
- u32 sz_reg = reg_num(sz);
- emit32(mc, aa64_add_imm(1, 9, sz_reg, 15u, 0)); /* x9 = size+15 */
- emit32(mc, aa64_ubfm(1, 9, 9, 4, 63)); /* lsr x9, x9, #4 */
- emit32(mc, aa64_ubfm(1, 9, 9, 60, 59)); /* lsl x9, x9, #4 */
- emit32(mc, aa64_sub_extreg_x_uxtx(/*SP*/ 31, /*SP*/ 31, 9));
- } else {
- compiler_panic(t->c, a->loc, "aarch64 alloca: size kind %d unsupported",
- (int)sz.kind);
- }
-
- /* Placeholder ADD dst, SP, #<max_outgoing>. Patched at func_end. */
- if (a->nadd_patches == a->add_patches_cap) {
- u32 ncap = a->add_patches_cap ? a->add_patches_cap * 2 : 4;
- struct AAAllocaPatch* nb =
- arena_array(t->c->tu, struct AAAllocaPatch, ncap);
- if (a->add_patches)
- memcpy(nb, a->add_patches, sizeof(*nb) * a->nadd_patches);
- a->add_patches = nb;
- a->add_patches_cap = ncap;
- }
- u32 dst_reg = reg_num(d);
- a->add_patches[a->nadd_patches].pos = mc->pos(mc);
- a->add_patches[a->nadd_patches].dst_reg = dst_reg;
- a->nadd_patches++;
- emit32(mc, aa64_add_imm(1, dst_reg, /*Rn=SP*/ 31, 0, 0));
- a->has_alloca = 1;
-}
-/* AAPCS64 va_list (32 bytes):
- * off 0 void* __stack next stack-passed var arg
- * off 8 void* __gr_top one past end of GP save area
- * off 16 void* __vr_top one past end of FP save area
- * off 24 int __gr_offs current GP offset (negative; >= 0 → use stack)
- * off 28 int __vr_offs current FP offset (negative; >= 0 → use stack)
- *
- * va_start populates the struct from the function's reg-save areas and
- * the named-param consumption already tracked on AAImpl. va_arg dispatches
- * by RegClass: int args walk the GP save area at 8-byte stride; FP args
- * walk the FP save area at 16-byte stride (q-register-sized slots). When
- * the offset reaches 0, fall through to the stack at 8-byte stride. */
-static void emit_fp_off(MCEmitter* mc, u32 dst, i32 ofs) {
- if (ofs == 0)
- emit32(mc, aa64_mov_reg(1, dst, 29));
- else if (ofs > 0 && (u32)ofs <= 0xfff)
- emit32(mc, aa64_add_imm(1, dst, 29, (u32)ofs, 0));
- else if (ofs < 0 && (u32)(-ofs) <= 0xfff)
- emit32(mc, aa64_sub_imm(1, dst, 29, (u32)(-ofs), 0));
- else {
- emit_load_imm(mc, 1, dst, ofs);
- emit32(mc, aa64_add(1, dst, 29, dst));
- }
-}
-
-static void aa_va_start_(CGTarget* t, Operand ap_op) {
- AAImpl* a = impl_of(t);
- MCEmitter* mc = t->mc;
- if (!a->is_variadic) {
- compiler_panic(t->c, a->loc, "aarch64 va_start: function not variadic");
- }
- u32 ap = reg_num(ap_op);
- AASlot* gs = slot_get(a, a->gp_save_slot);
- AASlot* fs = slot_get(a, a->fp_save_slot);
-
- /* __stack = fp + 16 + named-stack-args-bytes */
- {
- u32 ofs = 16u + a->next_param_stack;
- if (ofs <= 0xfff)
- emit32(mc, aa64_add_imm(1, 9, 29, ofs, 0));
- else {
- emit_load_imm(mc, 1, 9, (i64)ofs);
- emit32(mc, aa64_add(1, 9, 29, 9));
- }
- emit32(mc, aa64_str_uimm(3, 9, ap, 0));
- }
- /* __gr_top = fp - gs->off + gs->size */
- emit_fp_off(mc, 9, -(i32)gs->off + (i32)gs->size);
- emit32(mc, aa64_str_uimm(3, 9, ap, 8));
- /* __vr_top = fp - fs->off + fs->size */
- emit_fp_off(mc, 9, -(i32)fs->off + (i32)fs->size);
- emit32(mc, aa64_str_uimm(3, 9, ap, 16));
- /* __gr_offs = named_int*8 - 64 */
- emit_load_imm(mc, 0, 9, (i64)((i32)(a->next_param_int * 8u) - 64));
- emit32(mc, aa64_str_uimm(2, 9, ap, 24));
- /* __vr_offs = named_fp*16 - 128 */
- emit_load_imm(mc, 0, 9, (i64)((i32)(a->next_param_fp * 16u) - 128));
- emit32(mc, aa64_str_uimm(2, 9, ap, 28));
-}
-
-static void aa_va_arg_(CGTarget* t, Operand dst, Operand ap_op,
- const Type* ty) {
- MCEmitter* mc = t->mc;
- u32 ap = reg_num(ap_op);
- int is_fp = (dst.cls == RC_FP);
- u32 offs_field = is_fp ? 28u : 24u;
- u32 top_field = is_fp ? 16u : 8u;
- u32 stride_reg = is_fp ? 16u : 8u;
- u32 sz = type_byte_size(ty);
- u32 sidx = size_idx_for_bytes(sz);
-
- MCLabel L_stack = mc->label_new(mc);
- MCLabel L_done = mc->label_new(mc);
-
- /* w9 = ap.offs ; cmp; b.ge L_stack (>=0 means save area exhausted) */
- emit32(mc, aa64_ldur(2, 9, ap, (i32)offs_field));
- emit32(mc, aa64_subs_imm(0, 31, 9, 0));
- emit32(mc, aa64_b_cond(0xa /*GE*/));
- mc->emit_label_ref(mc, L_stack, R_AARCH64_CONDBR19, 4, 0);
-
- /* save-area path:
- * x10 = ap.top
- * x12 = sxtw(w9)
- * x11 = x10 + x12
- * load dst, [x11]
- * w9 += stride_reg ; ap.offs = w9 ; b L_done */
- emit32(mc, aa64_ldur(3, 10, ap, (i32)top_field));
- emit32(mc, aa64_sbfm(1, 12, 9, 0, 31));
- emit32(mc, aa64_add(1, 11, 10, 12));
- if (is_fp)
- emit32(mc, aa64_ldur_fp(sidx, reg_num(dst), 11, 0));
- else
- emit32(mc, aa64_ldur(sidx, reg_num(dst), 11, 0));
- emit32(mc, aa64_add_imm(0, 9, 9, stride_reg, 0));
- emit32(mc, aa64_stur(2, 9, ap, (i32)offs_field));
- emit32(mc, aa64_b_base());
- mc->emit_label_ref(mc, L_done, R_AARCH64_JUMP26, 4, 0);
-
- /* L_stack: x10 = ap.stack ; load dst,[x10] ; x10+=8 ; ap.stack=x10 */
- mc->label_place(mc, L_stack);
- emit32(mc, aa64_ldur(3, 10, ap, 0));
- if (is_fp)
- emit32(mc, aa64_ldur_fp(sidx, reg_num(dst), 10, 0));
- else
- emit32(mc, aa64_ldur(sidx, reg_num(dst), 10, 0));
- emit32(mc, aa64_add_imm(1, 10, 10, 8u, 0));
- emit32(mc, aa64_stur(3, 10, ap, 0));
-
- mc->label_place(mc, L_done);
-}
-
-static void aa_va_end_(CGTarget* t, Operand a) {
- (void)t;
- (void)a;
-}
-
-static void aa_va_copy_(CGTarget* t, Operand d, Operand s) {
- MCEmitter* mc = t->mc;
- u32 dr = reg_num(d);
- u32 sr = reg_num(s);
- /* va_list is 32 bytes — 4 x 8-byte LDR/STR pairs. */
- for (u32 i = 0; i < 32u; i += 8u) {
- emit32(mc, aa64_ldur(3, 9, sr, (i32)i));
- emit32(mc, aa64_stur(3, 9, dr, (i32)i));
- }
-}
-
-/* ---- atomics ----
- *
- * Lowering uses ARMv8.0 LL/SC (LDXR/STXR family) — no FEAT_LSE assumption.
- * Acquire/Release semantics ride the load/store form chosen by MemOrder
- * (LDAR/STLR for plain accesses; LDAXR/STLXR inside the LL/SC loop).
- * fence() emits DMB ISH (data memory barrier, inner shareable). */
-
-/* Encoder helpers — inline since only used here. */
-static inline u32 aa64_ldar(u32 sf64, u32 Rt, u32 Rn) {
- return (sf64 ? 0xC8DFFC00u : 0x88DFFC00u) | ((Rn & 0x1f) << 5) | (Rt & 0x1f);
-}
-static inline u32 aa64_stlr(u32 sf64, u32 Rt, u32 Rn) {
- return (sf64 ? 0xC89FFC00u : 0x889FFC00u) | ((Rn & 0x1f) << 5) | (Rt & 0x1f);
-}
-static inline u32 aa64_ldxr(u32 sf64, u32 Rt, u32 Rn) {
- return (sf64 ? 0xC85F7C00u : 0x885F7C00u) | ((Rn & 0x1f) << 5) | (Rt & 0x1f);
-}
-static inline u32 aa64_ldaxr(u32 sf64, u32 Rt, u32 Rn) {
- return (sf64 ? 0xC85FFC00u : 0x885FFC00u) | ((Rn & 0x1f) << 5) | (Rt & 0x1f);
-}
-static inline u32 aa64_stxr(u32 sf64, u32 Rs, u32 Rt, u32 Rn) {
- return (sf64 ? 0xC8007C00u : 0x88007C00u) | ((Rs & 0x1f) << 16) |
- ((Rn & 0x1f) << 5) | (Rt & 0x1f);
-}
-static inline u32 aa64_stlxr(u32 sf64, u32 Rs, u32 Rt, u32 Rn) {
- return (sf64 ? 0xC800FC00u : 0x8800FC00u) | ((Rs & 0x1f) << 16) |
- ((Rn & 0x1f) << 5) | (Rt & 0x1f);
-}
-/* CBNZ Rt, imm19 */
-static inline u32 aa64_cbnz(u32 sf64, u32 Rt) {
- return 0x35000000u | (sf64 << 31) | (Rt & 0x1f);
-}
-
-static int mem_order_is_acquire(MemOrder o) {
- return o == MO_ACQUIRE || o == MO_ACQ_REL || o == MO_SEQ_CST ||
- o == MO_CONSUME;
-}
-static int mem_order_is_release(MemOrder o) {
- return o == MO_RELEASE || o == MO_ACQ_REL || o == MO_SEQ_CST;
-}
-
-static void aa_atomic_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma,
- MemOrder ord) {
- AAImpl* a = impl_of(t);
- MCEmitter* mc = t->mc;
- u32 sf = (ma.size == 8) ? 1u : 0u;
-
- /* Resolve addr to a base register; LDAR/LDR-exclusive both want a
- * pointer in a GPR, no offset form. */
- u32 base;
- if (addr.kind == OPK_REG) {
- base = reg_num(addr);
- } else if (addr.kind == OPK_LOCAL) {
- AASlot* s = slot_get(a, addr.v.frame_slot);
- if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_load: bad slot");
- base = 9u;
- emit32(mc, aa64_sub_imm(1, base, 29, s->off, 0));
- } else {
- compiler_panic(t->c, a->loc,
- "aarch64 atomic_load: addr kind %d unsupported",
- (int)addr.kind);
- }
- if (mem_order_is_acquire(ord)) {
- emit32(mc, aa64_ldar(sf, reg_num(dst), base));
- } else {
- u32 sidx = size_idx_for_bytes(ma.size);
- emit32(mc, aa64_ldur(sidx, reg_num(dst), base, 0));
- }
-}
-
-static void aa_atomic_store(CGTarget* t, Operand addr, Operand src,
- MemAccess ma, MemOrder ord) {
- AAImpl* a = impl_of(t);
- MCEmitter* mc = t->mc;
- u32 sf = (ma.size == 8) ? 1u : 0u;
-
- /* Materialize src into a register if needed. */
- u32 src_reg;
- if (src.kind == OPK_IMM) {
- src_reg = 10u;
- emit_load_imm(mc, sf, src_reg, src.v.imm);
- } else if (src.kind == OPK_REG) {
- src_reg = reg_num(src);
- } else {
- compiler_panic(t->c, a->loc,
- "aarch64 atomic_store: src kind %d unsupported",
- (int)src.kind);
- }
- /* Base reg. */
- u32 base;
- if (addr.kind == OPK_REG) {
- base = reg_num(addr);
- } else if (addr.kind == OPK_LOCAL) {
- AASlot* s = slot_get(a, addr.v.frame_slot);
- if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_store: bad slot");
- base = 9u;
- emit32(mc, aa64_sub_imm(1, base, 29, s->off, 0));
- } else {
- compiler_panic(t->c, a->loc,
- "aarch64 atomic_store: addr kind %d unsupported",
- (int)addr.kind);
- }
- if (mem_order_is_release(ord)) {
- emit32(mc, aa64_stlr(sf, src_reg, base));
- } else {
- u32 sidx = size_idx_for_bytes(ma.size);
- emit32(mc, aa64_stur(sidx, src_reg, base, 0));
- }
-}
-
-/* Apply rmw op: new = f(prior, val). prior, val, dst are W/X based on sf.
- * Uses scratch x12 if a temporary is needed (e.g. NAND). */
-static void emit_rmw_combine(MCEmitter* mc, AtomicOp op, u32 sf, u32 dst_new,
- u32 prior, u32 val) {
- switch (op) {
- case AO_XCHG:
- emit32(mc, aa64_mov_reg(sf, dst_new, val));
- break;
- case AO_ADD:
- emit32(mc, aa64_add(sf, dst_new, prior, val));
- break;
- case AO_SUB:
- emit32(mc, aa64_sub(sf, dst_new, prior, val));
- break;
- case AO_AND:
- emit32(mc, aa64_and(sf, dst_new, prior, val));
- break;
- case AO_OR:
- emit32(mc, aa64_orr(sf, dst_new, prior, val));
- break;
- case AO_XOR:
- emit32(mc, aa64_eor(sf, dst_new, prior, val));
- break;
- case AO_NAND:
- /* NAND: new = ~(prior & val). AArch64 has no NAND; use AND then MVN. */
- emit32(mc, aa64_and(sf, dst_new, prior, val));
- emit32(mc, aa64_mvn(sf, dst_new, dst_new));
- break;
- default:
- emit32(mc, aa64_mov_reg(sf, dst_new, val));
- break;
- }
-}
-
-static void aa_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr,
- Operand val, MemAccess ma, MemOrder ord) {
- AAImpl* a = impl_of(t);
- MCEmitter* mc = t->mc;
- u32 sf = (ma.size == 8) ? 1u : 0u;
-
- /* Pin operands into scratch regs:
- * x9 = base (atomic addr)
- * x10 = val
- * x11 = new (computed)
- * w12 = stxr status flag
- * dst (prior) is the user-provided destination reg. */
- u32 base = 9u;
- if (addr.kind == OPK_REG) {
- emit32(mc, aa64_mov_reg(1, 9, reg_num(addr)));
- } else if (addr.kind == OPK_LOCAL) {
- AASlot* s = slot_get(a, addr.v.frame_slot);
- if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_rmw: bad slot");
- emit32(mc, aa64_sub_imm(1, 9, 29, s->off, 0));
- } else {
- compiler_panic(t->c, a->loc, "aarch64 atomic_rmw: addr kind %d unsupported",
- (int)addr.kind);
- }
- u32 vreg = 10u;
- if (val.kind == OPK_IMM) {
- emit_load_imm(mc, sf, vreg, val.v.imm);
- } else if (val.kind == OPK_REG) {
- emit32(mc, aa64_mov_reg(sf, vreg, reg_num(val)));
- } else {
- compiler_panic(t->c, a->loc, "aarch64 atomic_rmw: val kind %d unsupported",
- (int)val.kind);
- }
-
- int do_acq = mem_order_is_acquire(ord);
- int do_rel = mem_order_is_release(ord);
-
- MCLabel L_retry = mc->label_new(mc);
- mc->label_place(mc, L_retry);
-
- /* prior <- ldxr/ldaxr [base] */
- if (do_acq)
- emit32(mc, aa64_ldaxr(sf, reg_num(dst), base));
- else
- emit32(mc, aa64_ldxr(sf, reg_num(dst), base));
-
- /* new = combine(prior, val) into x11 */
- emit_rmw_combine(mc, op, sf, /*new=*/11u, /*prior=*/reg_num(dst), vreg);
-
- /* status <- stxr/stlxr [base], new ; cbnz status, retry */
- if (do_rel)
- emit32(mc, aa64_stlxr(sf, /*Rs=*/12u, /*Rt=*/11u, base));
- else
- emit32(mc, aa64_stxr(sf, /*Rs=*/12u, /*Rt=*/11u, base));
-
- u32 cbnz_pos = mc->pos(mc);
- emit32(mc, aa64_cbnz(0, /*Rt=*/12u));
- mc->emit_label_ref(mc, L_retry, R_AARCH64_CONDBR19, 4, 0);
- (void)cbnz_pos;
-}
-
-static void aa_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr,
- Operand expected, Operand desired, MemAccess ma,
- MemOrder succ, MemOrder fail) {
- AAImpl* a = impl_of(t);
- MCEmitter* mc = t->mc;
- u32 sf = (ma.size == 8) ? 1u : 0u;
- (void)fail;
-
- /* Pin operands:
- * x9 = base
- * x10 = expected (compare against prior)
- * x11 = desired (store on match)
- * w12 = stxr status flag */
- u32 base = 9u;
- if (addr.kind == OPK_REG)
- emit32(mc, aa64_mov_reg(1, 9, reg_num(addr)));
- else if (addr.kind == OPK_LOCAL) {
- AASlot* s = slot_get(a, addr.v.frame_slot);
- if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_cas: bad slot");
- emit32(mc, aa64_sub_imm(1, 9, 29, s->off, 0));
- } else {
- compiler_panic(t->c, a->loc, "aarch64 atomic_cas: addr kind %d unsupported",
- (int)addr.kind);
- }
- if (expected.kind == OPK_IMM)
- emit_load_imm(mc, sf, 10, expected.v.imm);
- else if (expected.kind == OPK_REG)
- emit32(mc, aa64_mov_reg(sf, 10, reg_num(expected)));
- else
- compiler_panic(t->c, a->loc, "aarch64 atomic_cas: exp kind %d unsupported",
- (int)expected.kind);
- if (desired.kind == OPK_IMM)
- emit_load_imm(mc, sf, 11, desired.v.imm);
- else if (desired.kind == OPK_REG)
- emit32(mc, aa64_mov_reg(sf, 11, reg_num(desired)));
- else
- compiler_panic(t->c, a->loc, "aarch64 atomic_cas: des kind %d unsupported",
- (int)desired.kind);
-
- int do_acq = mem_order_is_acquire(succ);
- int do_rel = mem_order_is_release(succ);
-
- MCLabel L_retry = mc->label_new(mc);
- MCLabel L_fail = mc->label_new(mc);
- MCLabel L_done = mc->label_new(mc);
-
- mc->label_place(mc, L_retry);
- if (do_acq)
- emit32(mc, aa64_ldaxr(sf, reg_num(prior), base));
- else
- emit32(mc, aa64_ldxr(sf, reg_num(prior), base));
-
- /* if (prior != expected) -> fail (clrex + ok=0) */
- emit32(mc, aa64_subs_reg(sf, /*Rd=ZR*/ 31u, reg_num(prior), 10u));
- emit32(mc, aa64_b_cond(0x1u /*NE*/));
- mc->emit_label_ref(mc, L_fail, R_AARCH64_CONDBR19, 4, 0);
-
- /* try store; retry on stxr failure */
- if (do_rel)
- emit32(mc, aa64_stlxr(sf, 12u, 11u, base));
- else
- emit32(mc, aa64_stxr(sf, 12u, 11u, base));
- emit32(mc, aa64_cbnz(0, 12u));
- mc->emit_label_ref(mc, L_retry, R_AARCH64_CONDBR19, 4, 0);
-
- /* ok = 1 ; jump done */
- emit_load_imm(mc, 0, reg_num(ok), 1);
- emit32(mc, aa64_b_base());
- mc->emit_label_ref(mc, L_done, R_AARCH64_JUMP26, 4, 0);
-
- /* L_fail: clear monitor; ok = 0 */
- mc->label_place(mc, L_fail);
- emit32(mc, aa64_clrex(AA64_BARRIER_OPT_SY));
- emit_load_imm(mc, 0, reg_num(ok), 0);
-
- mc->label_place(mc, L_done);
-}
-
-static void aa_fence(CGTarget* t, MemOrder o) {
- (void)o;
- /* Conservative: full-system DMB ISH for any release/acquire/seq_cst.
- * RELAXED fence is a no-op. */
- if (o == MO_RELAXED) return;
- emit32(t->mc, aa64_dmb(AA64_BARRIER_OPT_ISH));
-}
-
-/* ---- intrinsics ---- */
-
-/* Data-processing (1 source) — REV16 / REV / REV32 / RBIT / CLZ.
- * Family base 0x5AC00000 (sf=0); set sf<<31 for 64-bit forms. */
-static inline u32 aa64_rev16_w(u32 Rd, u32 Rn) {
- return 0x5AC00400u | ((Rn & 0x1f) << 5) | (Rd & 0x1f);
-}
-static inline u32 aa64_rev_w(u32 Rd, u32 Rn) {
- return 0x5AC00800u | ((Rn & 0x1f) << 5) | (Rd & 0x1f);
-}
-static inline u32 aa64_rev_x(u32 Rd, u32 Rn) {
- return 0xDAC00C00u | ((Rn & 0x1f) << 5) | (Rd & 0x1f);
-}
-static inline u32 aa64_rbit(u32 sf64, u32 Rd, u32 Rn) {
- return (sf64 ? 0xDAC00000u : 0x5AC00000u) | ((Rn & 0x1f) << 5) | (Rd & 0x1f);
-}
-static inline u32 aa64_clz(u32 sf64, u32 Rd, u32 Rn) {
- return (sf64 ? 0xDAC01000u : 0x5AC01000u) | ((Rn & 0x1f) << 5) | (Rd & 0x1f);
-}
-
-/* SIMD CNT (Vd.<T>, Vn.<T>) and ADDV (Bd, Vn.8B). 8B form, Q=0. */
-static inline u32 aa64_cnt_8b(u32 Vd, u32 Vn) {
- return 0x0E205800u | ((Vn & 0x1f) << 5) | (Vd & 0x1f);
-}
-static inline u32 aa64_addv_b_8b(u32 Vd, u32 Vn) {
- return 0x0E31B800u | ((Vn & 0x1f) << 5) | (Vd & 0x1f);
-}
-
-/* ADDS / SUBS shifted register (S=1; sets NZCV including V for signed ovf). */
-static inline u32 aa64_adds_reg(u32 sf, u32 Rd, u32 Rn, u32 Rm) {
- return 0x2B000000u | (sf << 31) | ((Rm & 0x1f) << 16) | ((Rn & 0x1f) << 5) |
- (Rd & 0x1f);
-}
-
-/* SMADDL / UMADDL → SMULL / UMULL with Ra = ZR. 64-bit dst, 32-bit srcs. */
-static inline u32 aa64_smaddl(u32 Rd, u32 Rn, u32 Rm, u32 Ra) {
- return aa64_dp3_pack((AA64DP3){
- .sf = 1, .op31 = 1, .o0 = 0, .Rm = Rm, .Ra = Ra, .Rn = Rn, .Rd = Rd});
-}
-static inline u32 aa64_smull(u32 Rd, u32 Rn, u32 Rm) {
- return aa64_smaddl(Rd, Rn, Rm, AA64_ZR);
-}
-
-/* SUBS Xd, Xn, Wm, SXTW — extended-register form, used for the
- * mul_overflow check (compare full 64-bit product to sign-extended low 32). */
-static inline u32 aa64_subs_extreg_x_sxtw(u32 Rd, u32 Rn, u32 Rm) {
- return 0xEB200000u | ((Rm & 0x1f) << 16) | (6u << 13) | ((Rn & 0x1f) << 5) |
- (Rd & 0x1f);
-}
-
-/* BRK #imm16 (TRAP/UNREACHABLE landing pads) lives in arch/aa64_isa.h
- * alongside the rest of the exception-generation family. */
-
-static void aa_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd,
- const Operand* args, u32 na) {
- AAImpl* a = impl_of(t);
- MCEmitter* mc = t->mc;
- (void)nd;
-
- switch (kind) {
- case INTRIN_POPCOUNT: {
- /* fmov v0, src ; cnt v0.8b, v0.8b ; addv b0, v0.8b ; fmov w_dst, s0 */
- Operand src = args[0];
- Operand dst = dsts[0];
- u32 sz_in = type_byte_size(src.type);
- if (sz_in == 8)
- emit32(mc, aa64_fmov_d_x(0, reg_num(src)));
- else
- emit32(mc, aa64_fmov_s_w(0, reg_num(src)));
- emit32(mc, aa64_cnt_8b(0, 0));
- emit32(mc, aa64_addv_b_8b(0, 0));
- emit32(mc, aa64_fmov_w_s(reg_num(dst), 0));
- return;
- }
- case INTRIN_CLZ: {
- Operand src = args[0];
- Operand dst = dsts[0];
- u32 sf = type_is_64(src.type) ? 1u : 0u;
- emit32(mc, aa64_clz(sf, reg_num(dst), reg_num(src)));
- return;
- }
- case INTRIN_CTZ: {
- /* ctz(x) = clz(rbit(x)) */
- Operand src = args[0];
- Operand dst = dsts[0];
- u32 sf = type_is_64(src.type) ? 1u : 0u;
- emit32(mc, aa64_rbit(sf, reg_num(dst), reg_num(src)));
- emit32(mc, aa64_clz(sf, reg_num(dst), reg_num(dst)));
- return;
- }
- case INTRIN_BSWAP16: {
- emit32(mc, aa64_rev16_w(reg_num(dsts[0]), reg_num(args[0])));
- return;
- }
- case INTRIN_BSWAP32: {
- emit32(mc, aa64_rev_w(reg_num(dsts[0]), reg_num(args[0])));
- return;
- }
- case INTRIN_BSWAP64: {
- emit32(mc, aa64_rev_x(reg_num(dsts[0]), reg_num(args[0])));
- return;
- }
- case INTRIN_MEMCPY:
- case INTRIN_MEMMOVE: {
- /* args = (dst_addr, src_addr, n_bytes). v1 only handles a constant
- * n: unroll forward (memcpy) or backward (memmove). */
- Operand da = args[0], sa = args[1], nb = args[2];
- if (da.kind != OPK_REG || sa.kind != OPK_REG || nb.kind != OPK_IMM) {
- compiler_panic(t->c, a->loc,
- "aarch64 intrinsic: %s with non-const n or non-REG ptr",
- kind == INTRIN_MEMCPY ? "memcpy" : "memmove");
- }
- u32 dr = reg_num(da);
- u32 sr = reg_num(sa);
- u32 n = (u32)nb.v.imm;
- if (kind == INTRIN_MEMCPY) {
- u32 i = 0;
- while (i + 8 <= n) {
- emit32(mc, aa64_ldur(3, 12, sr, (i32)i));
- emit32(mc, aa64_stur(3, 12, dr, (i32)i));
- i += 8;
- }
- while (i + 4 <= n) {
- emit32(mc, aa64_ldur(2, 12, sr, (i32)i));
- emit32(mc, aa64_stur(2, 12, dr, (i32)i));
- i += 4;
- }
- while (i + 2 <= n) {
- emit32(mc, aa64_ldur(1, 12, sr, (i32)i));
- emit32(mc, aa64_stur(1, 12, dr, (i32)i));
- i += 2;
- }
- while (i < n) {
- emit32(mc, aa64_ldur(0, 12, sr, (i32)i));
- emit32(mc, aa64_stur(0, 12, dr, (i32)i));
- i += 1;
- }
- } else {
- /* memmove: copy backward to handle dst > src overlap. */
- u32 i = n;
- while (i >= 8) {
- i -= 8;
- emit32(mc, aa64_ldur(3, 12, sr, (i32)i));
- emit32(mc, aa64_stur(3, 12, dr, (i32)i));
- }
- while (i >= 4) {
- i -= 4;
- emit32(mc, aa64_ldur(2, 12, sr, (i32)i));
- emit32(mc, aa64_stur(2, 12, dr, (i32)i));
- }
- while (i >= 2) {
- i -= 2;
- emit32(mc, aa64_ldur(1, 12, sr, (i32)i));
- emit32(mc, aa64_stur(1, 12, dr, (i32)i));
- }
- while (i >= 1) {
- i -= 1;
- emit32(mc, aa64_ldur(0, 12, sr, (i32)i));
- emit32(mc, aa64_stur(0, 12, dr, (i32)i));
- }
- }
- return;
- }
- case INTRIN_MEMSET: {
- /* args = (dst_addr, byte, n) */
- Operand da = args[0], bv = args[1], nb = args[2];
- if (da.kind != OPK_REG || nb.kind != OPK_IMM) {
- compiler_panic(
- t->c, a->loc,
- "aarch64 intrinsic: memset with non-const n / non-REG ptr");
- }
- u32 dr = reg_num(da);
- u32 n = (u32)nb.v.imm;
- u32 byte;
- u32 src_reg;
- if (bv.kind == OPK_IMM) {
- byte = (u32)(bv.v.imm & 0xffu);
- if (byte == 0) {
- src_reg = 31u; /* XZR / WZR */
- } else {
- u64 b64 = byte;
- b64 |= b64 << 8;
- b64 |= b64 << 16;
- b64 |= b64 << 32;
- emit_load_imm(mc, 1, 12, (i64)b64);
- src_reg = 12u;
- }
- } else if (bv.kind == OPK_REG) {
- /* Broadcast: dup low byte across all 8 bytes via ORR-immediate
- * trickery is awkward; use mul-by-0x0101010101010101. */
- emit_load_imm(mc, 1, 12, (i64)0x0101010101010101ll);
- emit32(mc, aa64_madd(1, 12, reg_num(bv), 12, AA64_ZR));
- src_reg = 12u;
- } else {
- compiler_panic(t->c, a->loc,
- "aarch64 intrinsic: memset byte kind %d unsupported",
- (int)bv.kind);
- }
- u32 i = 0;
- while (i + 8 <= n) {
- emit32(mc, aa64_stur(3, src_reg, dr, (i32)i));
- i += 8;
- }
- while (i + 4 <= n) {
- emit32(mc, aa64_stur(2, src_reg, dr, (i32)i));
- i += 4;
- }
- while (i + 2 <= n) {
- emit32(mc, aa64_stur(1, src_reg, dr, (i32)i));
- i += 2;
- }
- while (i < n) {
- emit32(mc, aa64_stur(0, src_reg, dr, (i32)i));
- i += 1;
- }
- return;
- }
- case INTRIN_PREFETCH:
- /* No-op hint. */
- (void)args;
- (void)na;
- return;
- case INTRIN_ASSUME_ALIGNED: {
- /* dst = src (alignment is a hint only). */
- Operand src = args[0];
- Operand dst = dsts[0];
- if (reg_num(src) != reg_num(dst)) {
- emit32(mc, aa64_mov_reg(1, reg_num(dst), reg_num(src)));
- }
- return;
- }
- case INTRIN_EXPECT: {
- /* dst = val (the "expected" hint is dropped). */
- Operand val = args[0];
- Operand dst = dsts[0];
- u32 sf = type_is_64(dst.type) ? 1u : 0u;
- if (val.kind == OPK_REG) {
- if (reg_num(val) != reg_num(dst)) {
- emit32(mc, aa64_mov_reg(sf, reg_num(dst), reg_num(val)));
- }
- } else if (val.kind == OPK_IMM) {
- emit_load_imm(mc, sf, reg_num(dst), val.v.imm);
- } else {
- compiler_panic(t->c, a->loc,
- "aarch64 intrinsic: expect val kind %d unsupported",
- (int)val.kind);
- }
- return;
- }
- case INTRIN_UNREACHABLE:
- case INTRIN_TRAP:
- emit32(mc, aa64_brk(kind == INTRIN_TRAP ? 1u : 0u));
- return;
- case INTRIN_ADD_OVERFLOW:
- case INTRIN_SUB_OVERFLOW: {
- /* dsts: [val, ovf]. ADDS/SUBS sets V on signed overflow; CSET VS. */
- Operand a_op = args[0], b_op = args[1];
- Operand dval = dsts[0], dovf = dsts[1];
- u32 sf = type_is_64(dval.type) ? 1u : 0u;
- u32 ra = force_reg_int(t, a_op, sf, 9);
- u32 rb = force_reg_int(t, b_op, sf, (ra == 9) ? 10u : 9u);
- u32 word = (kind == INTRIN_ADD_OVERFLOW)
- ? aa64_adds_reg(sf, reg_num(dval), ra, rb)
- : aa64_subs_reg(sf, reg_num(dval), ra, rb);
- emit32(mc, word);
- emit32(mc, aa64_cset(sf, reg_num(dovf), 0x6u /*VS*/));
- return;
- }
- case INTRIN_MUL_OVERFLOW: {
- /* SMULL Xtmp, Wn, Wm gives full 64-bit signed product.
- * ovf = (Xtmp != sxtw(Wtmp)) — i.e. upper 32 bits ≠ sign-ext of low.
- * dval gets the truncated low 32 bits. */
- Operand a_op = args[0], b_op = args[1];
- Operand dval = dsts[0], dovf = dsts[1];
- u32 sf = type_is_64(dval.type) ? 1u : 0u;
- if (sf) {
- compiler_panic(
- t->c, a->loc,
- "aarch64 intrinsic: mul_overflow on i64 not yet supported");
- }
- u32 ra = force_reg_int(t, a_op, 0, 9);
- u32 rb = force_reg_int(t, b_op, 0, (ra == 9) ? 10u : 9u);
- emit32(mc, aa64_smull(/*X*/ 11u, ra, rb));
- emit32(mc, aa64_subs_extreg_x_sxtw(/*XZR*/ 31u, /*Xn=*/11u, /*Wm=*/11u));
- emit32(mc, aa64_cset(0, reg_num(dovf), 0x1u /*NE*/));
- emit32(mc, aa64_mov_reg(0, reg_num(dval), 11u));
- return;
- }
- default:
- compiler_panic(t->c, a->loc, "aarch64 intrinsic: kind %d unsupported",
- (int)kind);
- }
-}
-
-static void aa_asm_block(CGTarget* t, const char* tmpl,
- const AsmConstraint* outs, u32 no, Operand* oo,
- const AsmConstraint* ins, u32 ni, const Operand* io,
- const Sym* clobs, u32 nc) {
- /* Per doc/INLINEASM.md §6: open a per-block AA64Asm, bind operands +
- * clobbers, walk the template substituting placeholders into asm
- * source text and dispatching each line through the standalone
- * aa64_asm_insn parser.
- *
- * cg_inline_asm has already spilled any live SValues bound to
- * physical regs named in `clobs` (via target->resolve_reg_name). Here
- * we additionally bump the callee-save high-water marks so the
- * prologue saves/restores any callee-saved reg the asm body trashes
- * even when no SValue ever used it. */
- AAImpl* a_impl = impl_of(t);
- for (u32 i = 0; i < nc; ++i) {
- Reg phys;
- RegClass cls;
- if (aa_resolve_reg_name(t, clobs[i], &phys, &cls) != 0) continue;
- if (cls == RC_INT) {
- u32 idx = (u32)phys;
- RegPool* p = &a_impl->int_pool;
- if (idx >= p->base && idx < (u32)(p->base + p->nregs)) {
- u32 off = idx - p->base + 1u;
- if (off > p->hwm) p->hwm = off;
- }
- } else if (cls == RC_FP) {
- u32 idx = (u32)phys;
- RegPool* p = &a_impl->fp_pool;
- if (idx >= p->base && idx < (u32)(p->base + p->nregs)) {
- u32 off = idx - p->base + 1u;
- if (off > p->hwm) p->hwm = off;
- }
- }
- }
- AA64Asm* a = aa64_asm_open(t->c);
- aa64_inline_bind(a, outs, no, oo, ins, ni, io, clobs, nc);
- aa64_asm_run_template(a, t->mc, tmpl);
- aa64_asm_close(a);
-}
-
-static void aa_set_loc(CGTarget* t, SrcLoc loc) {
- impl_of(t)->loc = loc;
- t->mc->set_loc(t->mc, loc);
-}
-
-static void aa_finalize(CGTarget* t) { (void)t; }
-
-static void aa_destroy(CGTarget* t) { (void)t; /* arena-backed */ }
-
-/* ---- construction ---- */
-
-static void cgt_cleanup(void* arg) { cgtarget_free((CGTarget*)arg); }
-
-CGTarget* aa64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) {
- AAImpl* a = arena_new(c->tu, AAImpl);
- memset(a, 0, sizeof *a);
-
- CGTarget* t = &a->base;
- t->c = c;
- t->obj = o;
- t->mc = m;
-
- t->func_begin = aa_func_begin;
- t->func_end = aa_func_end;
-
- t->alloc_reg = aa_alloc_reg;
- t->free_reg = aa_free_reg;
- t->frame_slot = aa_frame_slot;
- t->param = aa_param;
- t->clobbers = aa_clobbers;
- t->spill_reg = aa_spill_reg;
- t->reload_reg = aa_reload_reg;
-
- t->label_new = aa_label_new;
- t->label_place = aa_label_place;
- t->jump = aa_jump;
- t->cmp_branch = aa_cmp_branch;
-
- t->scope_begin = aa_scope_begin;
- t->scope_else = aa_scope_else;
- t->scope_end = aa_scope_end;
- t->break_to = aa_break_to;
- t->continue_to = aa_continue_to;
-
- t->load_imm = aa_load_imm;
- t->load_const = aa_load_const;
- t->copy = aa_copy;
- t->load = aa_load;
- t->store = aa_store;
- t->addr_of = aa_addr_of;
- t->tls_addr_of = aa_tls_addr_of;
- t->copy_bytes = aa_copy_bytes;
- t->set_bytes = aa_set_bytes;
- t->bitfield_load = aa_bitfield_load;
- t->bitfield_store = aa_bitfield_store;
-
- t->binop = aa_binop;
- t->unop = aa_unop;
- t->cmp = aa_cmp;
- t->convert = aa_convert;
-
- t->call = aa_call;
- t->ret = aa_ret;
-
- t->alloca_ = aa_alloca_;
- t->va_start_ = aa_va_start_;
- t->va_arg_ = aa_va_arg_;
- t->va_end_ = aa_va_end_;
- t->va_copy_ = aa_va_copy_;
-
- t->setjmp_ = NULL;
- t->longjmp_ = NULL;
-
- t->atomic_load = aa_atomic_load;
- t->atomic_store = aa_atomic_store;
- t->atomic_rmw = aa_atomic_rmw;
- t->atomic_cas = aa_atomic_cas;
- t->fence = aa_fence;
-
- t->intrinsic = aa_intrinsic;
- t->asm_block = aa_asm_block;
- t->resolve_reg_name = aa_resolve_reg_name;
-
- t->set_loc = aa_set_loc;
- t->finalize = aa_finalize;
- t->destroy = aa_destroy;
-
- /* Avoid signed/unsigned warning. */
- (void)type_is_signed;
-
- compiler_defer(c, cgt_cleanup, t);
- return t;
-}
diff --git a/src/arch/aarch64/alloc.c b/src/arch/aarch64/alloc.c
@@ -0,0 +1,318 @@
+/* aarch64/alloc.c — register pool, spill/reload, labels, control flow,
+ * structured scopes. */
+
+#include "arch/aarch64/internal.h"
+
+/* ============================================================
+ * AAImpl accessor — a plain pointer cast from CGTarget* to AAImpl* (no checks).
+ * ============================================================ */
+
+AAImpl* impl_of(CGTarget* t) { return (AAImpl*)t; }
+
+/* ============================================================
+ * RegPool — bitmask allocator: bit i of `free` set means reg base+i is free.
+ * ============================================================ */
+
+void regpool_init(RegPool* p, u8 base, u8 nregs) {
+  p->free = (nregs < 32u) ? ((1u << nregs) - 1u) : 0xFFFFFFFFu;
+  p->hwm = 0;
+  p->nregs = nregs;
+  p->base = base;
+}
+
+Reg regpool_alloc(RegPool* p) {
+  if (!p->free) return (Reg)REG_NONE; /* pool exhausted */
+  u32 slot = (u32)__builtin_ctz(p->free); /* lowest-numbered free register */
+  if (p->hwm < slot + 1u) p->hwm = slot + 1u; /* high-water mark = max slot+1 */
+  p->free &= ~(1u << slot);
+  return (Reg)(p->base + slot);
+}
+
+int regpool_free(RegPool* p, Reg r) { /* 1 = freed, -1 = already free, 0 = not ours */
+  /* Unsigned wrap turns r < base into a huge index, so one range test suffices. */
+  u32 idx = (u32)r - (u32)p->base;
+  if (idx >= (u32)p->nregs || idx >= 32u) return 0; /* mask holds only 32 bits */
+  u32 bit = 1u << idx;
+  if (p->free & bit) return -1;
+  p->free |= bit;
+  return 1;
+}
+
+/* ============================================================
+ * Slot accessor — handle fs maps to a->slots[fs - 1]; NULL if out of range.
+ * ============================================================ */
+
+AASlot* aa64_slot_get(AAImpl* a, FrameSlot fs) {
+  int valid = (fs != FRAME_SLOT_NONE) && (fs <= a->nslots);
+  return valid ? &a->slots[fs - 1] : NULL;
+}
+
+/* ============================================================
+ * Register allocation / free
+ * ============================================================ */
+
+static Reg aa_alloc_reg(CGTarget* t, RegClass cls, const Type* ty) {
+  AAImpl* impl = impl_of(t);
+  (void)ty; /* the type does not influence which pool is used */
+  if (cls != RC_INT && cls != RC_FP)
+    compiler_panic(t->c, impl->loc, "aarch64 alloc_reg: class %d unimpl", (int)cls);
+  return regpool_alloc(cls == RC_INT ? &impl->int_pool : &impl->fp_pool);
+}
+
+void aa_free_reg(CGTarget* t, Reg r, RegClass cls) {
+  /* Return register r to the pool for cls; any misuse is a compiler panic. */
+  AAImpl* impl = impl_of(t);
+  if (cls != RC_INT && cls != RC_FP) {
+    compiler_panic(t->c, impl->loc, "aarch64 free_reg: class %d unimpl",
+                   (int)cls);
+  }
+  RegPool* pool = (cls == RC_INT) ? &impl->int_pool : &impl->fp_pool;
+  const char* pool_name = (cls == RC_FP) ? "fp" : "int";
+  /* regpool_free: 1 = released, -1 = was already free, 0 = outside the pool. */
+  int status = regpool_free(pool, r);
+  if (status == 1) return;
+  if (status == -1) {
+    compiler_panic(t->c, impl->loc,
+                   "aarch64 free_reg: reg %u already free in %s pool",
+                   (unsigned)r, pool_name);
+  }
+  compiler_panic(t->c, impl->loc, "aarch64 free_reg: reg %u not in %s pool",
+                 (unsigned)r, pool_name);
+}
+
+static const Reg* aa_clobbers(CGTarget* t, RegClass c, u32* n) {
+ (void)c; /* stub: both arguments are ignored */
+ (void)n;
+ compiler_panic(t->c, impl_of(t)->loc, "aarch64: clobbers not implemented"); /* always panics */
+}
+
+static int aa_resolve_reg_name(CGTarget* t, Sym name, Reg* out,
+                               RegClass* cls_out) {
+  /* Returns 0 and fills *out / *cls_out (each may be NULL) on success, else 1. */
+  size_t len = 0;
+  const char* s = pool_str(t->c->global, name, &len);
+  if (!s || !len) return 1;
+  char buf[8]; /* NUL-terminated copy for the lookup; 8+ char names are rejected */
+  if (len >= sizeof buf) return 1;
+  memcpy(buf, s, len);
+  buf[len] = '\0';
+  u32 dwarf;
+  if (aa64_register_index(buf, &dwarf) != 0) return 1;
+  if (dwarf <= 30u) { /* indices 0..30: integer class, same number */
+    if (out) *out = (Reg)dwarf;
+    if (cls_out) *cls_out = RC_INT;
+    return 0;
+  }
+  if (dwarf >= 64u && dwarf <= 95u) { /* indices 64..95: FP class, rebased to 0..31 */
+    if (out) *out = (Reg)(dwarf - 64u);
+    if (cls_out) *cls_out = RC_FP;
+    return 0;
+  }
+  return 1; /* any other index is not resolvable here */
+}
+
+static void aa_spill_reg(CGTarget* t, Operand src, FrameSlot slot,
+                         MemAccess ma) {
+  /* Store src into frame slot `slot`, then release src's register. */
+  if (src.kind != OPK_REG)
+    compiler_panic(t->c, impl_of(t)->loc,
+                   "aarch64 spill_reg: src is not OPK_REG");
+  Operand home; /* OPK_LOCAL operand naming the destination slot */
+  memset(&home, 0, sizeof home);
+  home.v.frame_slot = slot;
+  home.type = ma.type;
+  home.cls = RC_INT;
+  home.kind = OPK_LOCAL;
+  aa_store(t, home, src, ma);
+  aa_free_reg(t, src.v.reg, src.cls);
+}
+
+static void aa_reload_reg(CGTarget* t, Operand dst, FrameSlot slot,
+                          MemAccess ma) {
+  /* Load frame slot `slot` back into the register operand dst. */
+  if (dst.kind != OPK_REG)
+    compiler_panic(t->c, impl_of(t)->loc,
+                   "aarch64 reload_reg: dst is not OPK_REG");
+  Operand home; /* OPK_LOCAL operand naming the source slot */
+  memset(&home, 0, sizeof home);
+  home.v.frame_slot = slot;
+  home.type = ma.type;
+  home.cls = RC_INT;
+  home.kind = OPK_LOCAL;
+  aa_load(t, dst, home, ma);
+}
+
+/* ============================================================
+ * Labels / control flow (label_new / label_place forward to t->mc)
+ * ============================================================ */
+
+static Label aa_label_new(CGTarget* t) {
+ return (Label)t->mc->label_new(t->mc); /* MCLabel handle reused as the CG Label */
+}
+
+static void aa_label_place(CGTarget* t, Label l) {
+ t->mc->label_place(t->mc, (MCLabel)l); /* bind l via the MC emitter */
+}
+
+void aa_jump(CGTarget* t, Label l) {
+  /* Emit the base B opcode, then record an R_AARCH64_JUMP26 reference to l. */
+  aa64_emit32(t->mc, aa64_b_base());
+  t->mc->emit_label_ref(t->mc, (MCLabel)l, R_AARCH64_JUMP26, 4, 0);
+}
+
+static u32 cmp_to_cond(CmpOp op) {
+  /* Map a CmpOp to the 4-bit AArch64 condition-code encoding.
+   * Any op not listed falls back to EQ (0x0). */
+  if (op == CMP_EQ) return 0x0u;   /* EQ */
+  if (op == CMP_NE) return 0x1u;   /* NE */
+  if (op == CMP_GE_U) return 0x2u; /* HS */
+  if (op == CMP_LT_U) return 0x3u; /* LO */
+  if (op == CMP_GT_U) return 0x8u; /* HI */
+  if (op == CMP_LE_U) return 0x9u; /* LS */
+  if (op == CMP_GE_S) return 0xau; /* GE */
+  if (op == CMP_LT_S) return 0xbu; /* LT */
+  if (op == CMP_GT_S) return 0xcu; /* GT */
+  if (op == CMP_LE_S) return 0xdu; /* LE */
+  return 0x0u;
+}
+
+void emit_cmp_ab(CGTarget* t, Operand a_op, Operand b_op) {
+  /* Set the flags from a_op - b_op via SUBS with the zero register as Rd. */
+  MCEmitter* mc = t->mc;
+  u32 sf = type_is_64(a_op.type) ? 1u : 0u;
+  u32 imm12 = 0, sh = 0;
+  int use_imm = b_op.kind == OPK_IMM && a_op.kind != OPK_IMM &&
+                aa64_addsub_imm_fits(b_op.v.imm, &imm12, &sh);
+  u32 lhs = aa64_force_reg_int(t, a_op, sf, 9);
+  if (use_imm) { /* immediate form: b_op needs no register */
+    aa64_emit32(mc, aa64_subs_imm12(sf, /*Rd=ZR*/ 31u, lhs, imm12, sh));
+    return;
+  }
+  u32 rhs = aa64_force_reg_int(t, b_op, sf, (lhs == 9) ? 10u : 9u);
+  aa64_emit32(mc, aa64_subs_reg(sf, /*Rd=ZR*/ 31u, lhs, rhs));
+}
+
+static void aa_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b,
+                          Label l) {
+  /* Compare a with b, then emit B.<cond> referencing l for condition `op`. */
+  emit_cmp_ab(t, a, b);
+  aa64_emit32(t->mc, aa64_b_cond(cmp_to_cond(op)));
+  t->mc->emit_label_ref(t->mc, (MCLabel)l, R_AARCH64_CONDBR19, 4, 0);
+}
+
+static void aa_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a, Operand b) {
+  u32 dst_sf = type_is_64(dst.type) ? 1u : 0u; /* width of the CSET result */
+  emit_cmp_ab(t, a, b);                        /* flags from a - b */
+  aa64_emit32(t->mc, aa64_cset(dst_sf, reg_num(dst), cmp_to_cond(op)));
+}
+
+/* ============================================================
+ * Structured scopes — handles are 1-based indices into a->scopes
+ * ============================================================ */
+
+static CGScope aa_scope_begin(CGTarget* t, const CGScopeDesc* d) {
+ AAImpl* a = impl_of(t);
+ if (a->nscopes == a->scopes_cap) { /* full: grow by doubling, starting at 4 */
+ u32 ncap = a->scopes_cap ? a->scopes_cap * 2u : 4u;
+ AAScope* nb = arena_array(t->c->tu, AAScope, ncap);
+ if (a->scopes) memcpy(nb, a->scopes, sizeof(AAScope) * a->nscopes);
+ a->scopes = nb; /* the previous array is not freed here */
+ a->scopes_cap = ncap;
+ }
+ AAScope* sc = &a->scopes[a->nscopes]; /* next unused record */
+ sc->kind = (u8)d->kind;
+ sc->has_else = 0;
+ sc->else_label = 0;
+ sc->end_label = 0;
+ sc->break_label = d->break_label; /* consumed by aa_break_to */
+ sc->continue_label = d->continue_label; /* consumed by aa_continue_to */
+
+ if (d->kind == SCOPE_IF) {
+ sc->else_label = t->mc->label_new(t->mc);
+ sc->end_label = t->mc->label_new(t->mc);
+ u32 sf = type_is_64(d->cond.type) ? 1u : 0u;
+ u32 rn = aa64_force_reg_int(t, d->cond, sf, 9);
+ aa64_emit32(t->mc, aa64_subs_imm(sf, /*Rd=ZR*/ 31u, rn, 0)); /* compare cond with 0 */
+ aa64_emit32(t->mc, aa64_b_cond(0x0u /*EQ*/)); /* cond == 0: skip the then-arm */
+ t->mc->emit_label_ref(t->mc, sc->else_label, R_AARCH64_CONDBR19, 4, 0);
+ } else if (d->kind == SCOPE_LOOP || d->kind == SCOPE_BLOCK) {
+ /* bookkeep only: no code is emitted on entry to a loop or block */
+ } else {
+ compiler_panic(t->c, a->loc,
+ "aarch64 scope_begin: kind %d not yet implemented",
+ (int)d->kind);
+ }
+
+ a->nscopes++;
+ return (CGScope)a->nscopes; /* handle = index + 1 */
+}
+
+static void aa_scope_else(CGTarget* t, CGScope s) {
+  AAImpl* impl = impl_of(t);
+  if (s == CG_SCOPE_NONE || impl->nscopes < s) {
+    compiler_panic(t->c, impl->loc, "aarch64 scope_else: bad scope %u",
+                   (unsigned)s);
+  }
+  AAScope* scope = &impl->scopes[s - 1];
+  aa64_emit32(t->mc, aa64_b_base()); /* then-arm jumps over the else arm */
+  t->mc->emit_label_ref(t->mc, scope->end_label, R_AARCH64_JUMP26, 4, 0);
+  t->mc->label_place(t->mc, scope->else_label); /* else arm begins here */
+  scope->has_else = 1;
+}
+
+static void aa_scope_end(CGTarget* t, CGScope s) {
+  AAImpl* impl = impl_of(t);
+  if (s == CG_SCOPE_NONE || impl->nscopes < s) {
+    compiler_panic(t->c, impl->loc, "aarch64 scope_end: bad scope %u",
+                   (unsigned)s);
+  }
+  AAScope* scope = &impl->scopes[s - 1];
+  if (scope->kind != SCOPE_IF) return; /* other scope kinds place no labels */
+  /* With no else arm, the false branch lands at the end of the if. */
+  if (!scope->has_else) {
+    t->mc->label_place(t->mc, scope->else_label);
+  }
+  t->mc->label_place(t->mc, scope->end_label);
+}
+
+static void aa_break_to(CGTarget* t, CGScope s) {
+  AAImpl* impl = impl_of(t);
+  if (s == CG_SCOPE_NONE || impl->nscopes < s) {
+    compiler_panic(t->c, impl->loc, "aarch64 break_to: bad scope %u", (unsigned)s);
+  }
+  /* Handle s names scopes[s - 1]; jump to its recorded break label. */
+  aa_jump(t, impl->scopes[s - 1].break_label);
+}
+
+static void aa_continue_to(CGTarget* t, CGScope s) {
+  AAImpl* impl = impl_of(t);
+  if (s == CG_SCOPE_NONE || impl->nscopes < s) {
+    compiler_panic(t->c, impl->loc, "aarch64 continue_to: bad scope %u",
+                   (unsigned)s);
+  }
+  /* Handle s names scopes[s - 1]; jump to its recorded continue label. */
+  aa_jump(t, impl->scopes[s - 1].continue_label);
+}
+
+/* Installs the CGTarget hooks implemented in this file. Most are file-static,
+ * so the constructor (in ops.c, per the split) wires them through this helper. */
+void aa_alloc_vtable_init(CGTarget* t) {
+  t->resolve_reg_name = aa_resolve_reg_name;
+  t->reload_reg = aa_reload_reg;
+  t->spill_reg = aa_spill_reg;
+  t->clobbers = aa_clobbers;
+  t->free_reg = aa_free_reg;
+  t->alloc_reg = aa_alloc_reg;
+  /* labels, jumps, comparisons */
+  t->cmp = aa_cmp;
+  t->cmp_branch = aa_cmp_branch;
+  t->jump = aa_jump;
+  t->label_place = aa_label_place;
+  t->label_new = aa_label_new;
+  /* structured scopes */
+  t->continue_to = aa_continue_to;
+  t->break_to = aa_break_to;
+  t->scope_end = aa_scope_end;
+  t->scope_else = aa_scope_else;
+  t->scope_begin = aa_scope_begin;
+}
diff --git a/src/arch/aarch64/emit.c b/src/arch/aarch64/emit.c
@@ -0,0 +1,546 @@
+/* aarch64/emit.c — instruction encoding helpers, function lifecycle,
+ * frame layout, parameter ABI, address materialization. */
+
+#include "arch/aarch64/internal.h"
+
+extern void debug_emit_row(Debug*, ObjSecId text_section, u32 offset, SrcLoc);
+
+/* ============================================================
+ * Shared type / operand helpers
+ * ============================================================ */
+
+/* Returns 1 when `t` has kind TY_LONG, TY_ULONG, TY_LLONG, TY_ULLONG,
+ * TY_PTR or TY_DOUBLE; returns 0 for every other kind and for a null
+ * type pointer. */
+int type_is_64(const Type* t) {
+  if (!t) return 0;
+  return (t->kind == TY_LONG || t->kind == TY_ULONG || t->kind == TY_LLONG ||
+          t->kind == TY_ULLONG || t->kind == TY_PTR || t->kind == TY_DOUBLE)
+             ? 1
+             : 0;
+}
+
+/* Non-zero when `t` is non-null and its kind is TY_DOUBLE or TY_LDOUBLE. */
+int type_is_fp_double(const Type* t) {
+  if (!t) return 0;
+  if (t->kind == TY_DOUBLE) return 1;
+  return t->kind == TY_LDOUBLE;
+}
+
+/* Returns 1 for the kinds this backend treats as signed integers
+ * (TY_CHAR, TY_SCHAR, TY_SHORT, TY_INT, TY_LONG, TY_LLONG) and 0 for
+ * everything else, including a null type pointer. Note that plain
+ * TY_CHAR is counted as signed here. */
+int type_is_signed(const Type* t) {
+  if (!t) return 0;
+  return (t->kind == TY_CHAR || t->kind == TY_SCHAR || t->kind == TY_SHORT ||
+          t->kind == TY_INT || t->kind == TY_LONG || t->kind == TY_LLONG)
+             ? 1
+             : 0;
+}
+
+/* Size in bytes used by this backend for a value of type `t`:
+ *   1 for char/signed char/unsigned char/bool,
+ *   2 for short/unsigned short,
+ *   4 for int/unsigned int/float (and for a null type pointer),
+ *   8 for long, long long, pointers, double and every other kind. */
+u32 type_byte_size(const Type* t) {
+  if (!t) return 4;
+
+  if (t->kind == TY_CHAR || t->kind == TY_SCHAR || t->kind == TY_UCHAR ||
+      t->kind == TY_BOOL) {
+    return 1;
+  }
+  if (t->kind == TY_SHORT || t->kind == TY_USHORT) {
+    return 2;
+  }
+  if (t->kind == TY_INT || t->kind == TY_UINT || t->kind == TY_FLOAT) {
+    return 4;
+  }
+  /* TY_LONG, TY_ULONG, TY_LLONG, TY_ULLONG, TY_PTR, TY_DOUBLE and any
+   * kind not listed above. */
+  return 8;
+}
+
+/* Map an access width in bytes to the load/store `size` field value:
+ * 1 -> 0, 2 -> 1, 4 -> 2, and 8 or any other width -> 3. */
+u32 size_idx_for_bytes(u32 nbytes) {
+  if (nbytes == 1) return 0;
+  if (nbytes == 2) return 1;
+  if (nbytes == 4) return 2;
+  return 3;
+}
+
+/* Low five bits of the operand's register field, i.e. the hardware
+ * register number used in instruction encodings. */
+u32 reg_num(Operand op) {
+  return op.v.reg & 0x1fu;
+}
+
+/* ============================================================
+ * Low-level emission
+ * ============================================================ */
+
+/* Append one 32-bit instruction word to the current section in
+ * little-endian byte order. When a debug sink is attached, also record a
+ * row mapping the word's section offset to the emitter's current source
+ * location. */
+void aa64_emit32(MCEmitter* mc, u32 word) {
+  /* Sample the position before emitting: it is the offset of this word. */
+  const u32 start = obj_pos(mc->obj, mc->section_id);
+  u8 le[4];
+
+  for (u32 i = 0; i < 4; ++i) {
+    le[i] = (u8)((word >> (8 * i)) & 0xff);
+  }
+  mc->emit_bytes(mc, le, 4);
+
+  if (!mc->debug) return;
+  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+}
+
+/* Overwrite the four bytes at offset `ofs` of section `sec_id` with
+ * `word`, stored in little-endian byte order. */
+void aa64_patch32(ObjBuilder* obj, u32 sec_id, u32 ofs, u32 word) {
+  u8 le[4];
+
+  for (u32 i = 0; i < 4; ++i) {
+    le[i] = (u8)((word >> (8 * i)) & 0xff);
+  }
+  obj_patch(obj, sec_id, ofs, le, 4);
+}
+
+/* ============================================================
+ * Immediate encoding helpers
+ * ============================================================ */
+
+/* Materialize `imm` into register Rd.
+ *
+ * sf selects the width: non-zero handles all four 16-bit halfwords of
+ * the value, zero only the low two (the value is truncated to 32 bits).
+ * Strategies are tried in this order:
+ *   1. a single MOVZ when exactly one halfword is non-zero;
+ *   2. a single MOVN when the inverted value has at most one non-zero
+ *      halfword;
+ *   3. MOVZ of the lowest non-zero halfword followed by a MOVK for each
+ *      higher non-zero halfword (MOVZ #0 when the value is zero). */
+void aa64_emit_load_imm(MCEmitter* mc, u32 sf, u32 Rd, i64 imm) {
+  const u32 nhalf = sf ? 4u : 2u;
+  const u64 value = sf ? (u64)imm : ((u64)imm & 0xffffffffu);
+  const u64 flipped = sf ? ~value : ((~value) & 0xffffffffu);
+  u32 hw;
+
+  /* Strategy 1: single MOVZ. */
+  for (hw = 0; hw < nhalf; ++hw) {
+    const u32 shift = hw * 16;
+    const u32 chunk = (u32)((value >> shift) & 0xffffu);
+    const u64 rest = value & ~((u64)0xffffu << shift);
+    if (rest == 0 && chunk != 0) {
+      aa64_emit32(mc, aa64_movz(sf, Rd, chunk, hw));
+      return;
+    }
+  }
+
+  /* Strategy 2: single MOVN of the inverted value. */
+  for (hw = 0; hw < nhalf; ++hw) {
+    const u32 shift = hw * 16;
+    const u32 chunk = (u32)((flipped >> shift) & 0xffffu);
+    const u64 rest = flipped & ~((u64)0xffffu << shift);
+    if (rest == 0) {
+      aa64_emit32(mc, aa64_movn(sf, Rd, chunk, hw));
+      return;
+    }
+  }
+
+  /* Strategy 3: MOVZ the first non-zero halfword, MOVK the others. */
+  hw = 0;
+  while (hw < nhalf && ((value >> (hw * 16)) & 0xffffu) == 0) {
+    ++hw;
+  }
+  if (hw == nhalf) {
+    /* Every halfword is zero. */
+    aa64_emit32(mc, aa64_movz(sf, Rd, 0, 0));
+    return;
+  }
+  aa64_emit32(mc, aa64_movz(sf, Rd, (u32)((value >> (hw * 16)) & 0xffffu), hw));
+  for (++hw; hw < nhalf; ++hw) {
+    const u32 chunk = (u32)((value >> (hw * 16)) & 0xffffu);
+    if (chunk != 0) {
+      aa64_emit32(mc, aa64_movk(sf, Rd, chunk, hw));
+    }
+  }
+}
+
+/* Emit `add sp, sp, #imm` using one or two ADD (immediate) instructions.
+ *
+ * A value that fits in 12 bits, or is a multiple of 4096 whose quotient
+ * fits in 12 bits, takes one instruction; everything else is split into
+ * a shifted high part and a low part. Only the low 24 bits of `imm` are
+ * representable: higher bits are masked off without any diagnostic. */
+void emit_sp_add(MCEmitter* mc, u32 imm) {
+  const u32 lo12 = imm & 0xfff;
+  const u32 hi = imm >> 12;
+
+  if (hi == 0) {
+    aa64_emit32(mc, aa64_add_imm(1, 31, 31, lo12, 0));
+    return;
+  }
+  if (lo12 == 0 && hi <= 0xfff) {
+    aa64_emit32(mc, aa64_add_imm(1, 31, 31, hi, 1));
+    return;
+  }
+  aa64_emit32(mc, aa64_add_imm(1, 31, 31, hi & 0xfff, 1));
+  aa64_emit32(mc, aa64_add_imm(1, 31, 31, lo12, 0));
+}
+
+/* ============================================================
+ * Function lifecycle
+ * ============================================================ */
+
+/* Allocate an anonymous FS_SPILL frame slot of `size` bytes aligned to
+ * `align`; used for the backend-internal save areas of aa_func_begin. */
+static FrameSlot aa64_new_spill_slot(CGTarget* t, u32 size, u32 align) {
+  FrameSlotDesc desc = {
+      .type = NULL,
+      .name = 0,
+      .loc = (SrcLoc){0, 0, 0},
+      .size = size,
+      .align = align,
+      .kind = FS_SPILL,
+      .flags = 0,
+  };
+  return aa_frame_slot(t, &desc);
+}
+
+/* Begin code generation for the function described by `fd`.
+ *
+ * Selects and 4-byte-aligns the text section, resets all per-function
+ * state, creates the epilogue label, opens the CFI region and reserves
+ * AA_PROLOGUE_WORDS NOPs that aa_func_end later overwrites with the real
+ * prologue. If the ABI reports an sret return, an 8-byte slot is
+ * allocated for the sret pointer. For variadic functions a 64-byte GP
+ * save area and a 128-byte FP save area are allocated, and x0-x7 and
+ * the low 64 bits of v0-v7 (16-byte stride) are stored into them. */
+void aa_func_begin(CGTarget* t, const CGFuncDesc* fd) {
+  AAImpl* impl = impl_of(t);
+  MCEmitter* mc = t->mc;
+
+  mc->set_section(mc, fd->text_section_id);
+  mc->emit_align(mc, 4, 0);
+
+  /* Per-function bookkeeping. */
+  impl->fd = fd;
+  impl->func_start = mc->pos(mc);
+  impl->next_param_int = 0;
+  impl->next_param_fp = 0;
+  impl->next_param_stack = 0;
+  impl->has_sret = 0;
+  if (fd->abi && fd->abi->has_sret) impl->has_sret = 1;
+  impl->cum_off = 0;
+  impl->max_outgoing = 0;
+  regpool_init(&impl->int_pool, /*base=*/19u, /*nregs=*/10u);
+  regpool_init(&impl->fp_pool, /*base=*/8u, /*nregs=*/16u);
+  impl->nslots = 0;
+  impl->nscopes = 0;
+  impl->has_alloca = 0;
+  impl->nadd_patches = 0;
+  impl->sret_ptr_slot = FRAME_SLOT_NONE;
+  impl->is_variadic = 0;
+  if (fd->abi && fd->abi->variadic) impl->is_variadic = 1;
+  impl->gp_save_slot = FRAME_SLOT_NONE;
+  impl->fp_save_slot = FRAME_SLOT_NONE;
+  impl->epilogue_label = mc->label_new(mc);
+
+  mc->cfi_startproc(mc);
+
+  /* Placeholder prologue, patched in aa_func_end. */
+  impl->prologue_pos = mc->pos(mc);
+  {
+    u32 remaining = AA_PROLOGUE_WORDS;
+    while (remaining-- > 0) aa64_emit32(mc, AA64_NOP);
+  }
+
+  if (impl->has_sret) {
+    impl->sret_ptr_slot = aa64_new_spill_slot(t, 8, 8);
+  }
+
+  if (!impl->is_variadic) return;
+
+  /* Variadic: spill the argument registers into their save areas. */
+  impl->gp_save_slot = aa64_new_spill_slot(t, 64, 8);
+  impl->fp_save_slot = aa64_new_spill_slot(t, 128, 16);
+  {
+    AASlot* gp_area = aa64_slot_get(impl, impl->gp_save_slot);
+    AASlot* fp_area = aa64_slot_get(impl, impl->fp_save_slot);
+    u32 r;
+
+    for (r = 0; r < 8; ++r) {
+      const i32 at = -(i32)gp_area->off + (i32)r * 8;
+      aa64_emit32(mc, aa64_stur(3, r, 29, at));
+    }
+    for (r = 0; r < 8; ++r) {
+      const i32 at = -(i32)fp_area->off + (i32)r * 16;
+      aa64_emit32(mc, aa64_stur_fp(3, r, 29, at));
+    }
+  }
+}
+
+/* Finish the current function: emit the epilogue, back-patch the
+ * prologue placeholder reserved by aa_func_begin, fix up the recorded
+ * alloca ADD instructions, define the function symbol and close the CFI
+ * region.
+ *
+ * Frame layout computed here, as offsets from the final SP:
+ *   [0, max_outgoing)        outgoing argument area
+ *   int_save_off ...         saved x19.. pairs (16 bytes per pair)
+ *   fp_save_off ...          saved d8.. pairs (16 bytes per pair)
+ *   locals_off ...           frame slots (cum_off bytes)
+ *   fp_lr_off                saved x29/x30; x29 is set to SP + fp_lr_off
+ * frame_size is rounded up to a multiple of 16.
+ *
+ * NOTE(review): aa64_stp_x / aa64_ldp_x / aa64_stp_d / aa64_ldp_d (see
+ * internal.h) mask the scaled offset to 7 bits, so a save offset or
+ * fp_lr_off above 504 is not representable; nothing in this function
+ * checks for that. Confirm how large frames are meant to be handled. */
+void aa_func_end(CGTarget* t) {
+ AAImpl* a = impl_of(t);
+ MCEmitter* mc = t->mc;
+
+ /* Registers are saved in pairs; round the high-water marks up. Only the
+  * first 8 registers of the FP pool (v8..v15) are saved. */
+ u32 n_int_pairs = (a->int_pool.hwm + 1) / 2;
+ u32 used_fp_cs = a->fp_pool.hwm > 8 ? 8u : a->fp_pool.hwm;
+ u32 n_fp_pairs = (used_fp_cs + 1) / 2;
+
+ u32 outgoing_off = 0;
+ u32 int_save_off = a->max_outgoing;
+ u32 fp_save_off = int_save_off + n_int_pairs * 16;
+ u32 locals_off = fp_save_off + n_fp_pairs * 16;
+ u32 fp_lr_off = locals_off + a->cum_off;
+ u32 frame_size = fp_lr_off + 16;
+ frame_size = (frame_size + 15u) & ~15u;
+ /* Recompute so the x29/x30 pair sits at the very top of the frame. */
+ fp_lr_off = frame_size - 16;
+
+ (void)outgoing_off;
+
+ /* ---- Epilogue ---- */
+ mc->label_place(mc, a->epilogue_label);
+
+ /* With alloca SP may have moved; recompute it from the frame pointer. */
+ if (a->has_alloca) {
+ if (fp_lr_off <= 0xfff) {
+ aa64_emit32(mc, aa64_sub_imm(1, /*Rd=SP*/ 31, /*Rn=*/29, fp_lr_off, 0));
+ } else {
+ compiler_panic(t->c, a->loc,
+ "aarch64: has_alloca + fp_lr_off %u out of imm12 range",
+ fp_lr_off);
+ }
+ }
+
+ /* Restore callee-saved registers in the reverse of the save order. */
+ for (i32 i = (i32)n_fp_pairs - 1; i >= 0; --i) {
+ u32 r0 = 8u + (u32)i * 2u;
+ u32 r1 = r0 + 1u;
+ aa64_emit32(mc, aa64_ldp_d(r0, r1, 31, (i32)(fp_save_off + (u32)i * 16u)));
+ }
+ for (i32 i = (i32)n_int_pairs - 1; i >= 0; --i) {
+ u32 r0 = 19u + (u32)i * 2u;
+ u32 r1 = r0 + 1u;
+ aa64_emit32(mc, aa64_ldp_x(r0, r1, 31, (i32)(int_save_off + (u32)i * 16u)));
+ }
+ aa64_emit32(mc, aa64_ldp_x(29, 30, 31, (i32)fp_lr_off));
+ emit_sp_add(mc, frame_size);
+ aa64_emit32(mc, aa64_ret(AA64_LR));
+
+ /* ---- Prologue back-patch ---- */
+ u32 pos = a->prologue_pos;
+ ObjBuilder* obj = t->obj;
+ u32 sec = a->fd->text_section_id;
+
+ /* Build the prologue in a local buffer; unused entries stay NOP. */
+ u32 words[AA_PROLOGUE_WORDS];
+ for (u32 i = 0; i < AA_PROLOGUE_WORDS; ++i) words[i] = AA64_NOP;
+ u32 wi = 0;
+
+ /* sub sp, sp, #frame_size: one or two instructions, mirroring
+  * emit_sp_add. */
+ if (frame_size <= 0xfff) {
+ words[wi++] = aa64_sub_imm(1, 31, 31, frame_size, 0);
+ } else if ((frame_size & 0xfff) == 0 && (frame_size >> 12) <= 0xfff) {
+ words[wi++] = aa64_sub_imm(1, 31, 31, frame_size >> 12, 1);
+ } else {
+ if (wi + 2 > AA_PROLOGUE_WORDS) {
+ compiler_panic(t->c, a->loc,
+ "aarch64: prologue overflow for frame_size %u",
+ frame_size);
+ }
+ words[wi++] = aa64_sub_imm(1, 31, 31, (frame_size >> 12) & 0xfff, 1);
+ words[wi++] = aa64_sub_imm(1, 31, 31, frame_size & 0xfff, 0);
+ }
+ /* Save x29/x30 and point x29 at the saved pair.
+  * NOTE(review): unlike the has_alloca path above, fp_lr_off is not
+  * range-checked before being passed to aa64_add_imm (defined
+  * elsewhere); confirm its behavior for values above 0xfff. */
+ words[wi++] = aa64_stp_x(29, 30, 31, (i32)fp_lr_off);
+ words[wi++] = aa64_add_imm(1, 29, 31, fp_lr_off, 0);
+ /* Spill the incoming x8 into the sret pointer slot. */
+ if (a->has_sret && a->sret_ptr_slot != FRAME_SLOT_NONE) {
+ AASlot* s = aa64_slot_get(a, a->sret_ptr_slot);
+ if (s) {
+ if (wi >= AA_PROLOGUE_WORDS) goto overflow;
+ words[wi++] = aa64_stur(3, 8, 29, -(i32)s->off);
+ }
+ }
+ /* Save the callee-saved register pairs that were actually used. */
+ for (u32 i = 0; i < n_int_pairs; ++i) {
+ u32 r0 = 19u + i * 2u;
+ u32 r1 = r0 + 1u;
+ if (wi >= AA_PROLOGUE_WORDS) goto overflow;
+ words[wi++] = aa64_stp_x(r0, r1, 31, (i32)(int_save_off + i * 16u));
+ }
+ for (u32 i = 0; i < n_fp_pairs; ++i) {
+ u32 r0 = 8u + i * 2u;
+ u32 r1 = r0 + 1u;
+ if (wi >= AA_PROLOGUE_WORDS) goto overflow;
+ words[wi++] = aa64_stp_d(r0, r1, 31, (i32)(fp_save_off + i * 16u));
+ }
+ /* Reached only through the `goto overflow` jumps above. */
+ if (0) {
+ overflow:
+ compiler_panic(
+ t->c, a->loc,
+ "aarch64: prologue placeholder too small (used %u of %u words)", wi,
+ AA_PROLOGUE_WORDS);
+ }
+
+ for (u32 i = 0; i < AA_PROLOGUE_WORDS; ++i) {
+ aa64_patch32(obj, sec, pos + i * 4u, words[i]);
+ }
+
+ /* ---- alloca fix-ups: each recorded position is rewritten to
+  * `add dst, sp, #max_outgoing`, now that max_outgoing is final. ---- */
+ if (a->max_outgoing > 0xfff) {
+ compiler_panic(
+ t->c, a->loc,
+ "aarch64: max_outgoing %u out of imm12 range for alloca patch",
+ a->max_outgoing);
+ }
+ for (u32 i = 0; i < a->nadd_patches; ++i) {
+ u32 dr = a->add_patches[i].dst_reg;
+ u32 word = aa64_add_imm(1, dr, /*Rn=SP*/ 31, a->max_outgoing, 0);
+ aa64_patch32(obj, sec, a->add_patches[i].pos, word);
+ }
+
+ /* Define the function symbol over [func_start, end). */
+ u32 end = mc->pos(mc);
+ obj_symbol_define(obj, a->fd->sym, sec, (u64)a->func_start,
+ (u64)(end - a->func_start));
+
+ mc->cfi_endproc(mc);
+ a->fd = NULL;
+}
+
+/* ============================================================
+ * Frame slots
+ * ============================================================ */
+
+/* Allocate a new frame slot described by `d` and return its handle.
+ *
+ * A zero size defaults to 8 bytes and a zero alignment to 1. The slot's
+ * offset is the running total plus the slot size, rounded up to the
+ * alignment (the mask arithmetic assumes a power-of-two alignment); that
+ * value also becomes the new running total. The returned handle is the
+ * slot's 1-based index. The slot table is arena-allocated and doubles in
+ * capacity (starting at 8) when full. */
+FrameSlot aa_frame_slot(CGTarget* t, const FrameSlotDesc* d) {
+  AAImpl* impl = impl_of(t);
+
+  if (impl->nslots == impl->slots_cap) {
+    const u32 grown = impl->slots_cap ? impl->slots_cap * 2 : 8;
+    AASlot* fresh = arena_array(t->c->tu, AASlot, grown);
+    if (impl->slots) {
+      memcpy(fresh, impl->slots, sizeof(AASlot) * impl->nslots);
+    }
+    impl->slots = fresh;
+    impl->slots_cap = grown;
+  }
+
+  const u32 bytes = d->size ? d->size : 8;
+  const u32 alignment = d->align ? d->align : 1;
+  const u32 round = alignment - 1;
+  const u32 off = (impl->cum_off + bytes + round) & ~round;
+
+  AASlot* slot = &impl->slots[impl->nslots];
+  slot->off = off;
+  slot->size = bytes;
+  slot->align = alignment;
+  slot->kind = d->kind;
+
+  impl->cum_off = off;
+  impl->nslots += 1;
+  return (FrameSlot)(impl->nslots);
+}
+
+/* ============================================================
+ * Parameters
+ * ============================================================ */
+
+/* Copy an indirectly passed aggregate of `nbytes` bytes from the address
+ * in `src_reg` into the frame slot at x29 - slot_off, using x10 as the
+ * data scratch register and the widest chunks first (8, 4, 2, 1).
+ *
+ * LDUR/STUR only encode a signed 9-bit byte offset, and the encoders in
+ * internal.h mask anything larger. The short form is therefore used only
+ * when every access is encodable (nbytes <= 256 and slot_off <= 256), in
+ * which case the emitted code is unchanged. Otherwise the destination
+ * address is materialized in x11 and both sides use scaled unsigned
+ * offsets (LDR/STR immediate), which reach 4095 bytes for byte accesses;
+ * larger aggregates are rejected with a panic instead of being
+ * miscompiled. */
+static void aa64_param_copy_indirect(CGTarget* t, AAImpl* a, u32 src_reg,
+                                     u32 slot_off, u32 nbytes) {
+  MCEmitter* mc = t->mc;
+  const u32 data_reg = 10;
+  const u32 dst_reg = 11;
+  const i32 dst0 = -(i32)slot_off;
+  const int short_form = (nbytes <= 256u) && (slot_off <= 256u);
+  u32 done = 0;
+
+  if (!short_form) {
+    if (nbytes > 4096u) {
+      compiler_panic(t->c, a->loc,
+                     "aarch64 param: indirect aggregate of %u bytes too large",
+                     nbytes);
+    }
+    aa64_emit_addr_adjust(mc, dst_reg, 29, dst0);
+  }
+
+  /* sz is the log2 access width: 3, 2, 1, 0. `done` is always a multiple
+   * of the current width, as the scaled-offset forms require. */
+  for (u32 sz = 4; sz-- > 0;) {
+    const u32 width = 1u << sz;
+    while (done + width <= nbytes) {
+      if (short_form) {
+        aa64_emit32(mc, aa64_ldur(sz, data_reg, src_reg, (i32)done));
+        aa64_emit32(mc, aa64_stur(sz, data_reg, 29, dst0 + (i32)done));
+      } else {
+        aa64_emit32(mc, aa64_ldr_uimm(sz, data_reg, src_reg, done));
+        aa64_emit32(mc, aa64_str_uimm(sz, data_reg, dst_reg, done));
+      }
+      done += width;
+    }
+  }
+}
+
+/* Move one incoming parameter from its ABI location into its frame slot.
+ *
+ *  - ABI_ARG_IGNORE: nothing is emitted.
+ *  - ABI_ARG_INDIRECT: the caller passed a pointer (next integer argument
+ *    register, or the next 8-byte stack slot once x0-x7 are used up);
+ *    the pointed-to bytes are copied into the slot.
+ *  - otherwise each ABI part is stored from the next integer or FP
+ *    argument register, or, once eight registers of that class have been
+ *    used, loaded from the caller's stack area at x29 + 16 + offset and
+ *    then stored.
+ *
+ * Panics on an invalid slot or an unsupported ABI class.
+ *
+ * NOTE(review): the per-part LDUR/STUR offsets below are not
+ * range-checked against the signed 9-bit immediate; slots further than
+ * 256 bytes below x29, or more than about 30 stack-passed parts, would
+ * be mis-encoded. */
+void aa_param(CGTarget* t, const CGParamDesc* p) {
+  AAImpl* a = impl_of(t);
+  AASlot* s = aa64_slot_get(a, p->slot);
+  if (!s) {
+    compiler_panic(t->c, a->loc, "aarch64 param: bad slot");
+  }
+  const ABIArgInfo* ai = p->abi;
+
+  if (ai->kind == ABI_ARG_IGNORE) return;
+  if (ai->kind == ABI_ARG_INDIRECT) {
+    u32 ptr_reg;
+    if (a->next_param_int < 8) {
+      ptr_reg = a->next_param_int++;
+    } else {
+      /* Pointer was passed on the stack: load it into scratch x9. */
+      u32 caller_off = a->next_param_stack;
+      a->next_param_stack += 8;
+      aa64_emit32(t->mc, aa64_ldur(3, 9, 29, (i32)(16 + caller_off)));
+      ptr_reg = 9;
+    }
+    aa64_param_copy_indirect(t, a, ptr_reg, s->off, s->size);
+    return;
+  }
+  for (u16 i = 0; i < ai->nparts; ++i) {
+    const ABIArgPart* pt = &ai->parts[i];
+    u32 part_off = pt->src_offset;
+    u32 sz = pt->size;
+    u32 sidx = size_idx_for_bytes(sz);
+
+    if (pt->cls == ABI_CLASS_INT) {
+      if (a->next_param_int < 8) {
+        u32 reg = a->next_param_int++;
+        aa64_emit32(t->mc,
+                    aa64_stur(sidx, reg, 29, -(i32)s->off + (i32)part_off));
+      } else {
+        u32 caller_off = a->next_param_stack;
+        a->next_param_stack += 8;
+        aa64_emit32(t->mc, aa64_ldur(sidx, 9, 29, (i32)(16 + caller_off)));
+        aa64_emit32(t->mc,
+                    aa64_stur(sidx, 9, 29, -(i32)s->off + (i32)part_off));
+      }
+    } else if (pt->cls == ABI_CLASS_FP) {
+      if (a->next_param_fp < 8) {
+        u32 reg = a->next_param_fp++;
+        aa64_emit32(t->mc,
+                    aa64_stur_fp(sidx, reg, 29, -(i32)s->off + (i32)part_off));
+      } else {
+        u32 caller_off = a->next_param_stack;
+        a->next_param_stack += 8;
+        aa64_emit32(t->mc, aa64_ldur_fp(sidx, 0, 29, (i32)(16 + caller_off)));
+        aa64_emit32(t->mc,
+                    aa64_stur_fp(sidx, 0, 29, -(i32)s->off + (i32)part_off));
+      }
+    } else {
+      compiler_panic(t->c, a->loc, "aarch64 param: ABI class %d unimpl",
+                     (int)pt->cls);
+    }
+  }
+}
+
+/* ============================================================
+ * Address materialization helpers
+ * ============================================================ */
+
+/* Non-zero when obj_symbol_extern_via_got reports that `sym` must be
+ * addressed through the GOT. */
+static int use_got_for_sym(CGTarget* t, ObjSymId sym) {
+  const int via_got = obj_symbol_extern_via_got(t->c, t->obj, sym);
+  return via_got;
+}
+
+/* Load the address of `sym` from its GOT entry into dst_reg. Emits an
+ * ADRP carrying an R_AARCH64_ADR_GOT_PAGE relocation followed by a
+ * 64-bit LDR from [dst_reg] carrying R_AARCH64_LD64_GOT_LO12_NC. */
+void aa64_emit_got_load_addr(CGTarget* t, u32 dst_reg, ObjSymId sym) {
+  MCEmitter* em = t->mc;
+  const u32 text = em->section_id;
+  u32 at;
+
+  /* Page of the GOT entry. */
+  at = em->pos(em);
+  aa64_emit32(em, aa64_adrp_base(dst_reg));
+  em->emit_reloc_at(em, text, at, R_AARCH64_ADR_GOT_PAGE, sym, 0, 0, 0);
+
+  /* Load the entry itself; the low 12 bits come from the relocation. */
+  at = em->pos(em);
+  aa64_emit32(em, aa64_ldr_uimm(/*size=*/3, dst_reg, dst_reg, 0));
+  em->emit_reloc_at(em, text, at, R_AARCH64_LD64_GOT_LO12_NC, sym, 0, 0, 0);
+}
+
+/* Materialize the address `sym + addend` into dst_reg.
+ *
+ * Symbols that must go through the GOT are loaded with
+ * aa64_emit_got_load_addr and the addend is then applied in place.
+ * All other symbols use an ADRP + ADD pair with
+ * R_AARCH64_ADR_PREL_PG_HI21 / R_AARCH64_ADD_ABS_LO12_NC relocations
+ * that carry the addend.
+ *
+ * For the GOT path the adjustment runs with Rd == base. That is only
+ * safe while aa64_emit_addr_adjust can use ADD/SUB immediates, i.e. for
+ * |addend| < 2^24: beyond that it loads the offset into Rd first, which
+ * would destroy the symbol address, and the addend would also be
+ * truncated to 32 bits. Such addends are rejected with a panic. */
+void emit_global_addr(CGTarget* t, u32 dst_reg, ObjSymId sym, i64 addend) {
+  MCEmitter* mc = t->mc;
+  if (use_got_for_sym(t, sym)) {
+    aa64_emit_got_load_addr(t, dst_reg, sym);
+    if (addend) {
+      if (addend < -(i64)0xffffff || addend > (i64)0xffffff) {
+        compiler_panic(t->c, impl_of(t)->loc,
+                       "aarch64: GOT symbol addend out of range");
+      }
+      aa64_emit_addr_adjust(mc, dst_reg, dst_reg, (i32)addend);
+    }
+    return;
+  }
+  u32 sec = mc->section_id;
+  u32 adrp_pos = mc->pos(mc);
+  aa64_emit32(mc, aa64_adrp_base(dst_reg));
+  mc->emit_reloc_at(mc, sec, adrp_pos, R_AARCH64_ADR_PREL_PG_HI21, sym, addend,
+                    0, 0);
+  u32 add_pos = mc->pos(mc);
+  aa64_emit32(mc, aa64_add_imm(1, dst_reg, dst_reg, 0, 0));
+  mc->emit_reloc_at(mc, sec, add_pos, R_AARCH64_ADD_ABS_LO12_NC, sym, addend, 0,
+                    0);
+}
+
+/* Emit code computing Rd = base + off (64-bit).
+ *
+ *  - off == 0: a register move.
+ *  - |off| <= 0xfff: one ADD/SUB immediate.
+ *  - |off| < 2^24: a shifted-by-12 ADD/SUB for the high part and a plain
+ *    one for the low part, skipping whichever part is zero.
+ *  - otherwise: the offset is loaded into Rd and added to base.
+ *
+ * In the last case Rd is written before base is read, so callers must
+ * not pass Rd == base when |off| >= 2^24.
+ *
+ * The magnitude is computed in unsigned arithmetic so that
+ * off == INT32_MIN does not overflow a signed negation. */
+void aa64_emit_addr_adjust(MCEmitter* mc, u32 Rd, u32 base, i32 off) {
+  if (off == 0) {
+    aa64_emit32(mc, aa64_mov_reg(1, Rd, base));
+    return;
+  }
+  u32 abs_off = (off < 0) ? ((u32)0 - (u32)off) : (u32)off;
+  if (abs_off <= 0xfff) {
+    if (off < 0)
+      aa64_emit32(mc, aa64_sub_imm(1, Rd, base, abs_off, 0));
+    else
+      aa64_emit32(mc, aa64_add_imm(1, Rd, base, abs_off, 0));
+    return;
+  }
+  if ((abs_off >> 24) == 0) {
+    u32 hi = (abs_off >> 12) & 0xfff;
+    u32 lo = abs_off & 0xfff;
+    if (off < 0) {
+      if (hi) aa64_emit32(mc, aa64_sub_imm(1, Rd, base, hi, 1));
+      if (lo) aa64_emit32(mc, aa64_sub_imm(1, Rd, hi ? Rd : base, lo, 0));
+    } else {
+      if (hi) aa64_emit32(mc, aa64_add_imm(1, Rd, base, hi, 1));
+      if (lo) aa64_emit32(mc, aa64_add_imm(1, Rd, hi ? Rd : base, lo, 0));
+    }
+    return;
+  }
+  aa64_emit_load_imm(mc, 1, Rd, off);
+  aa64_emit32(mc, aa64_add(1, Rd, base, Rd));
+}
+
diff --git a/src/arch/aarch64/internal.h b/src/arch/aarch64/internal.h
@@ -0,0 +1,312 @@
+/* aarch64/internal.h — private types and forward decls shared across
+ * emit.c / alloc.c / ops.c. NOT part of the public API. */
+#pragma once
+
+#include <string.h>
+
+#include "arch/aa64_asm.h"
+#include "arch/aa64_isa.h"
+#include "arch/aa64_regs.h"
+#include "arch/arch.h"
+#include "core/arena.h"
+#include "obj/obj.h"
+#include "type/type.h"
+
+/* ============================================================
+ * Local encoding helpers (kept here, not in aa64_isa.h).
+ * ============================================================ */
+
+#define AA64_NOP 0xD503201Fu
+
+/* STP Xt, Xt2, [Xn, #byte_off] (signed-offset form). byte_off is shifted
+ * right by 3 and masked to the 7-bit field; no range check is made. */
+static inline u32 aa64_stp_x(u32 Rt, u32 Rt2, u32 Rn, i32 byte_off) {
+  const u32 imm7 = (u32)(byte_off >> 3) & 0x7fu;
+  u32 insn = 0xA9000000u;
+  insn |= imm7 << 15;
+  insn |= (Rt2 & 0x1f) << 10;
+  insn |= (Rn & 0x1f) << 5;
+  insn |= Rt & 0x1f;
+  return insn;
+}
+/* LDP Xt, Xt2, [Xn, #byte_off] (signed-offset form). byte_off is shifted
+ * right by 3 and masked to the 7-bit field; no range check is made. */
+static inline u32 aa64_ldp_x(u32 Rt, u32 Rt2, u32 Rn, i32 byte_off) {
+  const u32 imm7 = (u32)(byte_off >> 3) & 0x7fu;
+  u32 insn = 0xA9400000u;
+  insn |= imm7 << 15;
+  insn |= (Rt2 & 0x1f) << 10;
+  insn |= (Rn & 0x1f) << 5;
+  insn |= Rt & 0x1f;
+  return insn;
+}
+/* STP Dt, Dt2, [Xn, #byte_off] (signed-offset form). byte_off is shifted
+ * right by 3 and masked to the 7-bit field; no range check is made. */
+static inline u32 aa64_stp_d(u32 Rt, u32 Rt2, u32 Rn, i32 byte_off) {
+  const u32 imm7 = (u32)(byte_off >> 3) & 0x7fu;
+  u32 insn = 0x6D000000u;
+  insn |= imm7 << 15;
+  insn |= (Rt2 & 0x1f) << 10;
+  insn |= (Rn & 0x1f) << 5;
+  insn |= Rt & 0x1f;
+  return insn;
+}
+/* LDP Dt, Dt2, [Xn, #byte_off] (signed-offset form). byte_off is shifted
+ * right by 3 and masked to the 7-bit field; no range check is made. */
+static inline u32 aa64_ldp_d(u32 Rt, u32 Rt2, u32 Rn, i32 byte_off) {
+  const u32 imm7 = (u32)(byte_off >> 3) & 0x7fu;
+  u32 insn = 0x6D400000u;
+  insn |= imm7 << 15;
+  insn |= (Rt2 & 0x1f) << 10;
+  insn |= (Rn & 0x1f) << 5;
+  insn |= Rt & 0x1f;
+  return insn;
+}
+
+/* STUR (integer) with access-size field `size` and an unscaled byte
+ * offset. simm9 is masked to 9 bits; out-of-range values wrap silently. */
+static inline u32 aa64_stur(u32 size, u32 Rt, u32 Rn, i32 simm9) {
+  const u32 imm9 = (u32)simm9 & 0x1ffu;
+  const u32 regs = ((Rn & 0x1f) << 5) | (Rt & 0x1f);
+  return 0x38000000u | (size << 30) | (imm9 << 12) | regs;
+}
+/* LDUR (integer) with access-size field `size` and an unscaled byte
+ * offset. simm9 is masked to 9 bits; out-of-range values wrap silently. */
+static inline u32 aa64_ldur(u32 size, u32 Rt, u32 Rn, i32 simm9) {
+  const u32 imm9 = (u32)simm9 & 0x1ffu;
+  const u32 regs = ((Rn & 0x1f) << 5) | (Rt & 0x1f);
+  return 0x38400000u | (size << 30) | (imm9 << 12) | regs;
+}
+/* STUR (SIMD&FP register) with access-size field `size` and an unscaled
+ * byte offset. simm9 is masked to 9 bits; no range check is made. */
+static inline u32 aa64_stur_fp(u32 size, u32 Rt, u32 Rn, i32 simm9) {
+  const u32 imm9 = (u32)simm9 & 0x1ffu;
+  const u32 regs = ((Rn & 0x1f) << 5) | (Rt & 0x1f);
+  return 0x3C000000u | (size << 30) | (imm9 << 12) | regs;
+}
+/* LDUR (SIMD&FP register) with access-size field `size` and an unscaled
+ * byte offset. simm9 is masked to 9 bits; no range check is made. */
+static inline u32 aa64_ldur_fp(u32 size, u32 Rt, u32 Rn, i32 simm9) {
+  const u32 imm9 = (u32)simm9 & 0x1ffu;
+  const u32 regs = ((Rn & 0x1f) << 5) | (Rt & 0x1f);
+  return 0x3C400000u | (size << 30) | (imm9 << 12) | regs;
+}
+
+/* STR (integer, unsigned scaled offset). byte_off is shifted right by
+ * `size` and masked to 12 bits; alignment and range are not checked. */
+static inline u32 aa64_str_uimm(u32 size, u32 Rt, u32 Rn, u32 byte_off) {
+  const u32 imm12 = (byte_off >> size) & 0xfffu;
+  const u32 regs = ((Rn & 0x1f) << 5) | (Rt & 0x1f);
+  return 0x39000000u | (size << 30) | (imm12 << 10) | regs;
+}
+/* LDR (integer, unsigned scaled offset). byte_off is shifted right by
+ * `size` and masked to 12 bits; alignment and range are not checked. */
+static inline u32 aa64_ldr_uimm(u32 size, u32 Rt, u32 Rn, u32 byte_off) {
+  const u32 imm12 = (byte_off >> size) & 0xfffu;
+  const u32 regs = ((Rn & 0x1f) << 5) | (Rt & 0x1f);
+  return 0x39400000u | (size << 30) | (imm12 << 10) | regs;
+}
+/* STR (SIMD&FP register, unsigned scaled offset). byte_off is shifted
+ * right by `size` and masked to 12 bits; no checks are made. */
+static inline u32 aa64_str_fp_uimm(u32 size, u32 Rt, u32 Rn, u32 byte_off) {
+  const u32 imm12 = (byte_off >> size) & 0xfffu;
+  const u32 regs = ((Rn & 0x1f) << 5) | (Rt & 0x1f);
+  return 0x3D000000u | (size << 30) | (imm12 << 10) | regs;
+}
+
+/* MRS Xt, TPIDR_EL0. */
+static inline u32 aa64_mrs_tpidr_el0(u32 Rt) {
+  const u32 opcode = 0xD53BD040u;
+  return opcode | (Rt & 0x1fu);
+}
+/* Opcode word for B with a zero offset field; the callers in this file
+ * pair it with an R_AARCH64_JUMP26 label reference. */
+static inline u32 aa64_b_base(void) {
+  const u32 opcode = 0x14000000u;
+  return opcode;
+}
+/* Opcode word for BL with a zero offset field. */
+static inline u32 aa64_bl_base(void) {
+  const u32 opcode = 0x94000000u;
+  return opcode;
+}
+
+/* ADRP Xd with a zero page offset; callers attach a page relocation. */
+static inline u32 aa64_adrp_base(u32 Rd) {
+  const u32 opcode = 0x90000000u;
+  return opcode | (Rd & 0x1f);
+}
+
+/* LDR (SIMD&FP register, unsigned scaled offset). byte_off is shifted
+ * right by `size` and masked to 12 bits; no checks are made. */
+static inline u32 aa64_ldr_fp_uimm(u32 size, u32 Rt, u32 Rn, u32 byte_off) {
+  const u32 imm12 = (byte_off >> size) & 0xfffu;
+  const u32 regs = ((Rn & 0x1f) << 5) | (Rt & 0x1f);
+  return 0x3D400000u | (size << 30) | (imm12 << 10) | regs;
+}
+
+/* FMOV Rd, Rn between FP registers; `type` (2 bits) goes in bits 23:22. */
+static inline u32 aa64_fmov_reg(u32 type, u32 Rd, u32 Rn) {
+  const u32 ftype = (type & 3) << 22;
+  const u32 regs = ((Rn & 0x1f) << 5) | (Rd & 0x1f);
+  return 0x1E204000u | ftype | regs;
+}
+
+/* SUBS Rd, Rn, #imm12 (unshifted immediate); sf selects the 64-bit form.
+ * imm12 is masked to 12 bits. */
+static inline u32 aa64_subs_imm(u32 sf, u32 Rd, u32 Rn, u32 imm12) {
+  const u32 imm = (imm12 & 0xfff) << 10;
+  const u32 regs = ((Rn & 0x1f) << 5) | (Rd & 0x1f);
+  return 0x71000000u | (sf << 31) | imm | regs;
+}
+
+/* CSET Rd, EQ, encoded as CSINC Rd, ZR, ZR, NE (condition field 0x1). */
+static inline u32 aa64_cset_eq(u32 sf, u32 Rd) {
+  const u32 zr_rm = 31u << 16;
+  const u32 cond_ne = 0x1u << 12;
+  const u32 zr_rn = 31u << 5;
+  return 0x1A800400u | (sf << 31) | zr_rm | cond_ne | zr_rn | (Rd & 0x1f);
+}
+
+/* FCVTZS Rd, Rn (FP to signed integer, scalar); sf selects a 64-bit
+ * destination and `type` the FP source precision. */
+static inline u32 aa64_fcvtzs(u32 sf, u32 type, u32 Rd, u32 Rn) {
+  const u32 ftype = (type & 3) << 22;
+  const u32 regs = ((Rn & 0x1f) << 5) | (Rd & 0x1f);
+  return 0x1E380000u | (sf << 31) | ftype | regs;
+}
+/* FCVTZU Rd, Rn (FP to unsigned integer, scalar); sf selects a 64-bit
+ * destination and `type` the FP source precision. */
+static inline u32 aa64_fcvtzu(u32 sf, u32 type, u32 Rd, u32 Rn) {
+  const u32 ftype = (type & 3) << 22;
+  const u32 regs = ((Rn & 0x1f) << 5) | (Rd & 0x1f);
+  return 0x1E390000u | (sf << 31) | ftype | regs;
+}
+/* SCVTF Rd, Rn (signed integer to FP, scalar); sf selects a 64-bit
+ * source and `type` the FP destination precision. */
+static inline u32 aa64_scvtf(u32 sf, u32 type, u32 Rd, u32 Rn) {
+  const u32 ftype = (type & 3) << 22;
+  const u32 regs = ((Rn & 0x1f) << 5) | (Rd & 0x1f);
+  return 0x1E220000u | (sf << 31) | ftype | regs;
+}
+/* UCVTF Rd, Rn (unsigned integer to FP, scalar); sf selects a 64-bit
+ * source and `type` the FP destination precision. */
+static inline u32 aa64_ucvtf(u32 sf, u32 type, u32 Rd, u32 Rn) {
+  const u32 ftype = (type & 3) << 22;
+  const u32 regs = ((Rn & 0x1f) << 5) | (Rd & 0x1f);
+  return 0x1E230000u | (sf << 31) | ftype | regs;
+}
+
+/* FCVT Dd, Sn: widen single precision to double precision. */
+static inline u32 aa64_fcvt_d_s(u32 Rd, u32 Rn) {
+  const u32 regs = ((Rn & 0x1f) << 5) | (Rd & 0x1f);
+  return 0x1E22C000u | regs;
+}
+/* FCVT Sd, Dn: narrow double precision to single precision. */
+static inline u32 aa64_fcvt_s_d(u32 Rd, u32 Rn) {
+  const u32 regs = ((Rn & 0x1f) << 5) | (Rd & 0x1f);
+  return 0x1E624000u | regs;
+}
+
+/* FMOV Sd, Wn: copy 32 bits from a general register to an FP register. */
+static inline u32 aa64_fmov_s_w(u32 Rd, u32 Rn) {
+  const u32 regs = ((Rn & 0x1f) << 5) | (Rd & 0x1f);
+  return 0x1E270000u | regs;
+}
+/* FMOV Wd, Sn: copy 32 bits from an FP register to a general register. */
+static inline u32 aa64_fmov_w_s(u32 Rd, u32 Rn) {
+  const u32 regs = ((Rn & 0x1f) << 5) | (Rd & 0x1f);
+  return 0x1E260000u | regs;
+}
+/* FMOV Dd, Xn: copy 64 bits from a general register to an FP register. */
+static inline u32 aa64_fmov_d_x(u32 Rd, u32 Rn) {
+  const u32 regs = ((Rn & 0x1f) << 5) | (Rd & 0x1f);
+  return 0x9E670000u | regs;
+}
+/* FMOV Xd, Dn: copy 64 bits from an FP register to a general register. */
+static inline u32 aa64_fmov_x_d(u32 Rd, u32 Rn) {
+  const u32 regs = ((Rn & 0x1f) << 5) | (Rd & 0x1f);
+  return 0x9E660000u | regs;
+}
+
+/* SUB Xd, Xn, Xm, UXTX: 64-bit extended-register subtract. */
+static inline u32 aa64_sub_extreg_x_uxtx(u32 Rd, u32 Rn, u32 Rm) {
+  u32 insn = 0xCB206000u;
+  insn |= (Rm & 0x1f) << 16;
+  insn |= (Rn & 0x1f) << 5;
+  insn |= Rd & 0x1f;
+  return insn;
+}
+
+/* SUBS Rd, Rn, Rm (shifted register, no shift); sf selects 64-bit. */
+static inline u32 aa64_subs_reg(u32 sf, u32 Rd, u32 Rn, u32 Rm) {
+  u32 insn = 0x6B000000u | (sf << 31);
+  insn |= (Rm & 0x1f) << 16;
+  insn |= (Rn & 0x1f) << 5;
+  insn |= Rd & 0x1f;
+  return insn;
+}
+
+/* B.cond with a zero offset field; `cond` is masked to 4 bits. */
+static inline u32 aa64_b_cond(u32 cond) {
+  const u32 opcode = 0x54000000u;
+  return opcode | (cond & 0xfu);
+}
+
+/* CSINC Rd, Rn, Rm, cond; sf selects the 64-bit form. */
+static inline u32 aa64_csinc(u32 sf, u32 Rd, u32 Rn, u32 Rm, u32 cond) {
+  u32 insn = 0x1A800400u | (sf << 31);
+  insn |= (Rm & 0x1f) << 16;
+  insn |= (cond & 0xfu) << 12;
+  insn |= (Rn & 0x1f) << 5;
+  insn |= Rd & 0x1f;
+  return insn;
+}
+/* CSET Rd, cond, expressed as CSINC Rd, ZR, ZR with the condition's low
+ * bit flipped. */
+static inline u32 aa64_cset(u32 sf, u32 Rd, u32 cond) {
+  const u32 zr = 31u;
+  const u32 flipped = cond ^ 1u;
+  return aa64_csinc(sf, Rd, zr, zr, flipped);
+}
+
+/* FADD Rd, Rn, Rm (scalar); `type` selects the precision. */
+static inline u32 aa64_fadd(u32 type, u32 Rd, u32 Rn, u32 Rm) {
+  const u32 ftype = (type & 3) << 22;
+  const u32 regs = ((Rm & 0x1f) << 16) | ((Rn & 0x1f) << 5) | (Rd & 0x1f);
+  return 0x1E202800u | ftype | regs;
+}
+/* FSUB Rd, Rn, Rm (scalar); `type` selects the precision. */
+static inline u32 aa64_fsub(u32 type, u32 Rd, u32 Rn, u32 Rm) {
+  const u32 ftype = (type & 3) << 22;
+  const u32 regs = ((Rm & 0x1f) << 16) | ((Rn & 0x1f) << 5) | (Rd & 0x1f);
+  return 0x1E203800u | ftype | regs;
+}
+/* FMUL Rd, Rn, Rm (scalar); `type` selects the precision. */
+static inline u32 aa64_fmul(u32 type, u32 Rd, u32 Rn, u32 Rm) {
+  const u32 ftype = (type & 3) << 22;
+  const u32 regs = ((Rm & 0x1f) << 16) | ((Rn & 0x1f) << 5) | (Rd & 0x1f);
+  return 0x1E200800u | ftype | regs;
+}
+/* FDIV Rd, Rn, Rm (scalar); `type` selects the precision. */
+static inline u32 aa64_fdiv(u32 type, u32 Rd, u32 Rn, u32 Rm) {
+  const u32 ftype = (type & 3) << 22;
+  const u32 regs = ((Rm & 0x1f) << 16) | ((Rn & 0x1f) << 5) | (Rd & 0x1f);
+  return 0x1E201800u | ftype | regs;
+}
+
+/* SBFM Rd, Rn, #immr, #imms. sf is written to both bit 31 and the N bit
+ * (bit 22); immr and imms are masked to 6 bits each. */
+static inline u32 aa64_sbfm(u32 sf, u32 Rd, u32 Rn, u32 immr, u32 imms) {
+  const u32 width = (sf << 31) | (sf << 22);
+  const u32 fields = ((immr & 0x3fu) << 16) | ((imms & 0x3fu) << 10);
+  const u32 regs = ((Rn & 0x1f) << 5) | (Rd & 0x1f);
+  return 0x13000000u | width | fields | regs;
+}
+/* UBFM Rd, Rn, #immr, #imms. sf is written to both bit 31 and the N bit
+ * (bit 22); immr and imms are masked to 6 bits each. */
+static inline u32 aa64_ubfm(u32 sf, u32 Rd, u32 Rn, u32 immr, u32 imms) {
+  const u32 width = (sf << 31) | (sf << 22);
+  const u32 fields = ((immr & 0x3fu) << 16) | ((imms & 0x3fu) << 10);
+  const u32 regs = ((Rn & 0x1f) << 5) | (Rd & 0x1f);
+  return 0x53000000u | width | fields | regs;
+}
+/* BFM Rd, Rn, #immr, #imms. sf is written to both bit 31 and the N bit
+ * (bit 22); immr and imms are masked to 6 bits each. */
+static inline u32 aa64_bfm(u32 sf, u32 Rd, u32 Rn, u32 immr, u32 imms) {
+  const u32 width = (sf << 31) | (sf << 22);
+  const u32 fields = ((immr & 0x3fu) << 16) | ((imms & 0x3fu) << 10);
+  const u32 regs = ((Rn & 0x1f) << 5) | (Rd & 0x1f);
+  return 0x33000000u | width | fields | regs;
+}
+
+/* ============================================================
+ * RegPool
+ * ============================================================ */
+
+/* Allocation state for one register class. The pool covers `nregs`
+ * consecutive hardware registers starting at `base` (aa_func_begin uses
+ * x19.. with 10 registers and v8.. with 16). */
+typedef struct RegPool {
+ /* NOTE(review): presumably a bitmask of currently free registers;
+  * confirm against regpool_alloc/regpool_free in alloc.c. */
+ u32 free;
+ /* High-water mark; aa_func_end derives the number of callee-saved
+  * pairs to save and restore from it. */
+ u32 hwm;
+ u8 base;
+ u8 nregs;
+ u8 pad[2];
+} RegPool;
+
+/* ============================================================
+ * AAImpl types
+ * ============================================================ */
+
+/* Number of NOP words aa_func_begin reserves for the prologue that
+ * aa_func_end patches in. A frame size needing a two-instruction sub,
+ * plus the sret pointer store, can require up to 14 words; aa_func_end
+ * panics ("prologue placeholder too small") when the budget is
+ * exceeded. */
+#define AA_PROLOGUE_WORDS \
+ 12u /* sub sp (1) + stp/add fp (2) + 5 int pairs + 4 fp pairs = 12 */
+
+/* One frame slot, as created by aa_frame_slot. */
+typedef struct AASlot {
+ /* Distance below the frame pointer: the slot is addressed at x29 - off. */
+ u32 off;
+ /* Size in bytes (a requested size of 0 is stored as 8). */
+ u32 size;
+ /* Alignment in bytes (a requested alignment of 0 is stored as 1). */
+ u32 align;
+ /* Copied from FrameSlotDesc.kind (e.g. FS_SPILL). */
+ u8 kind;
+ u8 pad[3];
+} AASlot;
+
+/* Per-scope control-flow state, indexed by CGScope handle minus one. */
+typedef struct AAScope {
+ /* Scope kind; aa_scope_end only emits labels for SCOPE_IF. */
+ u8 kind;
+ /* Set by aa_scope_else once else_label has been placed. */
+ u8 has_else;
+ u8 pad[2];
+ /* Labels used by if-scopes: start of the else part and end of the if. */
+ MCLabel else_label;
+ MCLabel end_label;
+ /* Jump targets used by aa_break_to / aa_continue_to. */
+ Label break_label;
+ Label continue_label;
+} AAScope;
+
+/* Backend state for the AArch64 code generator. Most fields are
+ * per-function and are reset by aa_func_begin. */
+typedef struct AAImpl {
+ CGTarget base;
+ /* Source location passed to compiler_panic diagnostics. */
+ SrcLoc loc;
+ /* Descriptor of the function being generated; NULL after aa_func_end. */
+ const CGFuncDesc* fd;
+
+ /* Section offsets of the function start and of the NOP prologue
+  * placeholder, plus the label placed at the start of the epilogue. */
+ u32 func_start;
+ u32 prologue_pos;
+ MCLabel epilogue_label;
+
+ /* Frame slot table (arena-allocated, grows by doubling). cum_off is
+  * the running total of slot space; max_outgoing is the size of the
+  * outgoing argument area at the bottom of the frame. */
+ AASlot* slots;
+ u32 nslots;
+ u32 slots_cap;
+ u32 cum_off;
+ u32 max_outgoing;
+
+ /* Incoming-parameter cursors used by aa_param: next integer register,
+  * next FP register, and next byte offset in the caller's stack area. */
+ u32 next_param_int;
+ u32 next_param_fp;
+ u32 next_param_stack;
+ /* Non-zero when the ABI reports an sret return; sret_ptr_slot then
+  * holds the slot that receives the incoming x8. */
+ u8 has_sret;
+ FrameSlot sret_ptr_slot;
+
+ /* Register pools: x19.. (10 registers) and v8.. (16 registers). */
+ RegPool int_pool;
+ RegPool fp_pool;
+
+ /* Scope stack storage; CGScope handles are 1-based indices into it. */
+ AAScope* scopes;
+ u32 nscopes;
+ u32 scopes_cap;
+
+ /* Set when the function uses alloca; the epilogue then recomputes SP
+  * from x29. add_patches records the position and destination register
+  * of each ADD that aa_func_end rewrites to `add dst, sp, #max_outgoing`. */
+ u8 has_alloca;
+ struct AAAllocaPatch {
+ u32 pos;
+ u32 dst_reg;
+ }* add_patches;
+ u32 nadd_patches;
+ u32 add_patches_cap;
+
+ /* Variadic functions: slots holding the saved x0-x7 (64 bytes) and
+  * v0-v7 (128 bytes, 16-byte stride) argument registers. */
+ u8 is_variadic;
+ FrameSlot gp_save_slot;
+ FrameSlot fp_save_slot;
+} AAImpl;
+
+/* ============================================================
+ * Cross-file forward declarations
+ * ============================================================ */
+
+/* NOTE(review): every function declared below has external linkage, but
+ * several names carry no aa64_/aa_ prefix (regpool_*, emit_sp_add,
+ * emit_global_addr, impl_of, emit_cmp_ab, type_is_*, type_byte_size,
+ * size_idx_for_bytes, reg_num). Confirm that no other backend or module
+ * linked into the same binary defines the same symbols. */
+
+/* regpool (alloc.c) */
+void regpool_init(RegPool* p, u8 base, u8 nregs);
+Reg regpool_alloc(RegPool* p);
+int regpool_free(RegPool* p, Reg r);
+
+/* emit.c helpers used in alloc.c / ops.c */
+void aa64_emit32(MCEmitter* mc, u32 word);
+void aa64_patch32(ObjBuilder* obj, u32 sec_id, u32 ofs, u32 word);
+void aa64_emit_load_imm(MCEmitter* mc, u32 sf, u32 Rd, i64 imm);
+void emit_sp_add(MCEmitter* mc, u32 imm);
+void aa64_emit_addr_adjust(MCEmitter* mc, u32 Rd, u32 base, i32 off);
+void aa64_emit_got_load_addr(CGTarget* t, u32 dst_reg, ObjSymId sym);
+void emit_global_addr(CGTarget* t, u32 dst_reg, ObjSymId sym, i64 addend);
+
+/* emit.c public surface */
+FrameSlot aa_frame_slot(CGTarget* t, const FrameSlotDesc* d);
+void aa_func_begin(CGTarget* t, const CGFuncDesc* fd);
+void aa_func_end(CGTarget* t);
+void aa_param(CGTarget* t, const CGParamDesc* p);
+
+/* alloc.c helpers used in emit.c / ops.c */
+AAImpl* impl_of(CGTarget* t);
+AASlot* aa64_slot_get(AAImpl* a, FrameSlot fs);
+void aa_free_reg(CGTarget* t, Reg r, RegClass cls);
+void aa_jump(CGTarget* t, Label l);
+
+/* ops.c helpers used in alloc.c */
+void aa_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma);
+void aa_store(CGTarget* t, Operand addr, Operand src, MemAccess ma);
+u32 aa64_force_reg_int(CGTarget* t, Operand op, u32 sf, u32 scratch);
+
+/* alloc.c helpers used in ops.c */
+void emit_cmp_ab(CGTarget* t, Operand a_op, Operand b_op);
+void aa_alloc_vtable_init(CGTarget* t);
+
+/* shared type helpers (defined in emit.c, used broadly) */
+int type_is_64(const Type* t);
+int type_is_fp_double(const Type* t);
+int type_is_signed(const Type* t);
+u32 type_byte_size(const Type* t);
+u32 size_idx_for_bytes(u32 nbytes);
+u32 reg_num(Operand op);
diff --git a/src/arch/aarch64/ops.c b/src/arch/aarch64/ops.c
@@ -0,0 +1,1895 @@
+/* aarch64/ops.c — data movement, arithmetic, calls, varargs, atomics,
+ * intrinsics, asm_block, set_loc, finalize/destroy, vtable constructor. */
+
+#include "arch/aarch64/internal.h"
+
+/* ============================================================
+ * Data movement
+ * ============================================================ */
+
+/* Materialize the integer constant `imm` in the register named by `dst`.
+ * The 64-bit form (sf=1) is selected when dst's type is 64 bits wide,
+ * otherwise the 32-bit form is used. */
+static void aa_load_imm(CGTarget* t, Operand dst, i64 imm) {
+  u32 wide = 0u;
+  if (type_is_64(dst.type)) wide = 1u;
+  u32 target_reg = reg_num(dst);
+  aa64_emit_load_imm(t->mc, wide, target_reg, imm);
+}
+
+/* Load a floating-point constant into the FP register named by `dst`.
+ *
+ * The constant bytes are appended to .rodata under a fresh local symbol
+ * (".LCFP<n>"), then an ADRP + LDR pair is emitted in the current section
+ * with page / low-12 relocations against that symbol. x9 is used as the
+ * address scratch register.
+ *
+ * Only 4- and 8-byte constants into an FP register are supported; anything
+ * else panics. The literal is placed at an offset aligned to at least its
+ * own size, because the LDST32/LDST64 low-12 relocations encode a scaled
+ * offset and cannot address a misaligned literal. */
+static void aa_load_const(CGTarget* t, Operand dst, ConstBytes cb) {
+  AAImpl* a = impl_of(t);
+  if (dst.cls != RC_FP) {
+    compiler_panic(t->c, a->loc, "aarch64 load_const: only FP supported in v1");
+  }
+  /* The LDR size index and relocation chosen below only distinguish 4 vs 8
+   * bytes; reject other sizes instead of emitting a wrong-width load. */
+  if (cb.size != 4 && cb.size != 8) {
+    compiler_panic(t->c, a->loc,
+                   "aarch64 load_const: FP constant size %u unsupported",
+                   (unsigned)cb.size);
+  }
+
+  Sym ro_name = pool_intern_cstr(t->c->global, ".rodata");
+  ObjSecId ro = obj_section(t->obj, ro_name, SEC_RODATA, SF_ALLOC, 1u);
+
+  /* Never place the literal with less than natural alignment. */
+  u32 align = cb.align ? (u32)cb.align : 4u;
+  if (align < (u32)cb.size) align = (u32)cb.size;
+
+  u32 cur_section = t->mc->section_id;
+  t->mc->set_section(t->mc, ro);
+  u32 ro_off = obj_align_to(t->obj, ro, align);
+  t->mc->emit_bytes(t->mc, cb.bytes, cb.size);
+
+  /* Build the symbol name ".LCFP<decimal sequence number>". */
+  char namebuf[64];
+  static u32 lit_seq = 0;
+  int len = 0;
+  {
+    const char* prefix = ".LCFP";
+    for (; prefix[len]; ++len) namebuf[len] = prefix[len];
+    u32 v = lit_seq++;
+    char tmp[16];
+    int tn = 0;
+    if (v == 0) {
+      tmp[tn++] = '0';
+    } else {
+      /* Digits are produced least-significant first, then reversed below. */
+      while (v) {
+        tmp[tn++] = '0' + (char)(v % 10);
+        v /= 10;
+      }
+    }
+    for (int i = tn - 1; i >= 0; --i) namebuf[len++] = tmp[i];
+    namebuf[len] = 0;
+  }
+  Sym sname = pool_intern_cstr(t->c->global, namebuf);
+  ObjSymId sym = obj_symbol(t->obj, sname, SB_LOCAL, SK_OBJ, ro, (u64)ro_off,
+                            (u64)cb.size);
+
+  /* Switch back to the section we were emitting code into. */
+  t->mc->set_section(t->mc, cur_section);
+
+  u32 adrp_pos = t->mc->pos(t->mc);
+  aa64_emit32(t->mc, aa64_adrp_base(9));
+  t->mc->emit_reloc_at(t->mc, cur_section, adrp_pos, R_AARCH64_ADR_PREL_PG_HI21,
+                       sym, 0, 0, 0);
+
+  u32 ldr_pos = t->mc->pos(t->mc);
+  u32 sidx = (cb.size == 8) ? 3u : 2u;
+  aa64_emit32(t->mc, aa64_ldr_fp_uimm(sidx, reg_num(dst), 9, 0));
+  RelocKind lo12 = (cb.size == 8) ? R_AARCH64_LDST64_ABS_LO12_NC
+                                  : R_AARCH64_LDST32_ABS_LO12_NC;
+  t->mc->emit_reloc_at(t->mc, cur_section, ldr_pos, lo12, sym, 0, 0, 0);
+}
+
+/* Register-to-register move from `src` to `dst`.
+ * If either operand is in the FP class an FMOV is emitted (double vs single
+ * chosen from dst's type); otherwise an integer MOV is emitted, 64-bit when
+ * dst's type is 64 bits wide. */
+static void aa_copy(CGTarget* t, Operand dst, Operand src) {
+  int involves_fp = (dst.cls == RC_FP) || (src.cls == RC_FP);
+  u32 insn;
+  if (involves_fp) {
+    u32 fp_kind = type_is_fp_double(dst.type) ? 1u : 0u;
+    insn = aa64_fmov_reg(fp_kind, reg_num(dst), reg_num(src));
+  } else {
+    u32 wide = type_is_64(dst.type) ? 1u : 0u;
+    insn = aa64_mov_reg(wide, reg_num(dst), reg_num(src));
+  }
+  aa64_emit32(t->mc, insn);
+}
+
+/* ============================================================
+ * Load / store
+ * ============================================================ */
+
+/* Pick the low-12 load/store relocation matching an access of `nbytes`
+ * bytes. Any size other than 1, 2 or 4 maps to the 64-bit relocation. */
+static RelocKind ldst_lo12_reloc_for(u32 nbytes) {
+  if (nbytes == 1) return R_AARCH64_LDST8_ABS_LO12_NC;
+  if (nbytes == 2) return R_AARCH64_LDST16_ABS_LO12_NC;
+  if (nbytes == 4) return R_AARCH64_LDST32_ABS_LO12_NC;
+  return R_AARCH64_LDST64_ABS_LO12_NC;
+}
+
+/* Nonzero when `sym` should be addressed through the GOT, as decided by
+ * obj_symbol_extern_via_got() for this compilation and object. */
+static int use_got_for_sym(CGTarget* t, ObjSymId sym) {
+  int via_got = obj_symbol_extern_via_got(t->c, t->obj, sym);
+  return via_got;
+}
+
+/* Resolve a memory operand to a (base register, displacement) pair.
+ *
+ * Returns the base register number and stores the displacement in *out_off.
+ *  - OPK_LOCAL:    base is x29, displacement is minus the slot offset.
+ *  - OPK_INDIRECT: base is the operand's register, displacement its ofs.
+ *  - OPK_GLOBAL:   the address is formed in tmp_reg, displacement 0.
+ * When a LOCAL/INDIRECT displacement lies outside [-256, 255] the full
+ * address is computed into tmp_reg and a zero displacement is reported.
+ * Any other operand kind panics. */
+static u32 addr_base(CGTarget* t, Operand addr, i32* out_off, u32 tmp_reg) {
+  AAImpl* a = impl_of(t);
+  u32 base_reg;
+  i32 disp;
+
+  switch (addr.kind) {
+    case OPK_LOCAL: {
+      AASlot* slot = aa64_slot_get(a, addr.v.frame_slot);
+      if (!slot) compiler_panic(t->c, a->loc, "aarch64 addr_base: bad slot");
+      base_reg = 29;
+      disp = -(i32)slot->off;
+      break;
+    }
+    case OPK_INDIRECT:
+      disp = addr.v.ind.ofs;
+      base_reg = addr.v.ind.base & 0x1f;
+      break;
+    case OPK_GLOBAL:
+      emit_global_addr(t, tmp_reg, addr.v.global.sym, addr.v.global.addend);
+      *out_off = 0;
+      return tmp_reg;
+    default:
+      compiler_panic(t->c, a->loc, "aarch64 addr_base: unsupported kind %d",
+                     (int)addr.kind);
+  }
+
+  /* Displacement too large for the caller's addressing mode: fold it. */
+  if (disp < -256 || disp > 255) {
+    aa64_emit_addr_adjust(t->mc, tmp_reg, base_reg, disp);
+    *out_off = 0;
+    return tmp_reg;
+  }
+  *out_off = disp;
+  return base_reg;
+}
+
+/* Load a value from memory operand `addr` into register `dst`.
+ *
+ * The access width is ma.size, or the byte size of addr's type when
+ * ma.size is 0. An FP-class destination selects the FP load encodings.
+ * x9 is used as the address scratch register.
+ *
+ * Globals:
+ *  - via GOT: the symbol address is loaded into x9 and the addend becomes
+ *    the load displacement. An addend outside [-256, 255] (the range
+ *    addr_base() accepts for this encoding) is first folded into x9.
+ *  - direct: ADRP + LDR with page / low-12 relocations carrying the addend.
+ * Everything else goes through addr_base(). */
+void aa_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) {
+  u32 sz = ma.size ? ma.size : type_byte_size(addr.type);
+  u32 sidx = size_idx_for_bytes(sz);
+
+  if (addr.kind == OPK_GLOBAL) {
+    MCEmitter* mc = t->mc;
+    u32 sec = mc->section_id;
+    ObjSymId sym = addr.v.global.sym;
+    i64 add = addr.v.global.addend;
+    if (use_got_for_sym(t, sym)) {
+      aa64_emit_got_load_addr(t, /*dst=*/9, sym);
+      i32 disp = (i32)add;
+      if (add < -256 || add > 255) {
+        /* Addend does not fit the displacement field: fold it into x9. */
+        aa64_emit_addr_adjust(mc, 9, 9, (i32)add);
+        disp = 0;
+      }
+      if (dst.cls == RC_FP) {
+        aa64_emit32(mc, aa64_ldur_fp(sidx, reg_num(dst), 9, disp));
+      } else {
+        aa64_emit32(mc, aa64_ldur(sidx, reg_num(dst), 9, disp));
+      }
+      return;
+    }
+    u32 adrp_pos = mc->pos(mc);
+    aa64_emit32(mc, aa64_adrp_base(/*Rd=*/9));
+    mc->emit_reloc_at(mc, sec, adrp_pos, R_AARCH64_ADR_PREL_PG_HI21, sym, add,
+                      0, 0);
+    u32 ld_pos = mc->pos(mc);
+    if (dst.cls == RC_FP) {
+      aa64_emit32(mc, aa64_ldr_fp_uimm(sidx, reg_num(dst), 9, 0));
+    } else {
+      aa64_emit32(mc, aa64_ldr_uimm(sidx, reg_num(dst), 9, 0));
+    }
+    mc->emit_reloc_at(mc, sec, ld_pos, ldst_lo12_reloc_for(sz), sym, add, 0, 0);
+    return;
+  }
+
+  i32 off;
+  u32 base = addr_base(t, addr, &off, 9);
+  if (dst.cls == RC_FP) {
+    aa64_emit32(t->mc, aa64_ldur_fp(sidx, reg_num(dst), base, off));
+  } else {
+    aa64_emit32(t->mc, aa64_ldur(sidx, reg_num(dst), base, off));
+  }
+}
+
+/* Store `src` to memory operand `addr`.
+ *
+ * The access width is ma.size, or the byte size of addr's type when
+ * ma.size is 0. An immediate source is first materialized in x9, in which
+ * case x10 serves as the address scratch; otherwise x9 is the address
+ * scratch. FP-class sources use the FP store encodings.
+ *
+ * Globals:
+ *  - via GOT: the symbol address is loaded into the scratch base and the
+ *    addend becomes the store displacement. An addend outside [-256, 255]
+ *    (the range addr_base() accepts for this encoding) is first folded
+ *    into the base register.
+ *  - direct: ADRP + STR with page / low-12 relocations carrying the addend.
+ * Everything else goes through addr_base(). */
+void aa_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) {
+  u32 sz = ma.size ? ma.size : type_byte_size(addr.type);
+  u32 sidx = size_idx_for_bytes(sz);
+
+  if (addr.kind == OPK_GLOBAL) {
+    MCEmitter* mc = t->mc;
+    u32 sec = mc->section_id;
+    ObjSymId sym = addr.v.global.sym;
+    i64 add = addr.v.global.addend;
+
+    u32 src_reg;
+    u32 src_is_fp = 0;
+    if (src.kind == OPK_IMM) {
+      u32 sf = (sz == 8) ? 1u : 0u;
+      aa64_emit_load_imm(mc, sf, /*Rd=*/9, src.v.imm);
+      src_reg = 9;
+    } else if (src.cls == RC_FP) {
+      src_reg = reg_num(src);
+      src_is_fp = 1;
+    } else {
+      src_reg = reg_num(src);
+    }
+    /* x9 already holds the immediate, so address through x10 in that case. */
+    u32 base = (src.kind == OPK_IMM) ? 10u : 9u;
+    if (use_got_for_sym(t, sym)) {
+      aa64_emit_got_load_addr(t, base, sym);
+      i32 disp = (i32)add;
+      if (add < -256 || add > 255) {
+        /* Addend does not fit the displacement field: fold it into base. */
+        aa64_emit_addr_adjust(mc, base, base, (i32)add);
+        disp = 0;
+      }
+      if (src_is_fp) {
+        aa64_emit32(mc, aa64_stur_fp(sidx, src_reg, base, disp));
+      } else {
+        aa64_emit32(mc, aa64_stur(sidx, src_reg, base, disp));
+      }
+      return;
+    }
+    u32 adrp_pos = mc->pos(mc);
+    aa64_emit32(mc, aa64_adrp_base(base));
+    mc->emit_reloc_at(mc, sec, adrp_pos, R_AARCH64_ADR_PREL_PG_HI21, sym, add,
+                      0, 0);
+    u32 st_pos = mc->pos(mc);
+    if (src_is_fp) {
+      aa64_emit32(mc, aa64_str_fp_uimm(sidx, src_reg, base, 0));
+    } else {
+      aa64_emit32(mc, aa64_str_uimm(sidx, src_reg, base, 0));
+    }
+    mc->emit_reloc_at(mc, sec, st_pos, ldst_lo12_reloc_for(sz), sym, add, 0, 0);
+    return;
+  }
+
+  i32 off;
+  u32 addr_tmp = (src.kind == OPK_IMM) ? 10u : 9u;
+  u32 base = addr_base(t, addr, &off, addr_tmp);
+
+  if (src.kind == OPK_IMM) {
+    u32 sf = (sz == 8) ? 1u : 0u;
+    aa64_emit_load_imm(t->mc, sf, 9, src.v.imm);
+    aa64_emit32(t->mc, aa64_stur(sidx, 9, base, off));
+    return;
+  }
+  if (src.cls == RC_FP) {
+    aa64_emit32(t->mc, aa64_stur_fp(sidx, reg_num(src), base, off));
+  } else {
+    aa64_emit32(t->mc, aa64_stur(sidx, reg_num(src), base, off));
+  }
+}
+
+/* Compute the address of lvalue `lv` into the register named by `dst`.
+ *
+ *  - OPK_LOCAL:    x29 minus the slot offset.
+ *  - OPK_INDIRECT: base register plus ofs.
+ *  - OPK_GLOBAL:   GOT load (plus addend adjustment) when the symbol is
+ *                  addressed via the GOT, else ADRP + ADD with relocations.
+ * Offsets that fit a 12-bit ADD/SUB immediate use a single instruction;
+ * larger ones are delegated to aa64_emit_addr_adjust() instead of being
+ * passed to the 12-bit immediate field unchecked (local case) or rejected
+ * (indirect case). Any other operand kind panics. */
+static void aa_addr_of(CGTarget* t, Operand dst, Operand lv) {
+  AAImpl* a = impl_of(t);
+  if (lv.kind == OPK_LOCAL) {
+    AASlot* s = aa64_slot_get(a, lv.v.frame_slot);
+    if (!s) compiler_panic(t->c, a->loc, "aarch64 addr_of: bad slot");
+    if (s->off <= 0xfffu) {
+      aa64_emit32(t->mc, aa64_sub_imm(1, reg_num(dst), 29, s->off, 0));
+    } else {
+      /* Slot lies beyond the reach of one SUB immediate. */
+      aa64_emit_addr_adjust(t->mc, reg_num(dst), 29, -(i32)s->off);
+    }
+    return;
+  }
+  if (lv.kind == OPK_INDIRECT) {
+    i32 ofs = lv.v.ind.ofs;
+    u32 base = lv.v.ind.base & 0x1f;
+    if (ofs == 0) {
+      aa64_emit32(t->mc, aa64_mov_reg(1, reg_num(dst), base));
+    } else if (ofs > 0 && ofs <= 0xfff) {
+      aa64_emit32(t->mc, aa64_add_imm(1, reg_num(dst), base, (u32)ofs, 0));
+    } else if (ofs < 0 && ofs >= -0xfff) {
+      /* Range is tested before negating so INT32_MIN never gets negated. */
+      aa64_emit32(t->mc, aa64_sub_imm(1, reg_num(dst), base, (u32)(-ofs), 0));
+    } else {
+      aa64_emit_addr_adjust(t->mc, reg_num(dst), base, ofs);
+    }
+    return;
+  }
+  if (lv.kind == OPK_GLOBAL) {
+    u32 rd = reg_num(dst);
+    ObjSymId sym = lv.v.global.sym;
+    i64 addend = lv.v.global.addend;
+    if (use_got_for_sym(t, sym)) {
+      aa64_emit_got_load_addr(t, rd, sym);
+      if (addend) aa64_emit_addr_adjust(t->mc, rd, rd, (i32)addend);
+      return;
+    }
+    u32 sec = t->mc->section_id;
+    u32 adrp_pos = t->mc->pos(t->mc);
+    aa64_emit32(t->mc, aa64_adrp_base(rd));
+    t->mc->emit_reloc_at(t->mc, sec, adrp_pos, R_AARCH64_ADR_PREL_PG_HI21, sym,
+                         addend, 0, 0);
+    u32 add_pos = t->mc->pos(t->mc);
+    aa64_emit32(t->mc, aa64_add_imm(1, rd, rd, 0, 0));
+    t->mc->emit_reloc_at(t->mc, sec, add_pos, R_AARCH64_ADD_ABS_LO12_NC, sym,
+                         addend, 0, 0);
+    return;
+  }
+  compiler_panic(t->c, a->loc, "aarch64: addr_of not implemented");
+}
+
+/* Compute the address of thread-local `sym` (+ addend) into `dst`.
+ * Emits MRS of TPIDR_EL0 into x9, then two ADD-immediate instructions that
+ * carry the TLSLE TPREL HI12 and LO12_NC relocations respectively. */
+static void aa_tls_addr_of(CGTarget* t, Operand dst, ObjSymId sym, i64 addend) {
+  MCEmitter* em = t->mc;
+  u32 code_sec = em->section_id;
+  u32 out = reg_num(dst);
+  u32 insn_pos;
+
+  /* x9 <- thread pointer */
+  aa64_emit32(em, aa64_mrs_tpidr_el0(/*Rt=*/9));
+
+  /* out <- x9 + hi12(tprel), shifted form */
+  insn_pos = em->pos(em);
+  aa64_emit32(em, aa64_add_imm(/*sf=*/1, out, /*Rn=*/9, /*imm12=*/0, /*sh=*/1));
+  em->emit_reloc_at(em, code_sec, insn_pos, R_AARCH64_TLSLE_ADD_TPREL_HI12, sym,
+                    addend, 0, 0);
+
+  /* out <- out + lo12(tprel) */
+  insn_pos = em->pos(em);
+  aa64_emit32(em, aa64_add_imm(/*sf=*/1, out, /*Rn=*/out, /*imm12=*/0, /*sh=*/0));
+  em->emit_reloc_at(em, code_sec, insn_pos, R_AARCH64_TLSLE_ADD_TPREL_LO12_NC,
+                    sym, addend, 0, 0);
+}
+
+/* ============================================================
+ * Aggregate helpers
+ * ============================================================ */
+
+/* Return a register holding the address described by `op`.
+ *
+ *  - OPK_REG:   the operand's own register is returned, nothing is emitted.
+ *  - OPK_LOCAL: x29 minus the slot offset is computed into `scratch`.
+ * A slot offset that fits a 12-bit immediate uses a single SUB; a larger
+ * one is delegated to aa64_emit_addr_adjust() rather than being passed to
+ * the 12-bit immediate field unchecked. Any other operand kind panics. */
+static u32 agg_addr_reg(CGTarget* t, Operand op, u32 scratch) {
+  if (op.kind == OPK_REG) return reg_num(op);
+  if (op.kind == OPK_LOCAL) {
+    AAImpl* a = impl_of(t);
+    AASlot* s = aa64_slot_get(a, op.v.frame_slot);
+    if (!s) compiler_panic(t->c, a->loc, "aarch64 agg: bad slot");
+    if (s->off <= 0xfffu) {
+      aa64_emit32(t->mc, aa64_sub_imm(1, scratch, 29, s->off, 0));
+    } else {
+      aa64_emit_addr_adjust(t->mc, scratch, 29, -(i32)s->off);
+    }
+    return scratch;
+  }
+  compiler_panic(t->c, impl_of(t)->loc,
+                 "aarch64 agg: address kind %d unsupported", (int)op.kind);
+}
+
+/* Copy agg.size bytes from src_addr to dst_addr with an unrolled sequence
+ * of load/store pairs through x12: as many 8-byte chunks as fit, then 4-,
+ * 2- and 1-byte chunks for the tail. The destination address uses x9 as
+ * scratch; the source uses x10 (or x11 if the destination register is x10).
+ * NOTE(review): the byte index is passed directly as the LDUR/STUR
+ * displacement; confirm callers keep agg.size within that encoding's range. */
+static void aa_copy_bytes(CGTarget* t, Operand dst_addr, Operand src_addr,
+                          AggregateAccess agg) {
+  static const u32 chunk_bytes[4] = {8u, 4u, 2u, 1u};
+  static const u32 chunk_sidx[4] = {3u, 2u, 1u, 0u};
+
+  MCEmitter* em = t->mc;
+  u32 to_reg = agg_addr_reg(t, dst_addr, 9);
+  u32 from_reg = agg_addr_reg(t, src_addr, (to_reg == 10) ? 11u : 10u);
+  u32 total = agg.size;
+  u32 done = 0;
+
+  for (int k = 0; k < 4; ++k) {
+    while (total - done >= chunk_bytes[k]) {
+      aa64_emit32(em, aa64_ldur(chunk_sidx[k], 12, from_reg, (i32)done));
+      aa64_emit32(em, aa64_stur(chunk_sidx[k], 12, to_reg, (i32)done));
+      done += chunk_bytes[k];
+    }
+  }
+}
+
+/* Fill agg.size bytes at dst_addr with an immediate byte value, using an
+ * unrolled sequence of stores (8-byte chunks first, then 4/2/1 for the
+ * tail). A zero fill stores register 31 directly; any other byte is
+ * replicated across 64 bits and loaded into x12 first. Only an immediate
+ * byte_value is supported; anything else panics. */
+static void aa_set_bytes(CGTarget* t, Operand dst_addr, Operand byte_value,
+                         AggregateAccess agg) {
+  static const u32 chunk_bytes[4] = {8u, 4u, 2u, 1u};
+  static const u32 chunk_sidx[4] = {3u, 2u, 1u, 0u};
+
+  MCEmitter* em = t->mc;
+  u32 to_reg = agg_addr_reg(t, dst_addr, 9);
+
+  if (byte_value.kind != OPK_IMM) {
+    compiler_panic(t->c, impl_of(t)->loc,
+                   "aarch64 set_bytes: REG byte not yet supported");
+  }
+  u32 fill = (u32)(byte_value.v.imm & 0xffu);
+  u32 total = agg.size;
+
+  /* Register whose contents are stored: 31 for zero, x12 for a pattern. */
+  u32 value_reg = 31;
+  if (fill != 0) {
+    u64 pattern = fill;
+    pattern |= pattern << 8;
+    pattern |= pattern << 16;
+    pattern |= pattern << 32;
+    aa64_emit_load_imm(em, /*sf=*/1u, /*Rd=*/12u, (i64)pattern);
+    value_reg = 12;
+  }
+
+  u32 done = 0;
+  for (int k = 0; k < 4; ++k) {
+    while (total - done >= chunk_bytes[k]) {
+      aa64_emit32(em, aa64_stur(chunk_sidx[k], value_reg, to_reg, (i32)done));
+      done += chunk_bytes[k];
+    }
+  }
+}
+
+/* ============================================================
+ * Bitfields
+ * ============================================================ */
+
+/* Read a bit-field: load the storage unit at record_addr + storage_offset
+ * into dst, then extract `bit_width` bits starting at `bit_offset` in place
+ * with SBFM (signed fields) or UBFM (unsigned fields). A zero storage size
+ * is treated as 4 bytes and a zero width as 1 bit. */
+static void aa_bitfield_load(CGTarget* t, Operand dst, Operand record_addr,
+                             BitFieldAccess bf) {
+  MCEmitter* em = t->mc;
+  u32 rec_reg = agg_addr_reg(t, record_addr, 9);
+
+  u32 unit_bytes = bf.storage.size;
+  if (!unit_bytes) unit_bytes = 4u;
+  u32 wide = (unit_bytes == 8u) ? 1u : 0u;
+  u32 unit_sidx = size_idx_for_bytes(unit_bytes);
+  u32 out = reg_num(dst);
+
+  aa64_emit32(em, aa64_ldur(unit_sidx, out, rec_reg, (i32)bf.storage_offset));
+
+  u32 first_bit = bf.bit_offset;
+  u32 nbits = bf.bit_width;
+  if (!nbits) nbits = 1u;
+  u32 last_bit = first_bit + nbits - 1u;
+
+  u32 extract = bf.signed_ ? aa64_sbfm(wide, out, out, first_bit, last_bit)
+                           : aa64_ubfm(wide, out, out, first_bit, last_bit);
+  aa64_emit32(em, extract);
+}
+
+/* Write a bit-field with a read-modify-write sequence: load the storage
+ * unit into x10, insert the low `bit_width` bits of the source at
+ * `bit_offset` with BFM, and store x10 back. An immediate source is first
+ * materialized in x11; a register source is used as is; other kinds panic.
+ * A zero storage size is treated as 4 bytes and a zero width as 1 bit. */
+static void aa_bitfield_store(CGTarget* t, Operand record_addr, Operand src,
+                              BitFieldAccess bf) {
+  MCEmitter* em = t->mc;
+  u32 rec_reg = agg_addr_reg(t, record_addr, 9);
+
+  u32 unit_bytes = bf.storage.size;
+  if (!unit_bytes) unit_bytes = 4u;
+  u32 wide = (unit_bytes == 8u) ? 1u : 0u;
+  u32 unit_sidx = size_idx_for_bytes(unit_bytes);
+
+  aa64_emit32(em, aa64_ldur(unit_sidx, /*Rt=*/10u, rec_reg,
+                            (i32)bf.storage_offset));
+
+  u32 value_reg;
+  switch (src.kind) {
+    case OPK_IMM:
+      aa64_emit_load_imm(em, wide, /*Rd=*/11u, src.v.imm);
+      value_reg = 11u;
+      break;
+    case OPK_REG:
+      value_reg = reg_num(src);
+      break;
+    default:
+      compiler_panic(t->c, impl_of(t)->loc,
+                     "aarch64 bitfield_store: src kind %d unsupported",
+                     (int)src.kind);
+  }
+
+  /* BFM operands for inserting `nbits` bits at `first_bit`. */
+  u32 reg_bits = wide ? 64u : 32u;
+  u32 first_bit = bf.bit_offset;
+  u32 nbits = bf.bit_width;
+  if (!nbits) nbits = 1u;
+  u32 rotate = (reg_bits - first_bit) % reg_bits;
+  u32 top = nbits - 1u;
+  aa64_emit32(em, aa64_bfm(wide, /*Rd=*/10u, value_reg, rotate, top));
+
+  aa64_emit32(em, aa64_stur(unit_sidx, /*Rt=*/10u, rec_reg,
+                            (i32)bf.storage_offset));
+}
+
+/* ============================================================
+ * Arithmetic helpers
+ * ============================================================ */
+
+/* Return the number of an integer register holding `op`.
+ * A register operand is returned unchanged; an immediate is loaded into
+ * `scratch` (64-bit form when sf is 1) and `scratch` is returned. Any other
+ * operand kind panics. */
+u32 aa64_force_reg_int(CGTarget* t, Operand op, u32 sf, u32 scratch) {
+  switch (op.kind) {
+    case OPK_REG:
+      return reg_num(op);
+    case OPK_IMM:
+      aa64_emit_load_imm(t->mc, sf, scratch, op.v.imm);
+      return scratch;
+    default:
+      break;
+  }
+  compiler_panic(t->c, impl_of(t)->loc,
+                 "aarch64 binop: operand kind %d unsupported", (int)op.kind);
+}
+
+/* Emit code for the binary operation `dst = a_op <op> b_op`.
+ *
+ * FP add/sub/mul/div require both inputs in registers and an FP-class dst.
+ * Integer operations first try an immediate encoding when exactly the
+ * second operand is an immediate, and otherwise fall back to the
+ * register-register form, loading immediates into x9/x10. Remainders are
+ * computed as a divide into x11 followed by a multiply-subtract.
+ * Unsupported operators panic. */
+static void aa_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op,
+ Operand b_op) {
+ MCEmitter* mc = t->mc;
+
+ /* Floating-point arithmetic: register-only, single or double by dst type. */
+ if (op == BO_FADD || op == BO_FSUB || op == BO_FMUL || op == BO_FDIV) {
+ if (a_op.kind != OPK_REG || b_op.kind != OPK_REG || dst.cls != RC_FP) {
+ compiler_panic(t->c, impl_of(t)->loc,
+ "aarch64 binop: FP op requires REG operands");
+ }
+ u32 type = type_is_fp_double(dst.type) ? 1u : 0u;
+ u32 rd = reg_num(dst);
+ u32 rn = reg_num(a_op);
+ u32 rm = reg_num(b_op);
+ u32 w;
+ switch (op) {
+ case BO_FADD: w = aa64_fadd(type, rd, rn, rm); break;
+ case BO_FSUB: w = aa64_fsub(type, rd, rn, rm); break;
+ case BO_FMUL: w = aa64_fmul(type, rd, rn, rm); break;
+ case BO_FDIV: w = aa64_fdiv(type, rd, rn, rm); break;
+ default: w = 0; break;
+ }
+ aa64_emit32(mc, w);
+ return;
+ }
+
+ u32 sf = type_is_64(dst.type) ? 1u : 0u;
+ u32 rd = reg_num(dst);
+
+ /* For commutative operators, move a lone immediate into the second
+ * position so the immediate forms below can be tried. */
+ switch (op) {
+ case BO_IADD:
+ case BO_AND:
+ case BO_OR:
+ case BO_XOR: {
+ if (a_op.kind == OPK_IMM && b_op.kind != OPK_IMM) {
+ Operand t_op = a_op; a_op = b_op; b_op = t_op;
+ }
+ break;
+ }
+ default: break;
+ }
+
+ /* Immediate fast path. Each case returns when its encoding helper reports
+ * success; otherwise control falls through to the register form. */
+ if (b_op.kind == OPK_IMM && a_op.kind != OPK_IMM) {
+ /* NOTE(review): a_op is assumed to be a register operand here; its kind
+ * is only checked to be non-immediate. */
+ u32 rn_reg = reg_num(a_op);
+ i64 imm = b_op.v.imm;
+ u32 imm12, sh, N, immr, imms;
+ switch (op) {
+ case BO_IADD:
+ if (aa64_addsub_imm_fits(imm, &imm12, &sh)) {
+ aa64_emit32(mc, aa64_add_imm(sf, rd, rn_reg, imm12, sh));
+ return;
+ }
+ break;
+ case BO_ISUB:
+ if (aa64_addsub_imm_fits(imm, &imm12, &sh)) {
+ aa64_emit32(mc, aa64_sub_imm(sf, rd, rn_reg, imm12, sh));
+ return;
+ }
+ break;
+ case BO_AND:
+ if (aa64_logimm_encode((u64)imm, sf, &N, &immr, &imms)) {
+ aa64_emit32(mc, aa64_and_imm(sf, rd, rn_reg, N, immr, imms));
+ return;
+ }
+ break;
+ case BO_OR:
+ if (aa64_logimm_encode((u64)imm, sf, &N, &immr, &imms)) {
+ aa64_emit32(mc, aa64_orr_imm(sf, rd, rn_reg, N, immr, imms));
+ return;
+ }
+ break;
+ case BO_XOR:
+ if (aa64_logimm_encode((u64)imm, sf, &N, &immr, &imms)) {
+ aa64_emit32(mc, aa64_eor_imm(sf, rd, rn_reg, N, immr, imms));
+ return;
+ }
+ break;
+ /* Shift counts are reduced modulo the operand width (32 or 64). */
+ case BO_SHL: {
+ u32 width = sf ? 64u : 32u;
+ u32 sh_amt = (u32)((u64)imm & (width - 1u));
+ if (aa64_lsl_imm_fields(sh_amt, sf, &immr, &imms)) {
+ aa64_emit32(mc, aa64_ubfm(sf, rd, rn_reg, immr, imms));
+ return;
+ }
+ break;
+ }
+ case BO_SHR_U: {
+ u32 width = sf ? 64u : 32u;
+ u32 sh_amt = (u32)((u64)imm & (width - 1u));
+ if (aa64_lsr_imm_fields(sh_amt, sf, &immr, &imms)) {
+ aa64_emit32(mc, aa64_ubfm(sf, rd, rn_reg, immr, imms));
+ return;
+ }
+ break;
+ }
+ case BO_SHR_S: {
+ u32 width = sf ? 64u : 32u;
+ u32 sh_amt = (u32)((u64)imm & (width - 1u));
+ if (aa64_asr_imm_fields(sh_amt, sf, &immr, &imms)) {
+ aa64_emit32(mc, aa64_sbfm(sf, rd, rn_reg, immr, imms));
+ return;
+ }
+ break;
+ }
+ default: break;
+ }
+ }
+
+ /* Register form: immediates go to x9, or x10 if x9 already holds the
+ * first operand. */
+ u32 rn = aa64_force_reg_int(t, a_op, sf, 9);
+ u32 rm = aa64_force_reg_int(t, b_op, sf, (rn == 9) ? 10 : 9);
+
+ u32 word;
+ switch (op) {
+ case BO_IADD: word = aa64_add(sf, rd, rn, rm); break;
+ case BO_ISUB: word = aa64_sub(sf, rd, rn, rm); break;
+ case BO_IMUL: word = aa64_mul(sf, rd, rn, rm); break;
+ case BO_AND: word = aa64_and(sf, rd, rn, rm); break;
+ case BO_OR: word = aa64_orr(sf, rd, rn, rm); break;
+ case BO_XOR: word = aa64_eor(sf, rd, rn, rm); break;
+ case BO_SHL: word = aa64_lslv(sf, rd, rn, rm); break;
+ case BO_SHR_U: word = aa64_lsrv(sf, rd, rn, rm); break;
+ case BO_SHR_S: word = aa64_asrv(sf, rd, rn, rm); break;
+ case BO_UDIV: word = aa64_udiv(sf, rd, rn, rm); break;
+ case BO_SDIV: word = aa64_sdiv(sf, rd, rn, rm); break;
+ /* Remainder: quotient into x11, then a multiply-subtract; presumably
+ * rd = rn - x11 * rm (depends on aa64_msub's operand order). */
+ case BO_SREM:
+ aa64_emit32(mc, aa64_sdiv(sf, 11, rn, rm));
+ word = aa64_msub(sf, rd, 11, rm, rn);
+ break;
+ case BO_UREM:
+ aa64_emit32(mc, aa64_udiv(sf, 11, rn, rm));
+ word = aa64_msub(sf, rd, 11, rm, rn);
+ break;
+ /* FP operators were handled above and never reach this switch. */
+ case BO_FADD:
+ case BO_FSUB:
+ case BO_FMUL:
+ case BO_FDIV:
+ default:
+ compiler_panic(t->c, impl_of(t)->loc, "aarch64 binop: op %d unimpl",
+ (int)op);
+ }
+ aa64_emit32(mc, word);
+}
+
+/* Emit code for the unary operation `dst = <op> a_op` on integers.
+ * The operand is first forced into a register (immediates go to x9).
+ *  - UO_NEG:  arithmetic negation.
+ *  - UO_BNOT: bitwise complement.
+ *  - UO_NOT:  compare against zero, then set dst from the EQ condition.
+ * Any other operator panics. */
+static void aa_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) {
+  MCEmitter* em = t->mc;
+  u32 wide = type_is_64(dst.type) ? 1u : 0u;
+  u32 out = reg_num(dst);
+  u32 in = aa64_force_reg_int(t, a_op, wide, 9);
+
+  if (op == UO_NEG) {
+    aa64_emit32(em, aa64_neg(wide, out, in));
+    return;
+  }
+  if (op == UO_BNOT) {
+    aa64_emit32(em, aa64_mvn(wide, out, in));
+    return;
+  }
+  if (op == UO_NOT) {
+    aa64_emit32(em, aa64_subs_imm(wide, /*ZR=*/31, in, 0));
+    aa64_emit32(em, aa64_cset_eq(wide, out));
+    return;
+  }
+  compiler_panic(t->c, impl_of(t)->loc, "aarch64 unop: op %d unimpl",
+                 (int)op);
+}
+
+/* Emit a value conversion of kind `k` from register `src` into register
+ * `dst`. Both operands are read through reg_num(), so they are expected to
+ * be register operands. Several kinds validate the register classes and
+ * panic on a mismatch; unknown kinds panic. */
+static void aa_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) {
+ AAImpl* a = impl_of(t);
+ MCEmitter* mc = t->mc;
+ u32 rd = reg_num(dst);
+ u32 rn = reg_num(src);
+
+ switch (k) {
+ /* Sign-extend from the source width, using the destination's width for
+ * the instruction form. */
+ case CV_SEXT: {
+ if (src.cls != RC_INT || dst.cls != RC_INT) {
+ compiler_panic(t->c, a->loc, "aarch64 convert SEXT: bad classes");
+ }
+ u32 src_bits = type_byte_size(src.type) * 8u;
+ u32 sf_dst = type_is_64(dst.type) ? 1u : 0u;
+ aa64_emit32(mc, aa64_sbfm(sf_dst, rd, rn, /*immr=*/0, /*imms=*/src_bits - 1u));
+ return;
+ }
+ /* Zero-extend: a 32-bit source uses a 32-bit register move; narrower
+ * sources use a 32-bit UBFM over the source width.
+ * NOTE(review): a 64-bit source would pass imms=63 to the 32-bit form;
+ * confirm callers never request that. */
+ case CV_ZEXT: {
+ if (src.cls != RC_INT || dst.cls != RC_INT) {
+ compiler_panic(t->c, a->loc, "aarch64 convert ZEXT: bad classes");
+ }
+ u32 src_bits = type_byte_size(src.type) * 8u;
+ if (src_bits == 32u) {
+ aa64_emit32(mc, aa64_mov_reg(0, rd, rn));
+ } else {
+ aa64_emit32(mc, aa64_ubfm(0, rd, rn, /*immr=*/0, /*imms=*/src_bits - 1u));
+ }
+ return;
+ }
+ /* Truncation is emitted as a 32-bit register move regardless of the
+ * destination width. */
+ case CV_TRUNC: {
+ aa64_emit32(mc, aa64_mov_reg(0, rd, rn));
+ return;
+ }
+ /* Signed integer -> FP; source width and FP precision come from the
+ * operand types. */
+ case CV_ITOF_S: {
+ u32 sf_src = type_is_64(src.type) ? 1u : 0u;
+ u32 type = type_is_fp_double(dst.type) ? 1u : 0u;
+ aa64_emit32(mc, aa64_scvtf(sf_src, type, rd, rn));
+ return;
+ }
+ /* Unsigned integer -> FP. */
+ case CV_ITOF_U: {
+ u32 sf_src = type_is_64(src.type) ? 1u : 0u;
+ u32 type = type_is_fp_double(dst.type) ? 1u : 0u;
+ aa64_emit32(mc, aa64_ucvtf(sf_src, type, rd, rn));
+ return;
+ }
+ /* FP -> signed integer. */
+ case CV_FTOI_S: {
+ if (src.cls != RC_FP || dst.cls != RC_INT) {
+ compiler_panic(t->c, a->loc, "aarch64 convert FTOI_S: bad classes");
+ }
+ u32 sf = type_is_64(dst.type) ? 1u : 0u;
+ u32 type = type_is_fp_double(src.type) ? 1u : 0u;
+ aa64_emit32(mc, aa64_fcvtzs(sf, type, rd, rn));
+ return;
+ }
+ /* FP -> unsigned integer. */
+ case CV_FTOI_U: {
+ if (src.cls != RC_FP || dst.cls != RC_INT) {
+ compiler_panic(t->c, a->loc, "aarch64 convert FTOI_U: bad classes");
+ }
+ u32 sf = type_is_64(dst.type) ? 1u : 0u;
+ u32 type = type_is_fp_double(src.type) ? 1u : 0u;
+ aa64_emit32(mc, aa64_fcvtzu(sf, type, rd, rn));
+ return;
+ }
+ /* FP widening; always emitted as the single -> double form. */
+ case CV_FEXT: {
+ aa64_emit32(mc, aa64_fcvt_d_s(rd, rn));
+ return;
+ }
+ /* FP narrowing; always emitted as the double -> single form. */
+ case CV_FTRUNC: {
+ aa64_emit32(mc, aa64_fcvt_s_d(rd, rn));
+ return;
+ }
+ /* Bit-pattern move between the integer and FP register files; the
+ * 8-byte vs 4-byte form is chosen from the FP-side operand's type.
+ * Same-class bitcasts panic. */
+ case CV_BITCAST: {
+ if (src.cls == RC_INT && dst.cls == RC_FP) {
+ u32 sz = type_byte_size(dst.type);
+ aa64_emit32(mc, sz == 8 ? aa64_fmov_d_x(rd, rn) : aa64_fmov_s_w(rd, rn));
+ } else if (src.cls == RC_FP && dst.cls == RC_INT) {
+ u32 sz = type_byte_size(src.type);
+ aa64_emit32(mc, sz == 8 ? aa64_fmov_x_d(rd, rn) : aa64_fmov_w_s(rd, rn));
+ } else {
+ compiler_panic(t->c, a->loc,
+ "aarch64 convert BITCAST: same-class not yet supported");
+ }
+ return;
+ }
+ default:
+ compiler_panic(t->c, a->loc, "aarch64 convert kind %d unimpl", (int)k);
+ }
+}
+
+/* ============================================================
+ * Calls
+ * ============================================================ */
+
+/* Resolve where one ABI part of a memory-resident argument lives.
+ * `av->storage` must be OPK_LOCAL or OPK_INDIRECT. Returns the base register
+ * and stores the displacement in *out_off; when the displacement lies
+ * outside [-256, 255] the address is formed in x9 and 0 is reported. */
+static u32 aa_arg_part_base(CGTarget* t, const CGABIValue* av,
+                            const ABIArgPart* pt, i32* out_off) {
+  AAImpl* a = impl_of(t);
+  if (av->storage.kind == OPK_LOCAL) {
+    AASlot* s = aa64_slot_get(a, av->storage.v.frame_slot);
+    if (!s) compiler_panic(t->c, a->loc, "aarch64 call: bad arg slot");
+    i32 off = -(i32)s->off + (i32)pt->src_offset;
+    if (off >= -256 && off <= 255) {
+      *out_off = off;
+      return 29;
+    }
+    aa64_emit_addr_adjust(t->mc, /*Rd=*/9, 29, off);
+    *out_off = 0;
+    return 9;
+  }
+  Operand src;
+  memset(&src, 0, sizeof src);
+  src.kind = OPK_INDIRECT;
+  src.v.ind.base = av->storage.v.ind.base;
+  src.v.ind.ofs = av->storage.v.ind.ofs + (i32)pt->src_offset;
+  return addr_base(t, src, out_off, /*tmp=*/9);
+}
+
+/* Place one call argument in its register(s) or outgoing stack slot(s).
+ *
+ * next_int / next_fp are the next free integer / FP argument register
+ * numbers; once a counter reaches 8 further parts of that class go to the
+ * stack at [sp + *stack_off], which advances by 8 per part.
+ *
+ * An argument without ABI info is treated as a single directly-passed part
+ * sized from its type; if fi->vararg_on_stack is set, both register
+ * counters are forced to 8 so it and all later arguments go to the stack.
+ *
+ * ABI_ARG_INDIRECT arguments pass the address of their storage. Direct
+ * arguments are passed part by part from IMM/REG/LOCAL/INDIRECT storage
+ * (integer class) or REG/LOCAL/INDIRECT storage (FP class). x9 and v16
+ * serve as scratch. Unsupported combinations panic.
+ *
+ * Frame-relative offsets that do not fit the immediate field of the
+ * instruction in use are formed with aa64_emit_addr_adjust(). */
+static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi,
+                           const CGABIValue* av, u32* next_int, u32* next_fp,
+                           u32* stack_off) {
+  AAImpl* a = impl_of(t);
+  ABIArgInfo va_ai;
+  ABIArgPart va_pt;
+  const ABIArgInfo* ai = av->abi;
+  if (!ai) {
+    /* No ABI info: synthesize a one-part direct description from the type. */
+    u32 sz = type_byte_size(av->type);
+    memset(&va_ai, 0, sizeof va_ai);
+    memset(&va_pt, 0, sizeof va_pt);
+    va_ai.kind = ABI_ARG_DIRECT;
+    va_ai.parts = &va_pt;
+    va_ai.nparts = 1;
+    va_pt.cls = (av->storage.cls == RC_FP) ? ABI_CLASS_FP : ABI_CLASS_INT;
+    va_pt.size = sz;
+    va_pt.align = sz;
+    va_pt.src_offset = 0;
+    ai = &va_ai;
+    if (fi && fi->vararg_on_stack) {
+      *next_int = 8;
+      *next_fp = 8;
+    }
+  }
+  if (ai->kind == ABI_ARG_IGNORE) return;
+
+  if (ai->kind == ABI_ARG_INDIRECT) {
+    int to_stack = (*next_int >= 8);
+    u32 dst_reg = to_stack ? 9u : (*next_int)++;
+    if (av->storage.kind == OPK_LOCAL) {
+      AASlot* s = aa64_slot_get(a, av->storage.v.frame_slot);
+      if (!s) compiler_panic(t->c, a->loc, "aarch64 call: bad byval slot");
+      if (s->off <= 0xfffu) {
+        aa64_emit32(t->mc, aa64_sub_imm(1, dst_reg, 29, s->off, 0));
+      } else {
+        /* Slot lies beyond the reach of one SUB immediate. */
+        aa64_emit_addr_adjust(t->mc, dst_reg, 29, -(i32)s->off);
+      }
+    } else if (av->storage.kind == OPK_INDIRECT) {
+      aa64_emit_addr_adjust(t->mc, dst_reg, av->storage.v.ind.base & 0x1f,
+                            av->storage.v.ind.ofs);
+    } else {
+      compiler_panic(t->c, a->loc,
+                     "aarch64 call: INDIRECT arg storage kind %d unsupported",
+                     (int)av->storage.kind);
+    }
+    if (to_stack) {
+      aa64_emit32(t->mc, aa64_str_uimm(3, dst_reg, 31, *stack_off));
+      *stack_off += 8;
+    }
+    return;
+  }
+
+  for (u16 i = 0; i < ai->nparts; ++i) {
+    const ABIArgPart* pt = &ai->parts[i];
+    u32 sz = pt->size;
+    u32 sidx = size_idx_for_bytes(sz);
+
+    if (pt->cls == ABI_CLASS_INT) {
+      int to_stack = (*next_int >= 8);
+      /* Stack-bound parts are staged in x9 before being stored. */
+      u32 dst_reg = to_stack ? 9u : (*next_int)++;
+      switch (av->storage.kind) {
+        case OPK_IMM: {
+          u32 sf = (sz == 8) ? 1u : 0u;
+          aa64_emit_load_imm(t->mc, sf, dst_reg, av->storage.v.imm);
+          break;
+        }
+        case OPK_REG: {
+          u32 sf = (sz == 8) ? 1u : 0u;
+          aa64_emit32(t->mc, aa64_mov_reg(sf, dst_reg, reg_num(av->storage)));
+          break;
+        }
+        case OPK_LOCAL:
+        case OPK_INDIRECT: {
+          i32 off;
+          u32 base = aa_arg_part_base(t, av, pt, &off);
+          aa64_emit32(t->mc, aa64_ldur(sidx, dst_reg, base, off));
+          break;
+        }
+        default:
+          compiler_panic(t->c, a->loc,
+                         "aarch64 call: arg storage kind %d unsupported",
+                         (int)av->storage.kind);
+      }
+      if (to_stack) {
+        aa64_emit32(t->mc, aa64_str_uimm(3, dst_reg, 31, *stack_off));
+        *stack_off += 8;
+      }
+    } else if (pt->cls == ABI_CLASS_FP) {
+      int to_stack = (*next_fp >= 8);
+      if (!to_stack) {
+        u32 dst_reg = (*next_fp)++;
+        switch (av->storage.kind) {
+          case OPK_REG: {
+            u32 type = (sz == 8) ? 1u : 0u;
+            aa64_emit32(t->mc,
+                        aa64_fmov_reg(type, dst_reg, reg_num(av->storage)));
+            break;
+          }
+          case OPK_LOCAL:
+          case OPK_INDIRECT: {
+            i32 off;
+            u32 base = aa_arg_part_base(t, av, pt, &off);
+            aa64_emit32(t->mc, aa64_ldur_fp(sidx, dst_reg, base, off));
+            break;
+          }
+          default:
+            compiler_panic(t->c, a->loc,
+                           "aarch64 call: FP arg storage kind %d unsupported",
+                           (int)av->storage.kind);
+        }
+      } else {
+        switch (av->storage.kind) {
+          case OPK_REG:
+            aa64_emit32(t->mc, aa64_stur_fp(sidx, reg_num(av->storage), 31,
+                                            (i32)*stack_off));
+            break;
+          case OPK_LOCAL:
+          case OPK_INDIRECT: {
+            /* Memory-to-stack: bounce through v16. */
+            i32 off;
+            u32 base = aa_arg_part_base(t, av, pt, &off);
+            aa64_emit32(t->mc, aa64_ldur_fp(sidx, /*Vt=*/16u, base, off));
+            aa64_emit32(t->mc,
+                        aa64_stur_fp(sidx, /*Vt=*/16u, 31, (i32)*stack_off));
+            break;
+          }
+          default:
+            compiler_panic(
+                t->c, a->loc,
+                "aarch64 call: FP stack-arg storage kind %d unsupported",
+                (int)av->storage.kind);
+        }
+        *stack_off += 8;
+      }
+    } else {
+      compiler_panic(t->c, a->loc, "aarch64 call: ABI class %d unimpl",
+                     (int)pt->cls);
+    }
+  }
+}
+
+/* Emit a call described by `d`.
+ *
+ * Steps:
+ *  1. If the ABI info says the callee returns via sret, the address of the
+ *     LOCAL return slot is placed in x8.
+ *  2. Each argument is placed by emit_arg_value(); the outgoing stack area
+ *     used (rounded up to 16) is recorded in a->max_outgoing.
+ *  3. BL with a CALL26 relocation for a global callee, BLR for a register
+ *     callee; other callee kinds panic.
+ *  4. Returned parts are moved from the return registers (integer and FP
+ *     counted separately from 0) into d->ret.storage, which may be a
+ *     register (single part only), a LOCAL slot, or INDIRECT memory.
+ *
+ * d->abi may be NULL; then no sret setup and no return unpacking happens.
+ * Frame-relative offsets that do not fit the immediate field in use are
+ * formed with aa64_emit_addr_adjust(); x9 is the scratch for return stores
+ * (this file uses x9 as scratch throughout — presumably operands never
+ * live there; confirm against the register allocator). */
+static void aa_call(CGTarget* t, const CGCallDesc* d) {
+  AAImpl* a = impl_of(t);
+  MCEmitter* mc = t->mc;
+
+  u32 next_int = 0, next_fp = 0, stack_off = 0;
+
+  if (d->abi && d->abi->has_sret) {
+    if (d->ret.storage.kind != OPK_LOCAL) {
+      compiler_panic(t->c, a->loc,
+                     "aarch64 call: sret destination must be LOCAL");
+    }
+    AASlot* s = aa64_slot_get(a, d->ret.storage.v.frame_slot);
+    if (!s) compiler_panic(t->c, a->loc, "aarch64 call: bad sret slot");
+    if (s->off <= 0xfffu) {
+      aa64_emit32(mc, aa64_sub_imm(1, 8, 29, s->off, 0));
+    } else {
+      /* Slot lies beyond the reach of one SUB immediate. */
+      aa64_emit_addr_adjust(mc, /*Rd=*/8, 29, -(i32)s->off);
+    }
+  }
+
+  for (u32 i = 0; i < d->nargs; ++i) {
+    emit_arg_value(t, d->abi, &d->args[i], &next_int, &next_fp, &stack_off);
+  }
+
+  u32 needed = (stack_off + 15u) & ~15u;
+  if (needed > a->max_outgoing) a->max_outgoing = needed;
+
+  if (d->callee.kind == OPK_GLOBAL) {
+    u32 bl_pos = mc->pos(mc);
+    aa64_emit32(mc, aa64_bl_base());
+    mc->emit_reloc_at(mc, mc->section_id, bl_pos, R_AARCH64_CALL26,
+                      d->callee.v.global.sym, d->callee.v.global.addend, 0, 0);
+  } else if (d->callee.kind == OPK_REG) {
+    aa64_emit32(mc, aa64_blr(reg_num(d->callee)));
+  } else {
+    compiler_panic(t->c, a->loc, "aarch64 call: callee kind %d unsupported",
+                   (int)d->callee.kind);
+  }
+
+  /* The sret check above tolerates a NULL abi; do the same here instead of
+   * dereferencing it. */
+  if (!d->abi) return;
+
+  const ABIArgInfo* ri = &d->abi->ret;
+  if (ri->kind == ABI_ARG_IGNORE || ri->kind == ABI_ARG_INDIRECT) {
+    return;
+  }
+  if (ri->nparts == 0) return;
+
+  Operand rs = d->ret.storage;
+  u32 next_int_ret = 0, next_fp_ret = 0;
+  for (u16 i = 0; i < ri->nparts; ++i) {
+    const ABIArgPart* p = &ri->parts[i];
+    u32 src_reg;
+    if (p->cls == ABI_CLASS_INT) {
+      src_reg = next_int_ret++;
+    } else if (p->cls == ABI_CLASS_FP) {
+      src_reg = next_fp_ret++;
+    } else {
+      compiler_panic(t->c, a->loc, "aarch64 call: ret part cls %d unimpl",
+                     (int)p->cls);
+    }
+
+    if (rs.kind == OPK_REG) {
+      if (ri->nparts != 1) {
+        compiler_panic(t->c, a->loc,
+                       "aarch64 call: REG ret_storage with %u parts",
+                       (unsigned)ri->nparts);
+      }
+      if (p->cls == ABI_CLASS_INT) {
+        u32 sf = (p->size == 8) ? 1u : 0u;
+        aa64_emit32(mc, aa64_mov_reg(sf, reg_num(rs), src_reg));
+      } else {
+        u32 type = (p->size == 8) ? 1u : 0u;
+        aa64_emit32(mc, aa64_fmov_reg(type, reg_num(rs), src_reg));
+      }
+    } else if (rs.kind == OPK_LOCAL || rs.kind == OPK_INDIRECT) {
+      u32 base_reg;
+      i32 base_off;
+      if (rs.kind == OPK_LOCAL) {
+        AASlot* s = aa64_slot_get(a, rs.v.frame_slot);
+        if (!s) compiler_panic(t->c, a->loc, "aarch64 call: bad ret slot");
+        base_reg = 29;
+        base_off = -(i32)s->off;
+      } else {
+        base_reg = rs.v.ind.base & 0x1f;
+        base_off = rs.v.ind.ofs;
+      }
+      u32 sidx = size_idx_for_bytes(p->size);
+      i32 off = base_off + (i32)p->src_offset;
+      u32 st_base = base_reg;
+      if (off < -256 || off > 255) {
+        /* Same displacement range addr_base() enforces for STUR. */
+        aa64_emit_addr_adjust(mc, /*Rd=*/9, base_reg, off);
+        st_base = 9;
+        off = 0;
+      }
+      if (p->cls == ABI_CLASS_INT) {
+        aa64_emit32(mc, aa64_stur(sidx, src_reg, st_base, off));
+      } else {
+        aa64_emit32(mc, aa64_stur_fp(sidx, src_reg, st_base, off));
+      }
+    } else if (rs.kind == OPK_IMM && rs.type && rs.type->kind == TY_VOID) {
+      /* void return placeholder */
+    } else {
+      compiler_panic(t->c, a->loc,
+                     "aarch64 call: ret_storage kind %d unsupported",
+                     (int)rs.kind);
+    }
+  }
+}
+
+/* Copy `nbytes` bytes from [src_base + src_off] to the buffer addressed by
+ * x8, through x9: as many 8-byte chunks as fit, then 4-, 2- and 1-byte
+ * chunks for the tail. */
+static void aa_ret_copy_to_x8(MCEmitter* mc, u32 src_base, i32 src_off,
+                              u32 nbytes) {
+  static const u32 chunk_bytes[4] = {8u, 4u, 2u, 1u};
+  static const u32 chunk_sidx[4] = {3u, 2u, 1u, 0u};
+  u32 done = 0;
+  for (int k = 0; k < 4; ++k) {
+    while (nbytes - done >= chunk_bytes[k]) {
+      aa64_emit32(mc, aa64_ldur(chunk_sidx[k], 9, src_base,
+                                src_off + (i32)done));
+      aa64_emit32(mc, aa64_str_uimm(chunk_sidx[k], 9, 8, done));
+      done += chunk_bytes[k];
+    }
+  }
+}
+
+/* Emit a function return of `val` (NULL for no value).
+ *
+ *  - ABI-indirect value: x8 is reloaded from the saved sret-pointer slot
+ *    when one exists, then the aggregate is copied from its LOCAL slot
+ *    (slot size) or INDIRECT location (val->size, which must be non-zero)
+ *    to the buffer x8 addresses.
+ *  - REG value: moved to register 0 of its class.
+ *  - IMM value: loaded into register 0.
+ *  - LOCAL / INDIRECT value: each ABI part is loaded into the register
+ *    numbered by its part index, integer or FP according to the part class.
+ * Finally a branch to the function's epilogue label is emitted. */
+static void aa_ret(CGTarget* t, const CGABIValue* val) {
+  AAImpl* a = impl_of(t);
+  MCEmitter* mc = t->mc;
+
+  if (val) {
+    const ABIArgInfo* ri = val->abi;
+    OperandKind_or_int_unused:;
+    if (ri && ri->kind == ABI_ARG_INDIRECT) {
+      u32 src_base;
+      i32 src_off;
+      u32 nbytes;
+      if (val->storage.kind == OPK_LOCAL) {
+        AASlot* s = aa64_slot_get(a, val->storage.v.frame_slot);
+        if (!s) compiler_panic(t->c, a->loc, "aarch64 ret: bad sret slot");
+        src_base = 29;
+        src_off = -(i32)s->off;
+        nbytes = s->size;
+      } else if (val->storage.kind == OPK_INDIRECT) {
+        nbytes = val->size;
+        if (!nbytes) {
+          compiler_panic(t->c, a->loc,
+                         "aarch64 ret indirect: missing aggregate size");
+        }
+        src_base = val->storage.v.ind.base & 0x1f;
+        src_off = val->storage.v.ind.ofs;
+      } else {
+        compiler_panic(t->c, a->loc,
+                       "aarch64 ret indirect: storage kind %d unsupported",
+                       (int)val->storage.kind);
+      }
+      /* Reload the caller's result pointer into x8 if it was spilled. */
+      if (a->sret_ptr_slot != FRAME_SLOT_NONE) {
+        AASlot* sp = aa64_slot_get(a, a->sret_ptr_slot);
+        if (sp) aa64_emit32(mc, aa64_ldur(3, 8, 29, -(i32)sp->off));
+      }
+      aa_ret_copy_to_x8(mc, src_base, src_off, nbytes);
+    } else if (val->storage.kind == OPK_REG) {
+      u32 word;
+      if (val->storage.cls == RC_FP) {
+        u32 type = type_is_fp_double(val->storage.type) ? 1u : 0u;
+        word = aa64_fmov_reg(type, /*Rd=*/0, reg_num(val->storage));
+      } else {
+        u32 sf = type_is_64(val->storage.type) ? 1u : 0u;
+        word = aa64_mov_reg(sf, /*Rd=*/0, reg_num(val->storage));
+      }
+      aa64_emit32(mc, word);
+    } else if (val->storage.kind == OPK_IMM) {
+      u32 sf = type_is_64(val->storage.type) ? 1u : 0u;
+      aa64_emit_load_imm(mc, sf, /*Rd=*/0, val->storage.v.imm);
+    } else if (val->storage.kind == OPK_LOCAL ||
+               val->storage.kind == OPK_INDIRECT) {
+      u32 src_base;
+      i32 src_off;
+      if (val->storage.kind == OPK_LOCAL) {
+        AASlot* s = aa64_slot_get(a, val->storage.v.frame_slot);
+        if (!s) compiler_panic(t->c, a->loc, "aarch64 ret: bad local slot");
+        src_base = 29;
+        src_off = -(i32)s->off;
+      } else {
+        src_base = val->storage.v.ind.base & 0x1f;
+        src_off = val->storage.v.ind.ofs;
+      }
+      /* Without ABI info there are no parts to load. */
+      if (ri) {
+        for (u16 part = 0; part < ri->nparts; ++part) {
+          const ABIArgPart* pt = &ri->parts[part];
+          u32 sidx = size_idx_for_bytes(pt->size);
+          i32 off = src_off + (i32)pt->src_offset;
+          if (pt->cls == ABI_CLASS_INT) {
+            aa64_emit32(mc, aa64_ldur(sidx, /*Rt=*/part, src_base, off));
+          } else if (pt->cls == ABI_CLASS_FP) {
+            aa64_emit32(mc, aa64_ldur_fp(sidx, /*Rt=*/part, src_base, off));
+          } else {
+            compiler_panic(t->c, a->loc, "aarch64 ret: ret part cls %d unimpl",
+                           (int)pt->cls);
+          }
+        }
+      }
+    }
+  }
+
+  /* Jump to the shared epilogue. */
+  (void)mc->pos(mc);
+  aa64_emit32(mc, aa64_b_base());
+  mc->emit_label_ref(mc, a->epilogue_label, R_AARCH64_JUMP26, 4, 0);
+}
+
+/* ============================================================
+ * alloca
+ * ============================================================ */
+
+/* Lower a dynamic stack allocation.
+ *
+ *   d      destination operand; must be a register.
+ *   sz     byte count: an immediate (rounded up to 16 here, zero treated as
+ *          16, at most 0xfff after rounding) or a register (rounded up to
+ *          16 at run time through scratch x9).
+ *   align  requested alignment; anything above 16 is rejected.
+ *
+ * SP is lowered by the rounded size, then `add d, sp, #0` is emitted and its
+ * position and destination register are appended to impl->add_patches
+ * (presumably so the immediate can be patched once the frame is final --
+ * the consumer is outside this function). */
+static void aa_alloca_(CGTarget* t, Operand d, Operand sz, u32 align) {
+  AAImpl* impl = impl_of(t);
+  MCEmitter* em = t->mc;
+
+  if (d.kind != OPK_REG) {
+    compiler_panic(t->c, impl->loc, "aarch64 alloca: dst must be REG");
+  }
+  if (align > 16) {
+    compiler_panic(t->c, impl->loc,
+                   "aarch64 alloca: align %u > 16 not yet supported", align);
+  }
+
+  switch (sz.kind) {
+    case OPK_IMM: {
+      i64 bytes = sz.v.imm;
+      if (bytes < 0) {
+        compiler_panic(t->c, impl->loc, "aarch64 alloca: negative size");
+      }
+      u64 rounded = ((u64)bytes + 15u) & ~(u64)15u;
+      if (rounded == 0) rounded = 16;
+      if (rounded > 0xfffu) {
+        compiler_panic(t->c, impl->loc,
+                       "aarch64 alloca: const size %llu too large for v1",
+                       (unsigned long long)rounded);
+      }
+      /* sub sp, sp, #rounded */
+      aa64_emit32(em, aa64_sub_imm(1, /*Rd=SP*/ 31, /*Rn=SP*/ 31,
+                                   (u32)rounded, 0));
+      break;
+    }
+    case OPK_REG: {
+      u32 count_reg = reg_num(sz);
+      /* x9 = (count + 15) with the low four bits cleared (shift right by
+       * 4, then left by 4), then sp -= x9. */
+      aa64_emit32(em, aa64_add_imm(1, 9, count_reg, 15u, 0));
+      aa64_emit32(em, aa64_ubfm(1, 9, 9, 4, 63));
+      aa64_emit32(em, aa64_ubfm(1, 9, 9, 60, 59));
+      aa64_emit32(em, aa64_sub_extreg_x_uxtx(/*SP*/ 31, /*SP*/ 31, 9));
+      break;
+    }
+    default:
+      compiler_panic(t->c, impl->loc,
+                     "aarch64 alloca: size kind %d unsupported", (int)sz.kind);
+  }
+
+  /* Grow the patch list (doubling, starting at 4) when it is full. */
+  if (impl->nadd_patches == impl->add_patches_cap) {
+    u32 grown = impl->add_patches_cap ? impl->add_patches_cap * 2 : 4;
+    struct AAAllocaPatch* fresh =
+        arena_array(t->c->tu, struct AAAllocaPatch, grown);
+    if (impl->add_patches) {
+      memcpy(fresh, impl->add_patches, sizeof(*fresh) * impl->nadd_patches);
+    }
+    impl->add_patches = fresh;
+    impl->add_patches_cap = grown;
+  }
+
+  u32 out_reg = reg_num(d);
+  struct AAAllocaPatch* rec = &impl->add_patches[impl->nadd_patches];
+  rec->pos = em->pos(em);
+  rec->dst_reg = out_reg;
+  impl->nadd_patches++;
+  aa64_emit32(em, aa64_add_imm(1, out_reg, /*Rn=SP*/ 31, 0, 0));
+  impl->has_alloca = 1;
+}
+
+/* ============================================================
+ * Varargs
+ * ============================================================ */
+
+/* Emit code computing x<dst> = x29 + ofs.
+ *
+ *   ofs == 0          -> mov dst, x29
+ *   0 < ofs <= 0xfff  -> add dst, x29, #ofs
+ *   -0xfff <= ofs < 0 -> sub dst, x29, #-ofs
+ *   otherwise         -> materialise ofs in dst, then add dst, x29, dst
+ *
+ * The negative range is tested before negating so that ofs == INT32_MIN
+ * never evaluates -ofs (signed overflow); it takes the general path. */
+static void emit_fp_off(MCEmitter* mc, u32 dst, i32 ofs) {
+  if (ofs == 0) {
+    aa64_emit32(mc, aa64_mov_reg(1, dst, 29));
+  } else if (ofs > 0 && ofs <= 0xfff) {
+    aa64_emit32(mc, aa64_add_imm(1, dst, 29, (u32)ofs, 0));
+  } else if (ofs < 0 && ofs >= -0xfff) {
+    aa64_emit32(mc, aa64_sub_imm(1, dst, 29, (u32)(-ofs), 0));
+  } else {
+    aa64_emit_load_imm(mc, 1, dst, ofs);
+    aa64_emit32(mc, aa64_add(1, dst, 29, dst));
+  }
+}
+
+/* Initialise the va_list object whose address is held in register `ap_op`.
+ *
+ * Five fields are stored, at the offsets of the AAPCS64 va_list layout:
+ *   +0   pointer  x29 + 16 + next_param_stack
+ *   +8   pointer  end of the GP register save slot (x29 - off + size)
+ *   +16  pointer  end of the FP register save slot (x29 - off + size)
+ *   +24  i32      next_param_int * 8 - 64
+ *   +28  i32      next_param_fp * 16 - 128
+ *
+ * Panics when the current function is not variadic, or when either save
+ * slot cannot be resolved (previously a NULL slot was dereferenced). Uses
+ * x9 as scratch. */
+static void aa_va_start_(CGTarget* t, Operand ap_op) {
+  AAImpl* a = impl_of(t);
+  MCEmitter* mc = t->mc;
+  if (!a->is_variadic) {
+    compiler_panic(t->c, a->loc, "aarch64 va_start: function not variadic");
+  }
+  u32 ap = reg_num(ap_op);
+  AASlot* gs = aa64_slot_get(a, a->gp_save_slot);
+  AASlot* fs = aa64_slot_get(a, a->fp_save_slot);
+  if (!gs || !fs) {
+    compiler_panic(t->c, a->loc,
+                   "aarch64 va_start: missing register save slot");
+  }
+
+  {
+    /* First field: address just past the named stack parameters. */
+    u32 ofs = 16u + a->next_param_stack;
+    if (ofs <= 0xfff)
+      aa64_emit32(mc, aa64_add_imm(1, 9, 29, ofs, 0));
+    else {
+      aa64_emit_load_imm(mc, 1, 9, (i64)ofs);
+      aa64_emit32(mc, aa64_add(1, 9, 29, 9));
+    }
+    aa64_emit32(mc, aa64_str_uimm(3, 9, ap, 0));
+  }
+  /* Tops of the two register save areas. */
+  emit_fp_off(mc, 9, -(i32)gs->off + (i32)gs->size);
+  aa64_emit32(mc, aa64_str_uimm(3, 9, ap, 8));
+  emit_fp_off(mc, 9, -(i32)fs->off + (i32)fs->size);
+  aa64_emit32(mc, aa64_str_uimm(3, 9, ap, 16));
+  /* Negative byte offsets from each top to the first unnamed register. */
+  aa64_emit_load_imm(mc, 0, 9, (i64)((i32)(a->next_param_int * 8u) - 64));
+  aa64_emit32(mc, aa64_str_uimm(2, 9, ap, 24));
+  aa64_emit_load_imm(mc, 0, 9, (i64)((i32)(a->next_param_fp * 16u) - 128));
+  aa64_emit32(mc, aa64_str_uimm(2, 9, ap, 28));
+}
+
+/* Fetch the next variadic argument of type `ty` into `dst`.
+ *
+ * `ap_op` is a register holding the va_list address. The register class of
+ * `dst` selects which offset/top pair is consulted (FP: +28/+16 with a
+ * 16-byte stride, otherwise +24/+8 with an 8-byte stride).
+ *
+ * Emitted sequence: load the 32-bit offset; if it is >= 0 take the stack
+ * path (load through the pointer at +0, then advance that pointer by 8).
+ * Otherwise load from top + sign-extended offset, then add the stride to
+ * the stored offset. Scratch registers: x9..x12. */
+static void aa_va_arg_(CGTarget* t, Operand dst, Operand ap_op,
+                       const Type* ty) {
+  MCEmitter* em = t->mc;
+  const u32 ap = reg_num(ap_op);
+  const int want_fp = (dst.cls == RC_FP);
+
+  u32 offs_at, top_at, reg_stride;
+  if (want_fp) {
+    offs_at = 28u;
+    top_at = 16u;
+    reg_stride = 16u;
+  } else {
+    offs_at = 24u;
+    top_at = 8u;
+    reg_stride = 8u;
+  }
+  const u32 sidx = size_idx_for_bytes(type_byte_size(ty));
+  const u32 out = reg_num(dst);
+
+  MCLabel on_stack = em->label_new(em);
+  MCLabel finished = em->label_new(em);
+
+  /* w9 = saved offset; a non-negative value means the save area is spent. */
+  aa64_emit32(em, aa64_ldur(2, 9, ap, (i32)offs_at));
+  aa64_emit32(em, aa64_subs_imm(0, 31, 9, 0));
+  aa64_emit32(em, aa64_b_cond(0xa /*GE*/));
+  em->emit_label_ref(em, on_stack, R_AARCH64_CONDBR19, 4, 0);
+
+  /* Register save area: x11 = top + sext(w9). */
+  aa64_emit32(em, aa64_ldur(3, 10, ap, (i32)top_at));
+  aa64_emit32(em, aa64_sbfm(1, 12, 9, 0, 31));
+  aa64_emit32(em, aa64_add(1, 11, 10, 12));
+  aa64_emit32(em, want_fp ? aa64_ldur_fp(sidx, out, 11, 0)
+                          : aa64_ldur(sidx, out, 11, 0));
+  aa64_emit32(em, aa64_add_imm(0, 9, 9, reg_stride, 0));
+  aa64_emit32(em, aa64_stur(2, 9, ap, (i32)offs_at));
+  aa64_emit32(em, aa64_b_base());
+  em->emit_label_ref(em, finished, R_AARCH64_JUMP26, 4, 0);
+
+  /* Stack area: read through the pointer at +0 and advance it by 8. */
+  em->label_place(em, on_stack);
+  aa64_emit32(em, aa64_ldur(3, 10, ap, 0));
+  aa64_emit32(em, want_fp ? aa64_ldur_fp(sidx, out, 10, 0)
+                          : aa64_ldur(sidx, out, 10, 0));
+  aa64_emit32(em, aa64_add_imm(1, 10, 10, 8u, 0));
+  aa64_emit32(em, aa64_stur(3, 10, ap, 0));
+
+  em->label_place(em, finished);
+}
+
+/* va_end emits no code on this target; both arguments are ignored. */
+static void aa_va_end_(CGTarget* t, Operand a) {
+  (void)a;
+  (void)t;
+}
+
+/* Copy a 32-byte va_list from the address in register `s` to the address
+ * in register `d`, as four 8-byte load/store pairs through scratch x9. */
+static void aa_va_copy_(CGTarget* t, Operand d, Operand s) {
+  MCEmitter* em = t->mc;
+  const u32 to = reg_num(d);
+  const u32 from = reg_num(s);
+  u32 ofs = 0;
+  while (ofs < 32u) {
+    aa64_emit32(em, aa64_ldur(3, 9, from, (i32)ofs));
+    aa64_emit32(em, aa64_stur(3, 9, to, (i32)ofs));
+    ofs += 8u;
+  }
+}
+
+/* ============================================================
+ * Atomics
+ * ============================================================ */
+
+/* Encode LDAR Rt, [Rn]; sf64 selects the 64-bit form, else 32-bit. */
+static inline u32 aa64_ldar(u32 sf64, u32 Rt, u32 Rn) {
+  u32 opc = sf64 ? 0xC8DFFC00u : 0x88DFFC00u;
+  return opc | ((Rn & 0x1f) << 5) | (Rt & 0x1f);
+}
+/* Encode STLR Rt, [Rn]; sf64 selects the 64-bit form, else 32-bit. */
+static inline u32 aa64_stlr(u32 sf64, u32 Rt, u32 Rn) {
+  u32 opc = sf64 ? 0xC89FFC00u : 0x889FFC00u;
+  return opc | ((Rn & 0x1f) << 5) | (Rt & 0x1f);
+}
+/* Encode LDXR Rt, [Rn]; sf64 selects the 64-bit form, else 32-bit. */
+static inline u32 aa64_ldxr(u32 sf64, u32 Rt, u32 Rn) {
+  u32 opc = sf64 ? 0xC85F7C00u : 0x885F7C00u;
+  return opc | ((Rn & 0x1f) << 5) | (Rt & 0x1f);
+}
+/* Encode LDAXR Rt, [Rn]; sf64 selects the 64-bit form, else 32-bit. */
+static inline u32 aa64_ldaxr(u32 sf64, u32 Rt, u32 Rn) {
+  u32 opc = sf64 ? 0xC85FFC00u : 0x885FFC00u;
+  return opc | ((Rn & 0x1f) << 5) | (Rt & 0x1f);
+}
+/* Encode STXR Ws, Rt, [Rn]; Rs receives the status result. sf64 selects
+ * the 64-bit data form, else 32-bit. */
+static inline u32 aa64_stxr(u32 sf64, u32 Rs, u32 Rt, u32 Rn) {
+  u32 opc = sf64 ? 0xC8007C00u : 0x88007C00u;
+  u32 regs = ((Rs & 0x1f) << 16) | ((Rn & 0x1f) << 5) | (Rt & 0x1f);
+  return opc | regs;
+}
+/* Encode STLXR Ws, Rt, [Rn]; Rs receives the status result. sf64 selects
+ * the 64-bit data form, else 32-bit. */
+static inline u32 aa64_stlxr(u32 sf64, u32 Rs, u32 Rt, u32 Rn) {
+  u32 opc = sf64 ? 0xC800FC00u : 0x8800FC00u;
+  u32 regs = ((Rs & 0x1f) << 16) | ((Rn & 0x1f) << 5) | (Rt & 0x1f);
+  return opc | regs;
+}
+/* Encode CBNZ Rt with a zero branch offset; sf64 is placed in bit 31. The
+ * callers in this file attach a label reference right after emitting it. */
+static inline u32 aa64_cbnz(u32 sf64, u32 Rt) {
+  u32 width_bit = sf64 << 31;
+  return 0x35000000u | width_bit | (Rt & 0x1f);
+}
+
+/* Nonzero when ordering `o` is acquire, acq_rel, seq_cst or consume. */
+static int mem_order_is_acquire(MemOrder o) {
+  if (o == MO_ACQUIRE || o == MO_ACQ_REL) return 1;
+  return o == MO_SEQ_CST || o == MO_CONSUME;
+}
+/* Nonzero when ordering `o` is release, acq_rel or seq_cst. */
+static int mem_order_is_release(MemOrder o) {
+  if (o == MO_RELEASE) return 1;
+  return o == MO_ACQ_REL || o == MO_SEQ_CST;
+}
+
+/* Emit an atomic load of ma.size bytes from `addr` into register `dst`.
+ *
+ *   addr  a register holding the address, or a local frame slot (its
+ *         address is formed in scratch x9).
+ *   ord   acquire-class orderings use a load-acquire; anything else uses a
+ *         plain load of the same width.
+ *
+ * Fixes relative to the previous version:
+ *  - The load-acquire now carries the access size in bits 31:30, so 1- and
+ *    2-byte loads become LDARB/LDARH rather than a 4-byte LDAR. 4- and
+ *    8-byte accesses encode exactly as before.
+ *  - A local slot's address goes through emit_fp_off, which copes with
+ *    offsets that do not fit the 12-bit immediate of `sub`. */
+static void aa_atomic_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma,
+                           MemOrder ord) {
+  AAImpl* a = impl_of(t);
+  MCEmitter* mc = t->mc;
+  u32 sidx = size_idx_for_bytes(ma.size);
+
+  u32 base;
+  if (addr.kind == OPK_REG) {
+    base = reg_num(addr);
+  } else if (addr.kind == OPK_LOCAL) {
+    AASlot* s = aa64_slot_get(a, addr.v.frame_slot);
+    if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_load: bad slot");
+    base = 9u;
+    emit_fp_off(mc, base, -(i32)s->off);
+  } else {
+    compiler_panic(t->c, a->loc,
+                   "aarch64 atomic_load: addr kind %d unsupported",
+                   (int)addr.kind);
+  }
+  if (mem_order_is_acquire(ord)) {
+    /* Start from the 32-bit encoding and overwrite its size field. */
+    u32 word = aa64_ldar(0, reg_num(dst), base);
+    word = (word & ~(3u << 30)) | ((sidx & 3u) << 30);
+    aa64_emit32(mc, word);
+  } else {
+    aa64_emit32(mc, aa64_ldur(sidx, reg_num(dst), base, 0));
+  }
+}
+
+/* Emit an atomic store of ma.size bytes from `src` to `addr`.
+ *
+ *   src   a register, or an immediate (materialised in scratch x10).
+ *   addr  a register holding the address, or a local frame slot (its
+ *         address is formed in scratch x9).
+ *   ord   release-class orderings use a store-release; anything else uses
+ *         a plain store of the same width.
+ *
+ * Fixes relative to the previous version:
+ *  - The store-release now carries the access size in bits 31:30, so 1- and
+ *    2-byte stores become STLRB/STLRH instead of a 4-byte STLR that would
+ *    overwrite neighbouring bytes. 4- and 8-byte accesses are unchanged.
+ *  - A local slot's address goes through emit_fp_off, which copes with
+ *    offsets that do not fit the 12-bit immediate of `sub`. */
+static void aa_atomic_store(CGTarget* t, Operand addr, Operand src,
+                            MemAccess ma, MemOrder ord) {
+  AAImpl* a = impl_of(t);
+  MCEmitter* mc = t->mc;
+  u32 sf = (ma.size == 8) ? 1u : 0u;
+  u32 sidx = size_idx_for_bytes(ma.size);
+
+  u32 src_reg;
+  if (src.kind == OPK_IMM) {
+    src_reg = 10u;
+    aa64_emit_load_imm(mc, sf, src_reg, src.v.imm);
+  } else if (src.kind == OPK_REG) {
+    src_reg = reg_num(src);
+  } else {
+    compiler_panic(t->c, a->loc,
+                   "aarch64 atomic_store: src kind %d unsupported",
+                   (int)src.kind);
+  }
+  u32 base;
+  if (addr.kind == OPK_REG) {
+    base = reg_num(addr);
+  } else if (addr.kind == OPK_LOCAL) {
+    AASlot* s = aa64_slot_get(a, addr.v.frame_slot);
+    if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_store: bad slot");
+    base = 9u;
+    emit_fp_off(mc, base, -(i32)s->off);
+  } else {
+    compiler_panic(t->c, a->loc,
+                   "aarch64 atomic_store: addr kind %d unsupported",
+                   (int)addr.kind);
+  }
+  if (mem_order_is_release(ord)) {
+    /* Start from the 32-bit encoding and overwrite its size field. */
+    u32 word = aa64_stlr(0, src_reg, base);
+    word = (word & ~(3u << 30)) | ((sidx & 3u) << 30);
+    aa64_emit32(mc, word);
+  } else {
+    aa64_emit32(mc, aa64_stur(sidx, src_reg, base, 0));
+  }
+}
+
+/* Emit the ALU step of an atomic read-modify-write: dst_new = prior <op> val.
+ *
+ * sf selects 64-bit (1) or 32-bit (0) arithmetic. AO_XCHG, and any operation
+ * not listed here, simply copies `val`. AO_NAND is an AND followed by a
+ * bitwise NOT of the result. */
+static void emit_rmw_combine(MCEmitter* mc, AtomicOp op, u32 sf, u32 dst_new,
+                             u32 prior, u32 val) {
+  u32 word;
+  switch (op) {
+    case AO_ADD:
+      word = aa64_add(sf, dst_new, prior, val);
+      break;
+    case AO_SUB:
+      word = aa64_sub(sf, dst_new, prior, val);
+      break;
+    case AO_AND:
+      word = aa64_and(sf, dst_new, prior, val);
+      break;
+    case AO_OR:
+      word = aa64_orr(sf, dst_new, prior, val);
+      break;
+    case AO_XOR:
+      word = aa64_eor(sf, dst_new, prior, val);
+      break;
+    case AO_NAND:
+      aa64_emit32(mc, aa64_and(sf, dst_new, prior, val));
+      word = aa64_mvn(sf, dst_new, dst_new);
+      break;
+    case AO_XCHG:
+    default:
+      word = aa64_mov_reg(sf, dst_new, val);
+      break;
+  }
+  aa64_emit32(mc, word);
+}
+
+/* Emit an atomic read-modify-write as an exclusive load/store retry loop.
+ *
+ *   dst   register receiving the value read by the exclusive load.
+ *   addr  register or local frame slot; the address is placed in x9.
+ *   val   register or immediate; copied into x10 before the loop.
+ *   ord   acquire-class selects LDAXR, release-class selects STLXR.
+ *
+ * Scratch: x9 (address), x10 (operand), x11 (new value), w12 (store status).
+ *
+ * Fixes relative to the previous version:
+ *  - The exclusive pair carries the access size in bits 31:30, so 1- and
+ *    2-byte operations use the B/H forms and no longer read or write
+ *    neighbouring bytes. 4- and 8-byte accesses encode exactly as before.
+ *  - A local slot's address goes through emit_fp_off, which copes with
+ *    offsets that do not fit the 12-bit immediate of `sub`.
+ *  - The unused cbnz position local is gone. */
+static void aa_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr,
+                          Operand val, MemAccess ma, MemOrder ord) {
+  AAImpl* a = impl_of(t);
+  MCEmitter* mc = t->mc;
+  u32 sf = (ma.size == 8) ? 1u : 0u;
+  u32 size_bits = (size_idx_for_bytes(ma.size) & 3u) << 30;
+  u32 size_keep = ~(3u << 30);
+
+  u32 base = 9u;
+  if (addr.kind == OPK_REG) {
+    aa64_emit32(mc, aa64_mov_reg(1, 9, reg_num(addr)));
+  } else if (addr.kind == OPK_LOCAL) {
+    AASlot* s = aa64_slot_get(a, addr.v.frame_slot);
+    if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_rmw: bad slot");
+    emit_fp_off(mc, 9, -(i32)s->off);
+  } else {
+    compiler_panic(t->c, a->loc, "aarch64 atomic_rmw: addr kind %d unsupported",
+                   (int)addr.kind);
+  }
+  u32 vreg = 10u;
+  if (val.kind == OPK_IMM) {
+    aa64_emit_load_imm(mc, sf, vreg, val.v.imm);
+  } else if (val.kind == OPK_REG) {
+    aa64_emit32(mc, aa64_mov_reg(sf, vreg, reg_num(val)));
+  } else {
+    compiler_panic(t->c, a->loc, "aarch64 atomic_rmw: val kind %d unsupported",
+                   (int)val.kind);
+  }
+
+  int do_acq = mem_order_is_acquire(ord);
+  int do_rel = mem_order_is_release(ord);
+
+  MCLabel L_retry = mc->label_new(mc);
+  mc->label_place(mc, L_retry);
+
+  /* Exclusive load of the current value into dst. */
+  u32 ld = do_acq ? aa64_ldaxr(0, reg_num(dst), base)
+                  : aa64_ldxr(0, reg_num(dst), base);
+  aa64_emit32(mc, (ld & size_keep) | size_bits);
+
+  emit_rmw_combine(mc, op, sf, /*new=*/11u, /*prior=*/reg_num(dst), vreg);
+
+  /* Exclusive store of the new value; w12 is nonzero when it failed. */
+  u32 st = do_rel ? aa64_stlxr(0, /*Rs=*/12u, /*Rt=*/11u, base)
+                  : aa64_stxr(0, /*Rs=*/12u, /*Rt=*/11u, base);
+  aa64_emit32(mc, (st & size_keep) | size_bits);
+
+  aa64_emit32(mc, aa64_cbnz(0, /*Rt=*/12u));
+  mc->emit_label_ref(mc, L_retry, R_AARCH64_CONDBR19, 4, 0);
+}
+
+/* Emit an atomic compare-and-swap as an exclusive load/store retry loop.
+ *
+ *   prior     register receiving the value read from memory.
+ *   ok        register set to 1 when the store happened, 0 otherwise.
+ *   addr      register or local frame slot; the address is placed in x9.
+ *   expected  register or immediate; copied into x10.
+ *   desired   register or immediate; copied into x11.
+ *   succ/fail memory orderings for the success and failure outcomes.
+ *
+ * Scratch: x9, x10, x11, w12 (store status). On mismatch the exclusive
+ * monitor is cleared with CLREX.
+ *
+ * Fixes relative to the previous version:
+ *  - `fail` is no longer ignored. The load is shared by both outcomes, so
+ *    it is a load-acquire when either ordering asks for acquire.
+ *  - A local slot's address goes through emit_fp_off, which copes with
+ *    offsets that do not fit the 12-bit immediate of `sub`.
+ *
+ * NOTE(review): the exclusive pair is emitted at 32- or 64-bit width only;
+ * confirm 1- and 2-byte CAS never reaches this function. */
+static void aa_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr,
+                          Operand expected, Operand desired, MemAccess ma,
+                          MemOrder succ, MemOrder fail) {
+  AAImpl* a = impl_of(t);
+  MCEmitter* mc = t->mc;
+  u32 sf = (ma.size == 8) ? 1u : 0u;
+
+  u32 base = 9u;
+  if (addr.kind == OPK_REG)
+    aa64_emit32(mc, aa64_mov_reg(1, 9, reg_num(addr)));
+  else if (addr.kind == OPK_LOCAL) {
+    AASlot* s = aa64_slot_get(a, addr.v.frame_slot);
+    if (!s) compiler_panic(t->c, a->loc, "aarch64 atomic_cas: bad slot");
+    emit_fp_off(mc, 9, -(i32)s->off);
+  } else {
+    compiler_panic(t->c, a->loc, "aarch64 atomic_cas: addr kind %d unsupported",
+                   (int)addr.kind);
+  }
+  if (expected.kind == OPK_IMM)
+    aa64_emit_load_imm(mc, sf, 10, expected.v.imm);
+  else if (expected.kind == OPK_REG)
+    aa64_emit32(mc, aa64_mov_reg(sf, 10, reg_num(expected)));
+  else
+    compiler_panic(t->c, a->loc, "aarch64 atomic_cas: exp kind %d unsupported",
+                   (int)expected.kind);
+  if (desired.kind == OPK_IMM)
+    aa64_emit_load_imm(mc, sf, 11, desired.v.imm);
+  else if (desired.kind == OPK_REG)
+    aa64_emit32(mc, aa64_mov_reg(sf, 11, reg_num(desired)));
+  else
+    compiler_panic(t->c, a->loc, "aarch64 atomic_cas: des kind %d unsupported",
+                   (int)desired.kind);
+
+  /* The load runs before the outcome is known, so it must satisfy both. */
+  int do_acq = mem_order_is_acquire(succ) || mem_order_is_acquire(fail);
+  int do_rel = mem_order_is_release(succ);
+
+  MCLabel L_retry = mc->label_new(mc);
+  MCLabel L_fail = mc->label_new(mc);
+  MCLabel L_done = mc->label_new(mc);
+
+  mc->label_place(mc, L_retry);
+  if (do_acq)
+    aa64_emit32(mc, aa64_ldaxr(sf, reg_num(prior), base));
+  else
+    aa64_emit32(mc, aa64_ldxr(sf, reg_num(prior), base));
+
+  /* cmp prior, expected; bail out on mismatch. */
+  aa64_emit32(mc, aa64_subs_reg(sf, /*Rd=ZR*/ 31u, reg_num(prior), 10u));
+  aa64_emit32(mc, aa64_b_cond(0x1u /*NE*/));
+  mc->emit_label_ref(mc, L_fail, R_AARCH64_CONDBR19, 4, 0);
+
+  if (do_rel)
+    aa64_emit32(mc, aa64_stlxr(sf, 12u, 11u, base));
+  else
+    aa64_emit32(mc, aa64_stxr(sf, 12u, 11u, base));
+  /* Nonzero status: the exclusive store lost the reservation, retry. */
+  aa64_emit32(mc, aa64_cbnz(0, 12u));
+  mc->emit_label_ref(mc, L_retry, R_AARCH64_CONDBR19, 4, 0);
+
+  aa64_emit_load_imm(mc, 0, reg_num(ok), 1);
+  aa64_emit32(mc, aa64_b_base());
+  mc->emit_label_ref(mc, L_done, R_AARCH64_JUMP26, 4, 0);
+
+  mc->label_place(mc, L_fail);
+  aa64_emit32(mc, aa64_clrex(AA64_BARRIER_OPT_SY));
+  aa64_emit_load_imm(mc, 0, reg_num(ok), 0);
+
+  mc->label_place(mc, L_done);
+}
+
+/* Emit a memory barrier for ordering `o`: nothing for MO_RELAXED, one DMB
+ * with the ISH option for every other ordering. */
+static void aa_fence(CGTarget* t, MemOrder o) {
+  if (o != MO_RELAXED) {
+    aa64_emit32(t->mc, aa64_dmb(AA64_BARRIER_OPT_ISH));
+  }
+}
+
+/* ============================================================
+ * Intrinsics
+ * ============================================================ */
+
+/* Encode REV16 Wd, Wn. */
+static inline u32 aa64_rev16_w(u32 Rd, u32 Rn) {
+  u32 regs = ((Rn & 0x1f) << 5) | (Rd & 0x1f);
+  return 0x5AC00400u | regs;
+}
+/* Encode REV Wd, Wn. */
+static inline u32 aa64_rev_w(u32 Rd, u32 Rn) {
+  u32 regs = ((Rn & 0x1f) << 5) | (Rd & 0x1f);
+  return 0x5AC00800u | regs;
+}
+/* Encode REV Xd, Xn. */
+static inline u32 aa64_rev_x(u32 Rd, u32 Rn) {
+  u32 regs = ((Rn & 0x1f) << 5) | (Rd & 0x1f);
+  return 0xDAC00C00u | regs;
+}
+/* Encode RBIT Rd, Rn; sf64 selects the 64-bit form, else 32-bit. */
+static inline u32 aa64_rbit(u32 sf64, u32 Rd, u32 Rn) {
+  u32 opc = sf64 ? 0xDAC00000u : 0x5AC00000u;
+  return opc | ((Rn & 0x1f) << 5) | (Rd & 0x1f);
+}
+/* Encode CLZ Rd, Rn; sf64 selects the 64-bit form, else 32-bit. */
+static inline u32 aa64_clz(u32 sf64, u32 Rd, u32 Rn) {
+  u32 opc = sf64 ? 0xDAC01000u : 0x5AC01000u;
+  return opc | ((Rn & 0x1f) << 5) | (Rd & 0x1f);
+}
+/* Encode CNT Vd.8B, Vn.8B. */
+static inline u32 aa64_cnt_8b(u32 Vd, u32 Vn) {
+  u32 regs = ((Vn & 0x1f) << 5) | (Vd & 0x1f);
+  return 0x0E205800u | regs;
+}
+/* Encode ADDV Bd, Vn.8B. */
+static inline u32 aa64_addv_b_8b(u32 Vd, u32 Vn) {
+  u32 regs = ((Vn & 0x1f) << 5) | (Vd & 0x1f);
+  return 0x0E31B800u | regs;
+}
+/* Encode ADDS Rd, Rn, Rm (shifted-register form, no shift); sf is placed
+ * in bit 31. */
+static inline u32 aa64_adds_reg(u32 sf, u32 Rd, u32 Rn, u32 Rm) {
+  u32 regs = ((Rm & 0x1f) << 16) | ((Rn & 0x1f) << 5) | (Rd & 0x1f);
+  return 0x2B000000u | (sf << 31) | regs;
+}
+/* Encode SMADDL Xd, Wn, Wm, Xa through the shared three-source packer. */
+static inline u32 aa64_smaddl(u32 Rd, u32 Rn, u32 Rm, u32 Ra) {
+  AA64DP3 fields = {
+      .sf = 1, .op31 = 1, .o0 = 0, .Rm = Rm, .Ra = Ra, .Rn = Rn, .Rd = Rd};
+  return aa64_dp3_pack(fields);
+}
+/* Encode SMULL Xd, Wn, Wm: SMADDL with the zero register as addend. */
+static inline u32 aa64_smull(u32 Rd, u32 Rn, u32 Rm) {
+  const u32 addend = AA64_ZR;
+  return aa64_smaddl(Rd, Rn, Rm, addend);
+}
+/* Encode SUBS Xd, Xn, Wm, SXTW (extended-register form; option field 6). */
+static inline u32 aa64_subs_extreg_x_sxtw(u32 Rd, u32 Rn, u32 Rm) {
+  u32 regs = ((Rm & 0x1f) << 16) | ((Rn & 0x1f) << 5) | (Rd & 0x1f);
+  return 0xEB200000u | (6u << 13) | regs;
+}
+
+/* Emit an unrolled copy of n bytes from [x<sr>] to [x<dr>], walking from
+ * offset 0 upward. Widest chunks first (8, 4, 2, 1); each chunk is staged
+ * through scratch x12. */
+static void aa_intrin_copy_ascending(MCEmitter* mc, u32 dr, u32 sr, u32 n) {
+  u32 i = 0;
+  for (u32 sidx = 4; sidx-- > 0;) {
+    u32 step = 1u << sidx;
+    while (i + step <= n) {
+      aa64_emit32(mc, aa64_ldur(sidx, 12, sr, (i32)i));
+      aa64_emit32(mc, aa64_stur(sidx, 12, dr, (i32)i));
+      i += step;
+    }
+  }
+}
+
+/* Emit an unrolled copy of n bytes from [x<sr>] to [x<dr>], walking from
+ * offset n downward. Widest chunks first (8, 4, 2, 1); each chunk is staged
+ * through scratch x12. */
+static void aa_intrin_copy_descending(MCEmitter* mc, u32 dr, u32 sr, u32 n) {
+  u32 i = n;
+  for (u32 sidx = 4; sidx-- > 0;) {
+    u32 step = 1u << sidx;
+    while (i >= step) {
+      i -= step;
+      aa64_emit32(mc, aa64_ldur(sidx, 12, sr, (i32)i));
+      aa64_emit32(mc, aa64_stur(sidx, 12, dr, (i32)i));
+    }
+  }
+}
+
+/* Emit unrolled stores of register `src_reg` covering n bytes at [x<dr>],
+ * widest chunks first (8, 4, 2, 1). */
+static void aa_intrin_fill(MCEmitter* mc, u32 dr, u32 src_reg, u32 n) {
+  u32 i = 0;
+  for (u32 sidx = 4; sidx-- > 0;) {
+    u32 step = 1u << sidx;
+    while (i + step <= n) {
+      aa64_emit32(mc, aa64_stur(sidx, src_reg, dr, (i32)i));
+      i += step;
+    }
+  }
+}
+
+/* Lower one compiler intrinsic to inline AArch64 code.
+ *
+ *   kind      which intrinsic to emit.
+ *   dsts/nd   destination operands (nd is not inspected).
+ *   args/na   source operands (na is not inspected).
+ *
+ * Panics on an unsupported kind or operand shape. Scratch registers used by
+ * the various cases: x9..x12 and v0.
+ *
+ * Fixes relative to the previous version:
+ *  - MEMMOVE: the copy used to run high-to-low unconditionally, which reads
+ *    already-overwritten bytes when dst < src and the regions overlap. When
+ *    more than one chunk is needed the direction is now chosen at run time
+ *    (ascending when dst <= src, descending otherwise).
+ *  - MEMSET with a register value: the whole register was multiplied by
+ *    0x0101010101010101, so a value outside 0..255 (for instance -1) gave a
+ *    wrong fill pattern. The value is now reduced to its low byte first, as
+ *    memset's conversion to unsigned char requires. */
+static void aa_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd,
+                         const Operand* args, u32 na) {
+  AAImpl* a = impl_of(t);
+  MCEmitter* mc = t->mc;
+  (void)nd;
+
+  switch (kind) {
+    case INTRIN_POPCOUNT: {
+      /* Move to v0, count bits per byte, sum the bytes, move back. */
+      Operand src = args[0];
+      Operand dst = dsts[0];
+      u32 sz_in = type_byte_size(src.type);
+      if (sz_in == 8)
+        aa64_emit32(mc, aa64_fmov_d_x(0, reg_num(src)));
+      else
+        aa64_emit32(mc, aa64_fmov_s_w(0, reg_num(src)));
+      aa64_emit32(mc, aa64_cnt_8b(0, 0));
+      aa64_emit32(mc, aa64_addv_b_8b(0, 0));
+      aa64_emit32(mc, aa64_fmov_w_s(reg_num(dst), 0));
+      return;
+    }
+    case INTRIN_CLZ: {
+      Operand src = args[0];
+      Operand dst = dsts[0];
+      u32 sf = type_is_64(src.type) ? 1u : 0u;
+      aa64_emit32(mc, aa64_clz(sf, reg_num(dst), reg_num(src)));
+      return;
+    }
+    case INTRIN_CTZ: {
+      /* Count trailing zeros as CLZ of the bit-reversed value. */
+      Operand src = args[0];
+      Operand dst = dsts[0];
+      u32 sf = type_is_64(src.type) ? 1u : 0u;
+      aa64_emit32(mc, aa64_rbit(sf, reg_num(dst), reg_num(src)));
+      aa64_emit32(mc, aa64_clz(sf, reg_num(dst), reg_num(dst)));
+      return;
+    }
+    case INTRIN_BSWAP16: {
+      aa64_emit32(mc, aa64_rev16_w(reg_num(dsts[0]), reg_num(args[0])));
+      return;
+    }
+    case INTRIN_BSWAP32: {
+      aa64_emit32(mc, aa64_rev_w(reg_num(dsts[0]), reg_num(args[0])));
+      return;
+    }
+    case INTRIN_BSWAP64: {
+      aa64_emit32(mc, aa64_rev_x(reg_num(dsts[0]), reg_num(args[0])));
+      return;
+    }
+    case INTRIN_MEMCPY:
+    case INTRIN_MEMMOVE: {
+      Operand da = args[0], sa = args[1], nb = args[2];
+      if (da.kind != OPK_REG || sa.kind != OPK_REG || nb.kind != OPK_IMM) {
+        compiler_panic(t->c, a->loc,
+                       "aarch64 intrinsic: %s with non-const n or non-REG ptr",
+                       kind == INTRIN_MEMCPY ? "memcpy" : "memmove");
+      }
+      u32 dr = reg_num(da);
+      u32 sr = reg_num(sa);
+      u32 n = (u32)nb.v.imm;
+      if (kind == INTRIN_MEMCPY) {
+        aa_intrin_copy_ascending(mc, dr, sr, n);
+      } else if (n == 0 || n == 1 || n == 2 || n == 4 || n == 8) {
+        /* At most one load followed by one store: overlap cannot hurt. */
+        aa_intrin_copy_descending(mc, dr, sr, n);
+      } else {
+        MCLabel L_ascend = mc->label_new(mc);
+        MCLabel L_done = mc->label_new(mc);
+        /* cmp dst, src; b.ls ascending (dst <= src, unsigned). */
+        aa64_emit32(mc, aa64_subs_reg(1, /*Rd=ZR*/ 31u, dr, sr));
+        aa64_emit32(mc, aa64_b_cond(0x9u /*LS*/));
+        mc->emit_label_ref(mc, L_ascend, R_AARCH64_CONDBR19, 4, 0);
+        /* dst > src: copy from the top so no source byte is lost. */
+        aa_intrin_copy_descending(mc, dr, sr, n);
+        aa64_emit32(mc, aa64_b_base());
+        mc->emit_label_ref(mc, L_done, R_AARCH64_JUMP26, 4, 0);
+        mc->label_place(mc, L_ascend);
+        aa_intrin_copy_ascending(mc, dr, sr, n);
+        mc->label_place(mc, L_done);
+      }
+      return;
+    }
+    case INTRIN_MEMSET: {
+      Operand da = args[0], bv = args[1], nb = args[2];
+      if (da.kind != OPK_REG || nb.kind != OPK_IMM) {
+        compiler_panic(
+            t->c, a->loc,
+            "aarch64 intrinsic: memset with non-const n / non-REG ptr");
+      }
+      u32 dr = reg_num(da);
+      u32 n = (u32)nb.v.imm;
+      u32 src_reg = 31u;
+      if (bv.kind == OPK_IMM) {
+        u32 byte = (u32)(bv.v.imm & 0xffu);
+        if (byte == 0) {
+          /* Register 31 in the store's Rt field is the zero register. */
+          src_reg = 31u;
+        } else {
+          /* Replicate the byte into all eight lanes at compile time. */
+          u64 b64 = byte;
+          b64 |= b64 << 8;
+          b64 |= b64 << 16;
+          b64 |= b64 << 32;
+          aa64_emit_load_imm(mc, 1, 12, (i64)b64);
+          src_reg = 12u;
+        }
+      } else if (bv.kind == OPK_REG) {
+        /* x11 = low byte of the value; x12 = x11 * 0x0101010101010101. */
+        aa64_emit32(mc, aa64_ubfm(1, 11, reg_num(bv), 0, 7));
+        aa64_emit_load_imm(mc, 1, 12, (i64)0x0101010101010101ll);
+        aa64_emit32(mc, aa64_madd(1, 12, 11, 12, AA64_ZR));
+        src_reg = 12u;
+      } else {
+        compiler_panic(t->c, a->loc,
+                       "aarch64 intrinsic: memset byte kind %d unsupported",
+                       (int)bv.kind);
+      }
+      aa_intrin_fill(mc, dr, src_reg, n);
+      return;
+    }
+    case INTRIN_PREFETCH:
+      /* No code is emitted for prefetch. */
+      (void)args;
+      (void)na;
+      return;
+    case INTRIN_ASSUME_ALIGNED: {
+      Operand src = args[0];
+      Operand dst = dsts[0];
+      if (reg_num(src) != reg_num(dst)) {
+        aa64_emit32(mc, aa64_mov_reg(1, reg_num(dst), reg_num(src)));
+      }
+      return;
+    }
+    case INTRIN_EXPECT: {
+      /* Only the value is forwarded to dst. */
+      Operand val = args[0];
+      Operand dst = dsts[0];
+      u32 sf = type_is_64(dst.type) ? 1u : 0u;
+      if (val.kind == OPK_REG) {
+        if (reg_num(val) != reg_num(dst)) {
+          aa64_emit32(mc, aa64_mov_reg(sf, reg_num(dst), reg_num(val)));
+        }
+      } else if (val.kind == OPK_IMM) {
+        aa64_emit_load_imm(mc, sf, reg_num(dst), val.v.imm);
+      } else {
+        compiler_panic(t->c, a->loc,
+                       "aarch64 intrinsic: expect val kind %d unsupported",
+                       (int)val.kind);
+      }
+      return;
+    }
+    case INTRIN_UNREACHABLE:
+    case INTRIN_TRAP:
+      aa64_emit32(mc, aa64_brk(kind == INTRIN_TRAP ? 1u : 0u));
+      return;
+    case INTRIN_ADD_OVERFLOW:
+    case INTRIN_SUB_OVERFLOW: {
+      /* Flag-setting add/sub, then read the V flag as the overflow bit.
+       * NOTE(review): V reports signed overflow only -- confirm unsigned
+       * result types are never lowered through this case. */
+      Operand a_op = args[0], b_op = args[1];
+      Operand dval = dsts[0], dovf = dsts[1];
+      u32 sf = type_is_64(dval.type) ? 1u : 0u;
+      u32 ra = aa64_force_reg_int(t, a_op, sf, 9);
+      u32 rb = aa64_force_reg_int(t, b_op, sf, (ra == 9) ? 10u : 9u);
+      u32 word = (kind == INTRIN_ADD_OVERFLOW)
+                     ? aa64_adds_reg(sf, reg_num(dval), ra, rb)
+                     : aa64_subs_reg(sf, reg_num(dval), ra, rb);
+      aa64_emit32(mc, word);
+      aa64_emit32(mc, aa64_cset(sf, reg_num(dovf), 0x6u /*VS*/));
+      return;
+    }
+    case INTRIN_MUL_OVERFLOW: {
+      /* 32-bit only: form the 64-bit signed product in x11 and compare it
+       * with the sign-extension of its own low word. */
+      Operand a_op = args[0], b_op = args[1];
+      Operand dval = dsts[0], dovf = dsts[1];
+      u32 sf = type_is_64(dval.type) ? 1u : 0u;
+      if (sf) {
+        compiler_panic(
+            t->c, a->loc,
+            "aarch64 intrinsic: mul_overflow on i64 not yet supported");
+      }
+      u32 ra = aa64_force_reg_int(t, a_op, 0, 9);
+      u32 rb = aa64_force_reg_int(t, b_op, 0, (ra == 9) ? 10u : 9u);
+      aa64_emit32(mc, aa64_smull(/*X*/ 11u, ra, rb));
+      aa64_emit32(mc, aa64_subs_extreg_x_sxtw(/*XZR*/ 31u, /*Xn=*/11u,
+                                              /*Wm=*/11u));
+      aa64_emit32(mc, aa64_cset(0, reg_num(dovf), 0x1u /*NE*/));
+      aa64_emit32(mc, aa64_mov_reg(0, reg_num(dval), 11u));
+      return;
+    }
+    default:
+      compiler_panic(t->c, a->loc, "aarch64 intrinsic: kind %d unsupported",
+                     (int)kind);
+  }
+}
+
+/* ============================================================
+ * Inline asm block
+ * ============================================================ */
+
+/* Lower an inline-asm block.
+ *
+ * Each clobber name that resolves to an integer or FP register inside the
+ * matching register pool's range raises that pool's high-water mark to
+ * cover the register. Names that fail to resolve, belong to another class,
+ * or fall outside the pool range are ignored. The template is then
+ * assembled through a temporary AA64Asm bound to the operands. */
+static void aa_asm_block(CGTarget* t, const char* tmpl,
+                         const AsmConstraint* outs, u32 no, Operand* oo,
+                         const AsmConstraint* ins, u32 ni, const Operand* io,
+                         const Sym* clobs, u32 nc) {
+  AAImpl* impl = impl_of(t);
+
+  for (u32 k = 0; k < nc; ++k) {
+    Reg phys;
+    RegClass cls;
+    if (t->resolve_reg_name(t, clobs[k], &phys, &cls) != 0) continue;
+
+    RegPool* pool;
+    if (cls == RC_INT) {
+      pool = &impl->int_pool;
+    } else if (cls == RC_FP) {
+      pool = &impl->fp_pool;
+    } else {
+      continue;
+    }
+
+    u32 r = (u32)phys;
+    if (r < pool->base || r >= (u32)(pool->base + pool->nregs)) continue;
+
+    u32 depth = r - pool->base + 1u;
+    if (depth > pool->hwm) pool->hwm = depth;
+  }
+
+  AA64Asm* as = aa64_asm_open(t->c);
+  aa64_inline_bind(as, outs, no, oo, ins, ni, io, clobs, nc);
+  aa64_asm_run_template(as, t->mc, tmpl);
+  aa64_asm_close(as);
+}
+
+/* ============================================================
+ * Lifecycle / vtable constructor
+ * ============================================================ */
+
+/* Record `loc` as the current source location on both the target (used in
+ * panic messages) and the machine-code emitter. */
+static void aa_set_loc(CGTarget* t, SrcLoc loc) {
+  AAImpl* impl = impl_of(t);
+  MCEmitter* em = t->mc;
+  impl->loc = loc;
+  em->set_loc(em, loc);
+}
+
+/* Finalize hook: nothing to do for this target. */
+static void aa_finalize(CGTarget* t) {
+  (void)t;
+}
+
+/* Destroy hook: nothing to do here (the target object is arena-allocated
+ * by its constructor). */
+static void aa_destroy(CGTarget* t) {
+  (void)t;
+}
+
+/* Deferred-cleanup trampoline: `arg` is the CGTarget to free. */
+static void cgt_cleanup(void* arg) {
+  CGTarget* target = (CGTarget*)arg;
+  cgtarget_free(target);
+}
+
+/* Create the AArch64 code-generation target.
+ *
+ * The implementation object is allocated from the translation-unit arena
+ * and zeroed, its vtable is filled in, and a deferred cleanup that calls
+ * cgtarget_free on it is registered with the compiler.
+ *
+ *   c  owning compiler
+ *   o  object builder stored on the target
+ *   m  machine-code emitter stored on the target
+ *
+ * Returns the embedded CGTarget. */
+CGTarget* aa64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) {
+  AAImpl* impl = arena_new(c->tu, AAImpl);
+  memset(impl, 0, sizeof(*impl));
+
+  CGTarget* t = &impl->base;
+  t->c = c;
+  t->obj = o;
+  t->mc = m;
+
+  /* Function framing. */
+  t->func_begin = aa_func_begin;
+  t->func_end = aa_func_end;
+  t->frame_slot = aa_frame_slot;
+  t->param = aa_param;
+
+  /* Data movement. */
+  t->load_imm = aa_load_imm;
+  t->load_const = aa_load_const;
+  t->copy = aa_copy;
+  t->load = aa_load;
+  t->store = aa_store;
+  t->addr_of = aa_addr_of;
+  t->tls_addr_of = aa_tls_addr_of;
+  t->copy_bytes = aa_copy_bytes;
+  t->set_bytes = aa_set_bytes;
+  t->bitfield_load = aa_bitfield_load;
+  t->bitfield_store = aa_bitfield_store;
+
+  /* Arithmetic and conversions. */
+  t->binop = aa_binop;
+  t->unop = aa_unop;
+  t->convert = aa_convert;
+
+  /* Calls and returns. */
+  t->call = aa_call;
+  t->ret = aa_ret;
+
+  /* Dynamic stack and varargs. */
+  t->alloca_ = aa_alloca_;
+  t->va_start_ = aa_va_start_;
+  t->va_arg_ = aa_va_arg_;
+  t->va_end_ = aa_va_end_;
+  t->va_copy_ = aa_va_copy_;
+
+  /* No setjmp/longjmp hooks are installed. */
+  t->setjmp_ = NULL;
+  t->longjmp_ = NULL;
+
+  /* Atomics. */
+  t->atomic_load = aa_atomic_load;
+  t->atomic_store = aa_atomic_store;
+  t->atomic_rmw = aa_atomic_rmw;
+  t->atomic_cas = aa_atomic_cas;
+  t->fence = aa_fence;
+
+  /* Intrinsics and inline asm. */
+  t->intrinsic = aa_intrinsic;
+  t->asm_block = aa_asm_block;
+
+  /* Lifecycle. */
+  t->set_loc = aa_set_loc;
+  t->finalize = aa_finalize;
+  t->destroy = aa_destroy;
+
+  /* alloc/label/scope vtable entries */
+  aa_alloc_vtable_init(t);
+
+  /* Reference type_is_signed so it does not trigger an unused warning. */
+  (void)type_is_signed;
+
+  compiler_defer(c, cgt_cleanup, t);
+  return t;
+}
diff --git a/src/arch/rv64.c b/src/arch/rv64.c
@@ -1,2765 +0,0 @@
-/* Minimal RISC-V (RV64IMFD, LP64D) CGTarget.
- *
- * Single-pass codegen mirroring src/arch/aarch64.c. The frame uses s0
- * (x8) as the frame pointer; locals live at s0-relative negative
- * offsets, callee-save spills and outgoing args at sp-relative positive
- * offsets. The prologue is reserved as a NOP placeholder at func_begin
- * and patched at func_end once frame_size and the callee-save high-
- * water marks are known.
- *
- * Reg allocator: lowest-bit-first over s2..s11 (int) and fs2..fs11 (fp).
- * Scratch registers held outside the pools are t0..t3 (x5..x7, x28).
- *
- * Scope: this backend covers the v1 cg corpus paths the aarch64 backend
- * covers, with these explicit gaps that still panic:
- * - va_*, alloca, asm_block, atomic_cas (partial), intrinsic
- * INTRIN_MUL_OVERFLOW i64. */
-
-#include <string.h>
-
-#include "arch/arch.h"
-#include "arch/rv64.h"
-#include "arch/rv64_isa.h"
-#include "core/arena.h"
-#include "obj/obj.h"
-#include "type/type.h"
-
-#define RV_PROLOGUE_WORDS 32u
-
-/* ============================================================
- * RegPool (copy of the aa64 helper — bit-set free mask). */
-typedef struct RegPool {
- u32 free;
- u32 hwm;
- u8 base;
- u8 nregs;
- u8 pad[2];
-} RegPool;
-
-static void regpool_init(RegPool* p, u8 base, u8 nregs) {
- p->base = base;
- p->nregs = nregs;
- p->hwm = 0;
- p->free = (nregs >= 32u) ? 0xFFFFFFFFu : ((1u << nregs) - 1u);
-}
-
-static Reg regpool_alloc(RegPool* p) {
- if (p->free == 0) return (Reg)REG_NONE;
- u32 idx = (u32)__builtin_ctz(p->free);
- p->free &= ~(1u << idx);
- if (idx + 1u > p->hwm) p->hwm = idx + 1u;
- return (Reg)(p->base + idx);
-}
-
-static int regpool_free(RegPool* p, Reg r) {
- u32 rn = (u32)r;
- if (rn < p->base || rn >= (u32)(p->base + p->nregs)) return 0;
- u32 idx = rn - p->base;
- u32 bit = 1u << idx;
- if (p->free & bit) return -1;
- p->free |= bit;
- return 1;
-}
-
-/* ============================================================
- * RImpl */
-
-typedef struct RvSlot {
- u32 off; /* bytes below s0 (positive); address = s0 - off */
- u32 size;
- u32 align;
- u8 kind;
- u8 pad[3];
-} RvSlot;
-
-typedef struct RvScope {
- u8 kind;
- u8 has_else;
- u8 pad[2];
- MCLabel else_label;
- MCLabel end_label;
- Label break_label;
- Label continue_label;
-} RvScope;
-
-typedef struct RImpl {
- CGTarget base;
- SrcLoc loc;
- const CGFuncDesc* fd;
-
- u32 func_start;
- u32 prologue_pos;
- MCLabel epilogue_label;
-
- RvSlot* slots;
- u32 nslots;
- u32 slots_cap;
- u32 cum_off;
- u32 max_outgoing;
- /* fp_pair_off captures the offset from sp where the saved-s0/ra pair
- * sits. Computed at func_end. Stored so post-prologue sret/varargs
- * stores written by func_begin don't depend on it (they use s0). */
- u32 fp_pair_off;
-
- u32 next_param_int;
- u32 next_param_fp;
- u32 next_param_stack;
- u8 has_sret;
- FrameSlot sret_ptr_slot;
-
- RegPool int_pool;
- RegPool fp_pool;
-
- RvScope* scopes;
- u32 nscopes;
- u32 scopes_cap;
-
- u8 has_alloca;
- /* alloca patch list: each call emits `addi dst, sp, 0` and registers
- * the (pos, dst_reg) for patching with max_outgoing at func_end. */
- struct RvAllocaPatch {
- u32 pos;
- u32 dst_reg;
- }* add_patches;
- u32 nadd_patches;
- u32 add_patches_cap;
-
- /* Variadic register save area: 64 bytes (a0..a7). Allocated lazily on
- * the first va_start. The prologue patcher spills a-regs into it. */
- u8 is_variadic;
- FrameSlot gp_save_slot;
-} RImpl;
-
-static RImpl* impl_of(CGTarget* t) { return (RImpl*)t; }
-
-/* Forward decls. */
-static FrameSlot rv_frame_slot(CGTarget* t, const FrameSlotDesc* d);
-static RvSlot* slot_get(RImpl* a, FrameSlot fs);
-static void rv_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma);
-static void rv_store(CGTarget* t, Operand addr, Operand src, MemAccess ma);
-
-/* ---- type helpers ---- */
-static int type_is_64(const Type* t) {
- if (!t) return 0;
- switch (t->kind) {
- case TY_LONG:
- case TY_ULONG:
- case TY_LLONG:
- case TY_ULLONG:
- case TY_PTR:
- case TY_DOUBLE:
- return 1;
- default:
- return 0;
- }
-}
-static int type_is_fp_double(const Type* t) {
- return t && (t->kind == TY_DOUBLE || t->kind == TY_LDOUBLE);
-}
-static u32 type_byte_size(const Type* t) {
- if (!t) return 4;
- switch (t->kind) {
- case TY_CHAR:
- case TY_SCHAR:
- case TY_UCHAR:
- case TY_BOOL:
- return 1;
- case TY_SHORT:
- case TY_USHORT:
- return 2;
- case TY_INT:
- case TY_UINT:
- case TY_FLOAT:
- return 4;
- case TY_LONG:
- case TY_ULONG:
- case TY_LLONG:
- case TY_ULLONG:
- case TY_PTR:
- case TY_DOUBLE:
- return 8;
- default:
- return 8;
- }
-}
-static int type_is_signed(const Type* t) {
- if (!t) return 0;
- switch (t->kind) {
- case TY_CHAR:
- case TY_SCHAR:
- case TY_SHORT:
- case TY_INT:
- case TY_LONG:
- case TY_LLONG:
- return 1;
- default:
- return 0;
- }
-}
-
-static u32 reg_num(Operand op) { return op.v.reg & 0x1fu; }
-
-extern void debug_emit_row(Debug*, ObjSecId text_section, u32 offset, SrcLoc);
-
-static void emit32(MCEmitter* mc, u32 word) {
- u32 ofs = obj_pos(mc->obj, mc->section_id);
- u8 b[4];
- b[0] = (u8)(word & 0xff);
- b[1] = (u8)((word >> 8) & 0xff);
- b[2] = (u8)((word >> 16) & 0xff);
- b[3] = (u8)((word >> 24) & 0xff);
- mc->emit_bytes(mc, b, 4);
- if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
-}
-
-static void patch32(ObjBuilder* obj, u32 sec_id, u32 ofs, u32 word) {
- u8 b[4];
- b[0] = (u8)(word & 0xff);
- b[1] = (u8)((word >> 8) & 0xff);
- b[2] = (u8)((word >> 16) & 0xff);
- b[3] = (u8)((word >> 24) & 0xff);
- obj_patch(obj, sec_id, ofs, b, 4);
-}
-
-static _Noreturn void rv_panic(CGTarget* t, const char* what) {
- SrcLoc loc = impl_of(t)->loc;
- compiler_panic(t->c, loc, "rv64: %s not implemented", what);
-}
-
-/* ---- immediate materialization ----
- * Load any i64 into `rd`. Strategy:
- * - if fits signed 12-bit: addi rd, x0, imm
- * - elif fits signed 32-bit: lui rd, hi20; addiw rd, rd, lo12
- * - otherwise: split into high and low 32-bit halves, materialize
- * each separately, then shift-and-or. Worst-case sequence is up
- * to 8 instructions; good enough for the cg test corpus. */
-static int fits_signed32(i64 v) { return v >= (i64)(i32)0x80000000 && v <= (i64)(i32)0x7fffffff; }
-
-static void emit_li_32(MCEmitter* mc, u32 rd, i32 imm) {
- if (imm >= -2048 && imm <= 2047) {
- emit32(mc, rv_addi(rd, RV_ZERO, imm));
- return;
- }
- /* hi20 + lo12, with 0x800 bias to compensate ADDIW's sign-ext. */
- i32 hi = (i32)((u32)(imm + 0x800) >> 12);
- i32 lo = (i32)((i32)imm - (i32)(hi << 12));
- emit32(mc, rv_lui(rd, (u32)hi & 0xfffffu));
- if (lo) emit32(mc, rv_addiw(rd, rd, lo));
-}
-
-static void emit_load_imm(MCEmitter* mc, u32 sf, u32 rd, i64 imm) {
- if (!sf) {
- /* 32-bit destination: low 32 bits, sign-extended. */
- emit_li_32(mc, rd, (i32)imm);
- return;
- }
- if (fits_signed32(imm)) {
- emit_li_32(mc, rd, (i32)imm);
- return;
- }
- /* General 64-bit load: split into high and low 32 bits, place high
- * into rd << 32, then OR in low via a temp register (t0=x5). The cg
- * corpus has no IMM operands that collide with t0, so this is safe. */
- i64 lo32 = (i64)(i32)(imm & 0xffffffffu); /* sign-ext low half */
- i64 hi64 = (imm - lo32) >> 32; /* what remains in hi */
- if (hi64 < (i64)(i32)0x80000000 ||
- hi64 > (i64)(i32)0x7fffffff) {
- /* Out of i32 range — fallback: use a smaller chunked approach.
- * For the cg corpus this isn't hit; emit a conservative sequence:
- * li rd, hi32; slli 32; li t0, lo32; or rd, rd, t0. */
- i32 hi32 = (i32)(imm >> 32);
- i32 lo32_i = (i32)imm;
- emit_li_32(mc, rd, hi32);
- emit32(mc, rv_slli(rd, rd, 32));
- emit_li_32(mc, RV_T0, lo32_i);
- /* zero-extend t0 to clear sign-extension before OR */
- emit32(mc, rv_slli(RV_T0, RV_T0, 32));
- emit32(mc, rv_srli(RV_T0, RV_T0, 32));
- emit32(mc, rv_or(rd, rd, RV_T0));
- return;
- }
- emit_li_32(mc, rd, (i32)hi64);
- emit32(mc, rv_slli(rd, rd, 32));
- if (lo32 != 0) {
- emit_li_32(mc, RV_T0, (i32)lo32);
- emit32(mc, rv_slli(RV_T0, RV_T0, 32));
- emit32(mc, rv_srli(RV_T0, RV_T0, 32));
- emit32(mc, rv_or(rd, rd, RV_T0));
- }
-}
-
-/* sp += imm. imm can be any signed value the caller passes — we pick
- * the shortest sequence. */
-static void emit_sp_addi(MCEmitter* mc, i64 imm) {
- if (imm >= -2048 && imm <= 2047) {
- emit32(mc, rv_addi(RV_SP, RV_SP, (i32)imm));
- return;
- }
- emit_load_imm(mc, 1, RV_T0, imm);
- emit32(mc, rv_add(RV_SP, RV_SP, RV_T0));
-}
-
-/* ---- function lifecycle ---- */
-
-static void rv_func_begin(CGTarget* t, const CGFuncDesc* fd) {
- RImpl* a = impl_of(t);
- MCEmitter* mc = t->mc;
-
- mc->set_section(mc, fd->text_section_id);
- mc->emit_align(mc, 4, 0);
-
- a->fd = fd;
- a->func_start = mc->pos(mc);
- a->next_param_int = 0;
- a->next_param_fp = 0;
- a->next_param_stack = 0;
- a->has_sret = (fd->abi && fd->abi->has_sret) ? 1 : 0;
- a->cum_off = 0;
- a->max_outgoing = 0;
- a->fp_pair_off = 0;
- regpool_init(&a->int_pool, /*base=*/18u, /*nregs=*/10u); /* s2..s11 */
- regpool_init(&a->fp_pool, /*base=*/18u, /*nregs=*/10u); /* fs2..fs11 */
- a->nslots = 0;
- a->nscopes = 0;
- a->has_alloca = 0;
- a->nadd_patches = 0;
- a->is_variadic = (fd->abi && fd->abi->variadic) ? 1 : 0;
- a->gp_save_slot = FRAME_SLOT_NONE;
- a->sret_ptr_slot = FRAME_SLOT_NONE;
- a->epilogue_label = mc->label_new(mc);
-
- mc->cfi_startproc(mc);
-
- /* Reserve a NOP-filled prologue placeholder; func_end patches it. */
- a->prologue_pos = mc->pos(mc);
- for (u32 i = 0; i < RV_PROLOGUE_WORDS; ++i) emit32(mc, RV_NOP);
-
- /* For an sret return, the caller passed the destination pointer in
- * a0; reserve a hidden slot to spill it into so the body can use a0
- * freely. The actual SD a0, ...(s0) is emitted in the patched
- * prologue once the slot offset is known. */
- if (a->has_sret) {
- FrameSlotDesc fsd = {
- .type = NULL,
- .name = 0,
- .loc = (SrcLoc){0, 0, 0},
- .size = 8,
- .align = 8,
- .kind = FS_SPILL,
- .flags = 0,
- };
- a->sret_ptr_slot = rv_frame_slot(t, &fsd);
- /* Consume a0 — it is no longer available for the first real param. */
- a->next_param_int = 1;
- }
-
- /* Variadic: a 64-byte GP save area for a0..a7 lives at the very top
- * of the frame, immediately above the saved-s0/ra pair, so its bytes
- * are contiguous with the caller's stack args. The patcher spills the
- * unnamed a-regs into it as part of the prologue. The slot is implicit
- * (not allocated through rv_frame_slot) — it sits at [s0 + 16] when
- * is_variadic is set. */
-}
-
-static void rv_func_end(CGTarget* t) {
- RImpl* a = impl_of(t);
- MCEmitter* mc = t->mc;
- ObjBuilder* obj = t->obj;
- u32 sec = a->fd->text_section_id;
-
- u32 n_int_saves = a->int_pool.hwm; /* s2..s2+hwm-1 */
- u32 n_fp_saves = a->fp_pool.hwm;
- u32 max_out = (a->max_outgoing + 15u) & ~15u;
- u32 int_saves_sz = n_int_saves * 8u;
- u32 fp_saves_sz = n_fp_saves * 8u;
-
- /* Variadic functions reserve a 64-byte save area at the very top of
- * the frame so the save area and caller's stack args form a single
- * contiguous byte stream walked by the va_list pointer. */
- u32 va_save_sz = a->is_variadic ? 64u : 0u;
- u32 locals_off = max_out + int_saves_sz + fp_saves_sz; /* from sp */
- u32 fp_pair_off = locals_off + a->cum_off;
- u32 frame_size = fp_pair_off + 16u + va_save_sz;
- frame_size = (frame_size + 15u) & ~15u;
- fp_pair_off = frame_size - 16u - va_save_sz;
- a->fp_pair_off = fp_pair_off;
-
- /* Place the epilogue label at current pos. */
- mc->label_place(mc, a->epilogue_label);
-
- /* Restore int and fp saves using s0-relative addressing so they
- * don't depend on the final frame_size encoding (and survive
- * alloca-induced sp shifts). */
- /* layout below s0:
- * s0 - 8 .. s0 - 16 saved s0/ra ? No — those are at sp+fp_pair_off
- * We arranged saved-s0/ra at [sp+fp_pair_off], not below s0. So
- * immediately below s0 are: int saves, then fp saves, then locals.
- * Wait — let me recompute.
- *
- * sp + 0 outgoing args (max_out bytes)
- * sp + max_out int saves
- * sp + max_out + I fp saves
- * sp + max_out+I+F locals (cum_off)
- * sp + fp_pair_off saved s0_caller (8)
- * sp + fp_pair_off+8 saved ra (8)
- * sp + frame_size end
- *
- * s0 = sp + fp_pair_off (so [s0+0] = saved s0_caller).
- * Locals at [s0 - off] where off in [1..cum_off].
- * FP saves at [s0 - cum_off - 8*i].
- * Int saves at [s0 - cum_off - F - 8*i]. */
- /* Save slots sit at the start of an 8-byte cell below the locals
- * area. fp_save_base = offset of the first fp save (=-(L+8)); each
- * subsequent save is 8 bytes lower. int saves start below the fp
- * block. */
- i32 fp_save_base = -(i32)a->cum_off - 8;
- i32 int_save_base = fp_save_base - (i32)fp_saves_sz;
-
- /* Reverse order: ints first (lowest address) on restore, but we emit
- * the restore loop in reverse to keep the prologue/epilogue symmetric. */
- for (i32 i = (i32)n_int_saves - 1; i >= 0; --i) {
- u32 r = 18u + (u32)i; /* s2 + i */
- i32 off = int_save_base - 8 * (i32)i;
- emit32(mc, rv_ld(r, RV_S0, off));
- }
- for (i32 i = (i32)n_fp_saves - 1; i >= 0; --i) {
- u32 r = 18u + (u32)i; /* fs2 + i (fp reg number) */
- i32 off = fp_save_base - 8 * (i32)i;
- emit32(mc, rv_fld(r, RV_S0, off));
- }
- /* Restore sp from s0 first so alloca-induced offsets don't matter.
- * After this, sp == its post-prologue value. */
- if (a->has_alloca) {
- if ((i32)fp_pair_off > 2047) {
- compiler_panic(t->c, a->loc, "rv64: fp_pair_off too large for alloca");
- }
- emit32(mc, rv_addi(RV_SP, RV_S0, -(i32)fp_pair_off));
- }
- emit32(mc, rv_ld(RV_S0, RV_SP, (i32)fp_pair_off));
- emit32(mc, rv_ld(RV_RA, RV_SP, (i32)fp_pair_off + 8));
- emit_sp_addi(mc, (i64)frame_size);
- emit32(mc, rv_ret_());
-
- /* Now patch the prologue placeholder. */
- u32 pos = a->prologue_pos;
- u32 words[RV_PROLOGUE_WORDS];
- for (u32 i = 0; i < RV_PROLOGUE_WORDS; ++i) words[i] = RV_NOP;
- u32 wi = 0;
-
- /* addi sp, sp, -frame_size (or 2-insn if too large) */
- if ((i64)frame_size <= 2048) {
- words[wi++] = rv_addi(RV_SP, RV_SP, -(i32)frame_size);
- } else {
- /* li t0, -frame_size; add sp, sp, t0 */
- /* Use a small two-instruction expansion via LUI+ADDI if it fits 32-bit;
- * otherwise we'd need a full load_imm but that's overkill for tests. */
- i64 neg = -(i64)frame_size;
- if (fits_signed32(neg)) {
- i32 hi = (i32)((u32)((i32)neg + 0x800) >> 12);
- i32 lo = (i32)neg - (hi << 12);
- words[wi++] = rv_lui(RV_T0, (u32)hi & 0xfffffu);
- if (lo) words[wi++] = rv_addiw(RV_T0, RV_T0, lo);
- words[wi++] = rv_add(RV_SP, RV_SP, RV_T0);
- } else {
- compiler_panic(t->c, a->loc, "rv64: frame_size too large to patch");
- }
- }
- /* sd s0, fp_pair_off(sp); sd ra, fp_pair_off+8(sp); addi s0, sp, fp_pair_off */
- if ((i32)fp_pair_off > 2047 || (i32)(fp_pair_off + 8) > 2047) {
- compiler_panic(t->c, a->loc, "rv64: fp_pair_off out of imm12 range");
- }
- words[wi++] = rv_sd(RV_S0, RV_SP, (i32)fp_pair_off);
- words[wi++] = rv_sd(RV_RA, RV_SP, (i32)fp_pair_off + 8);
- words[wi++] = rv_addi(RV_S0, RV_SP, (i32)fp_pair_off);
-
- /* If sret, spill incoming a0 into the hidden slot. */
- if (a->has_sret && a->sret_ptr_slot != FRAME_SLOT_NONE) {
- RvSlot* s = slot_get(a, a->sret_ptr_slot);
- if (s) {
- if (wi >= RV_PROLOGUE_WORDS) goto overflow;
- words[wi++] = rv_sd(RV_A0, RV_S0, -(i32)s->off);
- }
- }
- /* Variadic: spill the still-unconsumed a-regs (a_{nparams_int}..a7)
- * into the save area at [s0 + 16 + i*8]. The save area sits between
- * the saved-s0/ra pair and the caller's stack args, so save_area[8]
- * == caller's first stack arg. */
- if (a->is_variadic) {
- for (u32 i = a->next_param_int; i < 8; ++i) {
- if (wi >= RV_PROLOGUE_WORDS) goto overflow;
- words[wi++] = rv_sd(RV_A0 + i, RV_S0, 16 + (i32)i * 8);
- }
- }
- /* int saves */
- for (u32 i = 0; i < n_int_saves; ++i) {
- u32 r = 18u + i;
- i32 off = int_save_base - 8 * (i32)i;
- if (wi >= RV_PROLOGUE_WORDS) goto overflow;
- words[wi++] = rv_sd(r, RV_S0, off);
- }
- /* fp saves */
- for (u32 i = 0; i < n_fp_saves; ++i) {
- u32 r = 18u + i;
- i32 off = fp_save_base - 8 * (i32)i;
- if (wi >= RV_PROLOGUE_WORDS) goto overflow;
- words[wi++] = rv_fsd(r, RV_S0, off);
- }
- if (0) {
- overflow:
- compiler_panic(t->c, a->loc,
- "rv64: prologue placeholder too small (used %u of %u)", wi,
- RV_PROLOGUE_WORDS);
- }
-
- for (u32 i = 0; i < RV_PROLOGUE_WORDS; ++i) {
- patch32(obj, sec, pos + i * 4u, words[i]);
- }
-
- /* Patch alloca placeholders with max_outgoing. */
- if (max_out > 2047u) {
- compiler_panic(t->c, a->loc,
- "rv64: max_outgoing %u out of imm12 for alloca patch",
- max_out);
- }
- for (u32 i = 0; i < a->nadd_patches; ++i) {
- u32 dr = a->add_patches[i].dst_reg;
- u32 word = rv_addi(dr, RV_SP, (i32)max_out);
- patch32(obj, sec, a->add_patches[i].pos, word);
- }
-
- /* Define the function symbol. */
- u32 end = mc->pos(mc);
- obj_symbol_define(obj, a->fd->sym, sec, (u64)a->func_start,
- (u64)(end - a->func_start));
-
- mc->cfi_endproc(mc);
- a->fd = NULL;
-}
-
-/* ---- regs / frame ---- */
-
-static Reg rv_alloc_reg(CGTarget* t, RegClass cls, const Type* ty) {
- RImpl* a = impl_of(t);
- (void)ty;
- if (cls == RC_INT) return regpool_alloc(&a->int_pool);
- if (cls == RC_FP) return regpool_alloc(&a->fp_pool);
- compiler_panic(t->c, a->loc, "rv64 alloc_reg: class %d unimpl", (int)cls);
-}
-
-static void rv_free_reg(CGTarget* t, Reg r, RegClass cls) {
- RImpl* a = impl_of(t);
- RegPool* p;
- switch (cls) {
- case RC_INT: p = &a->int_pool; break;
- case RC_FP: p = &a->fp_pool; break;
- default:
- compiler_panic(t->c, a->loc, "rv64 free_reg: class %d unimpl", (int)cls);
- }
- int rc = regpool_free(p, r);
- if (rc == 1) return;
- if (rc == -1) {
- compiler_panic(t->c, a->loc, "rv64 free_reg: reg %u already free in %s pool",
- (unsigned)r, cls == RC_FP ? "fp" : "int");
- }
- compiler_panic(t->c, a->loc, "rv64 free_reg: reg %u not in %s pool",
- (unsigned)r, cls == RC_FP ? "fp" : "int");
-}
-
-static FrameSlot rv_frame_slot(CGTarget* t, const FrameSlotDesc* d) {
- RImpl* a = impl_of(t);
- if (a->nslots == a->slots_cap) {
- u32 ncap = a->slots_cap ? a->slots_cap * 2 : 8;
- RvSlot* nbuf = arena_array(t->c->tu, RvSlot, ncap);
- if (a->slots) memcpy(nbuf, a->slots, sizeof(RvSlot) * a->nslots);
- a->slots = nbuf;
- a->slots_cap = ncap;
- }
- u32 size = d->size ? d->size : 8;
- u32 align = d->align ? d->align : 1;
- u32 next = a->cum_off + size;
- u32 mask = align - 1;
- next = (next + mask) & ~mask;
-
- RvSlot* s = &a->slots[a->nslots];
- s->off = next;
- s->size = size;
- s->align = align;
- s->kind = d->kind;
-
- a->cum_off = next;
- a->nslots++;
- return (FrameSlot)(a->nslots);
-}
-
-static RvSlot* slot_get(RImpl* a, FrameSlot fs) {
- if (fs == FRAME_SLOT_NONE || fs > a->nslots) return NULL;
- return &a->slots[fs - 1];
-}
-
-/* For a memory access of `nbytes`, pick the right store opcode. */
-static u32 enc_int_store(u32 nbytes, u32 src, u32 base, i32 off) {
- switch (nbytes) {
- case 1: return rv_sb(src, base, off);
- case 2: return rv_sh(src, base, off);
- case 4: return rv_sw(src, base, off);
- default: return rv_sd(src, base, off);
- }
-}
-static u32 enc_int_load(u32 nbytes, int sign_ext, u32 rd, u32 base, i32 off) {
- switch (nbytes) {
- case 1: return sign_ext ? rv_lb(rd, base, off) : rv_lbu(rd, base, off);
- case 2: return sign_ext ? rv_lh(rd, base, off) : rv_lhu(rd, base, off);
- case 4: return sign_ext ? rv_lw(rd, base, off) : rv_lwu(rd, base, off);
- default: return rv_ld(rd, base, off);
- }
-}
-
-/* ---- param ---- */
-
-static void rv_param(CGTarget* t, const CGParamDesc* p) {
- RImpl* a = impl_of(t);
- MCEmitter* mc = t->mc;
- RvSlot* s = slot_get(a, p->slot);
- if (!s) compiler_panic(t->c, a->loc, "rv64 param: bad slot");
- const ABIArgInfo* ai = p->abi;
- /* Caller's stack args start above the saved-s0/ra pair, plus the
- * 64-byte variadic save area when this function is variadic. */
- i32 caller_stack_base = 16 + (a->is_variadic ? 64 : 0);
-
- if (ai->kind == ABI_ARG_IGNORE) return;
- if (ai->kind == ABI_ARG_INDIRECT) {
- /* Pointer-to-copy passed in a-register. Copy bytes from there into
- * the home slot. Source pointer is in a0..a7. */
- u32 ptr_reg;
- if (a->next_param_int < 8) {
- ptr_reg = RV_A0 + a->next_param_int;
- a->next_param_int++;
- } else {
- u32 caller_off = a->next_param_stack;
- a->next_param_stack += 8;
- /* Incoming stack args live in the caller's outgoing-arg area,
- * which is `frame_size - fp_pair_off` (= 16 + the saved-s0/ra
- * pair) above s0 — same logic as aa64's `16 + caller_off`. */
- emit32(mc, rv_ld(RV_T1, RV_S0, caller_stack_base + (i32)caller_off));
- ptr_reg = RV_T1;
- }
- u32 nbytes = s->size;
- u32 i = 0;
- while (i + 8 <= nbytes) {
- emit32(mc, rv_ld(RV_T2, ptr_reg, (i32)i));
- emit32(mc, rv_sd(RV_T2, RV_S0, -(i32)s->off + (i32)i));
- i += 8;
- }
- while (i + 4 <= nbytes) {
- emit32(mc, rv_lwu(RV_T2, ptr_reg, (i32)i));
- emit32(mc, rv_sw(RV_T2, RV_S0, -(i32)s->off + (i32)i));
- i += 4;
- }
- while (i + 2 <= nbytes) {
- emit32(mc, rv_lhu(RV_T2, ptr_reg, (i32)i));
- emit32(mc, rv_sh(RV_T2, RV_S0, -(i32)s->off + (i32)i));
- i += 2;
- }
- while (i < nbytes) {
- emit32(mc, rv_lbu(RV_T2, ptr_reg, (i32)i));
- emit32(mc, rv_sb(RV_T2, RV_S0, -(i32)s->off + (i32)i));
- i += 1;
- }
- return;
- }
- /* DIRECT */
- for (u16 i = 0; i < ai->nparts; ++i) {
- const ABIArgPart* pt = &ai->parts[i];
- u32 part_off = pt->src_offset;
- u32 sz = pt->size;
-
- if (pt->cls == ABI_CLASS_INT) {
- if (a->next_param_int < 8) {
- u32 reg = RV_A0 + a->next_param_int;
- a->next_param_int++;
- emit32(mc, enc_int_store(sz, reg, RV_S0,
- -(i32)s->off + (i32)part_off));
- } else {
- u32 caller_off = a->next_param_stack;
- a->next_param_stack += 8;
- emit32(mc, enc_int_load(sz, 0, RV_T2, RV_S0,
- caller_stack_base + (i32)caller_off));
- emit32(mc, enc_int_store(sz, RV_T2, RV_S0,
- -(i32)s->off + (i32)part_off));
- }
- } else if (pt->cls == ABI_CLASS_FP) {
- if (a->next_param_fp < 8) {
- u32 reg = a->next_param_fp; /* fa0..fa7 → freg 10..17 */
- u32 freg = 10u + reg;
- a->next_param_fp++;
- if (sz == 8) {
- emit32(mc, rv_fsd(freg, RV_S0, -(i32)s->off + (i32)part_off));
- } else {
- emit32(mc, rv_fsw(freg, RV_S0, -(i32)s->off + (i32)part_off));
- }
- } else {
- u32 caller_off = a->next_param_stack;
- a->next_param_stack += 8;
- if (sz == 8) {
- emit32(mc, rv_fld(0, RV_S0, caller_stack_base + (i32)caller_off));
- emit32(mc, rv_fsd(0, RV_S0, -(i32)s->off + (i32)part_off));
- } else {
- emit32(mc, rv_flw(0, RV_S0, caller_stack_base + (i32)caller_off));
- emit32(mc, rv_fsw(0, RV_S0, -(i32)s->off + (i32)part_off));
- }
- }
- } else {
- compiler_panic(t->c, a->loc, "rv64 param: ABI class %d unimpl",
- (int)pt->cls);
- }
- }
-}
-
-static const Reg* rv_clobbers(CGTarget* t, RegClass c, u32* n) {
- (void)c;
- (void)n;
- rv_panic(t, "clobbers");
-}
-
-static void rv_spill_reg(CGTarget* t, Operand src, FrameSlot slot,
- MemAccess ma) {
- RImpl* a = impl_of(t);
- if (src.kind != OPK_REG) {
- compiler_panic(t->c, a->loc, "rv64 spill_reg: src is not OPK_REG");
- }
- Operand addr;
- memset(&addr, 0, sizeof addr);
- addr.kind = OPK_LOCAL;
- addr.cls = RC_INT;
- addr.type = ma.type;
- addr.v.frame_slot = slot;
- rv_store(t, addr, src, ma);
- rv_free_reg(t, src.v.reg, src.cls);
-}
-
-static void rv_reload_reg(CGTarget* t, Operand dst, FrameSlot slot,
- MemAccess ma) {
- RImpl* a = impl_of(t);
- if (dst.kind != OPK_REG) {
- compiler_panic(t->c, a->loc, "rv64 reload_reg: dst is not OPK_REG");
- }
- Operand addr;
- memset(&addr, 0, sizeof addr);
- addr.kind = OPK_LOCAL;
- addr.cls = RC_INT;
- addr.type = ma.type;
- addr.v.frame_slot = slot;
- rv_load(t, dst, addr, ma);
-}
-
-/* ---- labels / control flow ---- */
-
-static Label rv_label_new(CGTarget* t) {
- return (Label)t->mc->label_new(t->mc);
-}
-static void rv_label_place(CGTarget* t, Label l) {
- t->mc->label_place(t->mc, (MCLabel)l);
-}
-static void rv_jump(CGTarget* t, Label l) {
- MCEmitter* mc = t->mc;
- emit32(mc, rv_jal(RV_ZERO, 0));
- mc->emit_label_ref(mc, (MCLabel)l, R_RV_JAL, 4, 0);
-}
-
-/* Force an integer Operand into a register; materializes IMM via scratch. */
-static u32 force_reg_int(CGTarget* t, Operand op, u32 scratch) {
- if (op.kind == OPK_REG) return reg_num(op);
- if (op.kind == OPK_IMM) {
- u32 sf = type_is_64(op.type) ? 1u : 0u;
- emit_load_imm(t->mc, sf, scratch, op.v.imm);
- return scratch;
- }
- compiler_panic(t->c, impl_of(t)->loc,
- "rv64: operand kind %d unsupported here", (int)op.kind);
-}
-
-/* Emit a conditional branch (a OP b) → label. Uses BEQ/BNE/BLT/BGE etc. */
-static void rv_cmp_branch(CGTarget* t, CmpOp op, Operand a_op, Operand b_op,
- Label l) {
- MCEmitter* mc = t->mc;
- RImpl* a = impl_of(t);
- /* For FP compares, fall through to materialize the result and CBNZ. */
- if (op == CMP_LT_F || op == CMP_LE_F || op == CMP_GT_F || op == CMP_GE_F) {
- compiler_panic(t->c, a->loc, "rv64 cmp_branch: FP cmp NYI");
- }
- u32 ra = force_reg_int(t, a_op, RV_T0);
- u32 rb = force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0);
- u32 word = 0;
- switch (op) {
- case CMP_EQ: word = rv_beq(ra, rb, 0); break;
- case CMP_NE: word = rv_bne(ra, rb, 0); break;
- case CMP_LT_S: word = rv_blt(ra, rb, 0); break;
- case CMP_GE_S: word = rv_bge(ra, rb, 0); break;
- case CMP_LT_U: word = rv_bltu(ra, rb, 0); break;
- case CMP_GE_U: word = rv_bgeu(ra, rb, 0); break;
- /* >= can become < with operands swapped: a > b ↔ b < a;
- * a <= b ↔ b >= a. */
- case CMP_GT_S: word = rv_blt(rb, ra, 0); break;
- case CMP_LE_S: word = rv_bge(rb, ra, 0); break;
- case CMP_GT_U: word = rv_bltu(rb, ra, 0); break;
- case CMP_LE_U: word = rv_bgeu(rb, ra, 0); break;
- default:
- compiler_panic(t->c, a->loc, "rv64 cmp_branch: op %d unimpl", (int)op);
- }
- emit32(mc, word);
- mc->emit_label_ref(mc, (MCLabel)l, R_RV_BRANCH, 4, 0);
-}
-
-/* Materialize 0/1 into dst from a comparison. */
-static void rv_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a_op,
- Operand b_op) {
- MCEmitter* mc = t->mc;
- RImpl* a = impl_of(t);
- u32 rd = reg_num(dst);
-
- if (op == CMP_LT_F || op == CMP_LE_F || op == CMP_GT_F || op == CMP_GE_F) {
- /* FP compare in fa,fb → rd. Use FLT/FLE/FEQ depending on op. */
- int is_d = type_is_fp_double(a_op.type);
- u32 fa = reg_num(a_op);
- u32 fb = reg_num(b_op);
- switch (op) {
- case CMP_LT_F: emit32(mc, is_d ? rv_flt_d(rd, fa, fb) : rv_flt_s(rd, fa, fb)); return;
- case CMP_LE_F: emit32(mc, is_d ? rv_fle_d(rd, fa, fb) : rv_fle_s(rd, fa, fb)); return;
- case CMP_GT_F: emit32(mc, is_d ? rv_flt_d(rd, fb, fa) : rv_flt_s(rd, fb, fa)); return;
- case CMP_GE_F: emit32(mc, is_d ? rv_fle_d(rd, fb, fa) : rv_fle_s(rd, fb, fa)); return;
- default: break;
- }
- }
- u32 ra = force_reg_int(t, a_op, RV_T0);
- u32 rb = force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0);
-
- switch (op) {
- case CMP_EQ:
- emit32(mc, rv_sub(rd, ra, rb));
- emit32(mc, rv_sltiu(rd, rd, 1));
- return;
- case CMP_NE:
- emit32(mc, rv_sub(rd, ra, rb));
- emit32(mc, rv_sltu(rd, RV_ZERO, rd));
- return;
- case CMP_LT_S: emit32(mc, rv_slt(rd, ra, rb)); return;
- case CMP_LT_U: emit32(mc, rv_sltu(rd, ra, rb)); return;
- case CMP_GT_S: emit32(mc, rv_slt(rd, rb, ra)); return;
- case CMP_GT_U: emit32(mc, rv_sltu(rd, rb, ra)); return;
- case CMP_GE_S:
- emit32(mc, rv_slt(rd, ra, rb));
- emit32(mc, rv_xori(rd, rd, 1));
- return;
- case CMP_GE_U:
- emit32(mc, rv_sltu(rd, ra, rb));
- emit32(mc, rv_xori(rd, rd, 1));
- return;
- case CMP_LE_S:
- emit32(mc, rv_slt(rd, rb, ra));
- emit32(mc, rv_xori(rd, rd, 1));
- return;
- case CMP_LE_U:
- emit32(mc, rv_sltu(rd, rb, ra));
- emit32(mc, rv_xori(rd, rd, 1));
- return;
- default:
- compiler_panic(t->c, a->loc, "rv64 cmp: op %d unimpl", (int)op);
- }
-}
-
-/* ---- structured scopes (SCOPE_IF + SCOPE_LOOP/BLOCK bookkeep) ---- */
-
-static CGScope rv_scope_begin(CGTarget* t, const CGScopeDesc* d) {
- RImpl* a = impl_of(t);
- if (a->nscopes == a->scopes_cap) {
- u32 ncap = a->scopes_cap ? a->scopes_cap * 2u : 4u;
- RvScope* nb = arena_array(t->c->tu, RvScope, ncap);
- if (a->scopes) memcpy(nb, a->scopes, sizeof(RvScope) * a->nscopes);
- a->scopes = nb;
- a->scopes_cap = ncap;
- }
- RvScope* sc = &a->scopes[a->nscopes];
- sc->kind = (u8)d->kind;
- sc->has_else = 0;
- sc->else_label = 0;
- sc->end_label = 0;
- sc->break_label = d->break_label;
- sc->continue_label = d->continue_label;
-
- if (d->kind == SCOPE_IF) {
- sc->else_label = t->mc->label_new(t->mc);
- sc->end_label = t->mc->label_new(t->mc);
- u32 rn = force_reg_int(t, d->cond, RV_T0);
- /* beq rn, x0, else_label */
- emit32(t->mc, rv_beq(rn, RV_ZERO, 0));
- t->mc->emit_label_ref(t->mc, sc->else_label, R_RV_BRANCH, 4, 0);
- } else if (d->kind == SCOPE_LOOP || d->kind == SCOPE_BLOCK) {
- /* bookkeep only */
- } else {
- compiler_panic(t->c, a->loc,
- "rv64 scope_begin: kind %d not yet implemented",
- (int)d->kind);
- }
- a->nscopes++;
- return (CGScope)a->nscopes;
-}
-
-static void rv_scope_else(CGTarget* t, CGScope s) {
- RImpl* a = impl_of(t);
- if (s == CG_SCOPE_NONE || s > a->nscopes) {
- compiler_panic(t->c, a->loc, "rv64 scope_else: bad scope");
- }
- RvScope* sc = &a->scopes[s - 1];
- /* jump end ; place else */
- emit32(t->mc, rv_jal(RV_ZERO, 0));
- t->mc->emit_label_ref(t->mc, sc->end_label, R_RV_JAL, 4, 0);
- t->mc->label_place(t->mc, sc->else_label);
- sc->has_else = 1;
-}
-
-static void rv_scope_end(CGTarget* t, CGScope s) {
- RImpl* a = impl_of(t);
- if (s == CG_SCOPE_NONE || s > a->nscopes) {
- compiler_panic(t->c, a->loc, "rv64 scope_end: bad scope");
- }
- RvScope* sc = &a->scopes[s - 1];
- if (sc->kind == SCOPE_IF) {
- if (!sc->has_else) t->mc->label_place(t->mc, sc->else_label);
- t->mc->label_place(t->mc, sc->end_label);
- }
-}
-
-static void rv_break_to(CGTarget* t, CGScope s) {
- RImpl* a = impl_of(t);
- if (s == CG_SCOPE_NONE || s > a->nscopes) {
- compiler_panic(t->c, a->loc, "rv64 break_to: bad scope");
- }
- rv_jump(t, a->scopes[s - 1].break_label);
-}
-
-static void rv_continue_to(CGTarget* t, CGScope s) {
- RImpl* a = impl_of(t);
- if (s == CG_SCOPE_NONE || s > a->nscopes) {
- compiler_panic(t->c, a->loc, "rv64 continue_to: bad scope");
- }
- rv_jump(t, a->scopes[s - 1].continue_label);
-}
-
-/* ---- data movement ---- */
-
-static void rv_load_imm(CGTarget* t, Operand dst, i64 imm) {
- u32 sf = type_is_64(dst.type) ? 1u : 0u;
- emit_load_imm(t->mc, sf, reg_num(dst), imm);
-}
-
-static void rv_load_const(CGTarget* t, Operand dst, ConstBytes cb) {
- RImpl* a = impl_of(t);
- if (dst.cls != RC_FP) {
- compiler_panic(t->c, a->loc, "rv64 load_const: only FP supported in v1");
- }
- Sym ro_name = pool_intern_cstr(t->c->global, ".rodata");
- ObjSecId ro = obj_section(t->obj, ro_name, SEC_RODATA, SF_ALLOC, 1u);
-
- u32 cur_section = t->mc->section_id;
- t->mc->set_section(t->mc, ro);
- u32 ro_off = obj_align_to(t->obj, ro, cb.align ? cb.align : 4);
- t->mc->emit_bytes(t->mc, cb.bytes, cb.size);
-
- char namebuf[64];
- static u32 lit_seq = 0;
- int len = 0;
- {
- const char* prefix = ".LCFP";
- for (; prefix[len]; ++len) namebuf[len] = prefix[len];
- u32 v = lit_seq++;
- char tmp[16];
- int tn = 0;
- if (v == 0) tmp[tn++] = '0';
- else {
- while (v) { tmp[tn++] = '0' + (char)(v % 10); v /= 10; }
- }
- for (int i = tn - 1; i >= 0; --i) namebuf[len++] = tmp[i];
- namebuf[len] = 0;
- }
- Sym sname = pool_intern_cstr(t->c->global, namebuf);
- ObjSymId sym = obj_symbol(t->obj, sname, SB_LOCAL, SK_OBJ, ro, (u64)ro_off,
- (u64)cb.size);
- t->mc->set_section(t->mc, cur_section);
-
- /* auipc t0, %pcrel_hi(sym) ; flw/fld dst, %pcrel_lo(...)(t0)
- * The LO12_I reloc references the AUIPC's site address (a label/sym
- * placed at the AUIPC). For simplicity we make a local symbol at the
- * AUIPC and bind LO12_I to it. */
- u32 sec = t->mc->section_id;
- u32 auipc_pos = t->mc->pos(t->mc);
- emit32(t->mc, rv_auipc(RV_T0, 0));
- t->mc->emit_reloc_at(t->mc, sec, auipc_pos, R_RV_PCREL_HI20, sym, 0, 0, 0);
- /* Create a local symbol at the AUIPC site to anchor PCREL_LO12. */
- char anchor_buf[64];
- int al = 0;
- {
- const char* p2 = ".LpcrelHi";
- for (; p2[al]; ++al) anchor_buf[al] = p2[al];
- static u32 seq2 = 0;
- u32 v = seq2++;
- char tmp[16]; int tn = 0;
- if (v == 0) tmp[tn++] = '0';
- else { while (v) { tmp[tn++] = '0' + (char)(v % 10); v /= 10; } }
- for (int i = tn - 1; i >= 0; --i) anchor_buf[al++] = tmp[i];
- anchor_buf[al] = 0;
- }
- Sym aname = pool_intern_cstr(t->c->global, anchor_buf);
- ObjSymId anchor = obj_symbol(t->obj, aname, SB_LOCAL, SK_OBJ, sec,
- (u64)auipc_pos, 0);
- u32 lpos = t->mc->pos(t->mc);
- if (cb.size == 8) {
- emit32(t->mc, rv_fld(reg_num(dst), RV_T0, 0));
- } else {
- emit32(t->mc, rv_flw(reg_num(dst), RV_T0, 0));
- }
- t->mc->emit_reloc_at(t->mc, sec, lpos, R_RV_PCREL_LO12_I, anchor, 0, 0, 0);
-}
-
-static void rv_copy(CGTarget* t, Operand dst, Operand src) {
- if (dst.cls == RC_FP || src.cls == RC_FP) {
- u32 fmt = type_is_fp_double(dst.type) ? RV_FMT_D : RV_FMT_S;
- /* fmv.fmt rd, rs = fsgnj.fmt rd, rs, rs */
- u32 r = reg_num(src);
- emit32(t->mc, rv_fsgnj(fmt, reg_num(dst), r, r));
- return;
- }
- /* mv rd, rs = addi rd, rs, 0 (works for both 32 and 64-bit copies) */
- emit32(t->mc, rv_addi(reg_num(dst), reg_num(src), 0));
-}
-
-/* ---- address resolution ---- */
-
-/* Materialize the address of `addr` (LOCAL or INDIRECT or GLOBAL) into
- * `tmp_reg`. Returns the register holding the base and writes the
- * effective signed offset to *out_off (0 when we synthesized into tmp).
- * For OPK_GLOBAL, emits AUIPC + an LO12 reloc on the caller's load/store. */
-static u32 addr_base(CGTarget* t, Operand addr, i32* out_off, u32 tmp_reg) {
- RImpl* a = impl_of(t);
- if (addr.kind == OPK_LOCAL) {
- RvSlot* s = slot_get(a, addr.v.frame_slot);
- if (!s) compiler_panic(t->c, a->loc, "rv64 addr_base: bad slot");
- i32 off = -(i32)s->off;
- if (off >= -2048 && off <= 2047) {
- *out_off = off;
- return RV_S0;
- }
- emit_load_imm(t->mc, 1, tmp_reg, (i64)off);
- emit32(t->mc, rv_add(tmp_reg, RV_S0, tmp_reg));
- *out_off = 0;
- return tmp_reg;
- }
- if (addr.kind == OPK_INDIRECT) {
- i32 off = addr.v.ind.ofs;
- u32 base = addr.v.ind.base & 0x1f;
- if (off >= -2048 && off <= 2047) {
- *out_off = off;
- return base;
- }
- emit_load_imm(t->mc, 1, tmp_reg, (i64)off);
- emit32(t->mc, rv_add(tmp_reg, base, tmp_reg));
- *out_off = 0;
- return tmp_reg;
- }
- compiler_panic(t->c, a->loc, "rv64 addr_base: kind %d unsupported",
- (int)addr.kind);
-}
-
-static int rv64_use_got_for_sym(CGTarget* t, ObjSymId sym) {
- return obj_symbol_extern_via_got(t->c, t->obj, sym);
-}
-
-/* Anchor symbol management for PCREL_LO12_*. Each AUIPC site gets a
- * fresh local sym; the paired LO12 reloc references the anchor. */
-static ObjSymId emit_pcrel_anchor(CGTarget* t, u32 sec, u32 auipc_pos) {
- char buf[64];
- int len = 0;
- const char* p = ".LpcrelHi";
- for (; p[len]; ++len) buf[len] = p[len];
- static u32 seq = 0;
- u32 v = seq++;
- char tmp[16]; int tn = 0;
- if (v == 0) tmp[tn++] = '0';
- else { while (v) { tmp[tn++] = '0' + (char)(v % 10); v /= 10; } }
- for (int i = tn - 1; i >= 0; --i) buf[len++] = tmp[i];
- buf[len] = 0;
- Sym n = pool_intern_cstr(t->c->global, buf);
- return obj_symbol(t->obj, n, SB_LOCAL, SK_OBJ, sec, (u64)auipc_pos, 0);
-}
-
-/* Emit `auipc dst, %got_pcrel_hi(sym) ; ld dst, %pcrel_lo(.)(dst)`,
- * leaving the runtime address of `sym` (the GOT slot's contents) in
- * `dst_reg`. Addends are omitted from the GOT relocs — most loaders
- * disallow nonzero addends on GOT-load fixups — so callers apply any
- * displacement with a follow-on ADDI/ADD against the loaded base. */
-static void emit_got_load_addr(CGTarget* t, u32 dst_reg, ObjSymId sym) {
- MCEmitter* mc = t->mc;
- u32 sec = mc->section_id;
- u32 ap = mc->pos(mc);
- emit32(mc, rv_auipc(dst_reg, 0));
- mc->emit_reloc_at(mc, sec, ap, R_RV_GOT_HI20, sym, 0, 0, 0);
- ObjSymId anchor = emit_pcrel_anchor(t, sec, ap);
- u32 lp = mc->pos(mc);
- emit32(mc, rv_ld(dst_reg, dst_reg, 0));
- mc->emit_reloc_at(mc, sec, lp, R_RV_PCREL_LO12_I, anchor, 0, 0, 0);
-}
-
-/* Add a signed displacement `off` to `base`, writing into `rd`. Uses
- * ADDI for ±2047, otherwise materializes the offset via emit_load_imm
- * + ADD. Mirrors emit_addr_adjust in aarch64.c. */
-static void emit_addr_adjust(MCEmitter* mc, u32 rd, u32 base, i32 off) {
- if (off == 0) {
- if (rd != base) emit32(mc, rv_addi(rd, base, 0));
- return;
- }
- if (off >= -2048 && off <= 2047) {
- emit32(mc, rv_addi(rd, base, off));
- return;
- }
- emit_load_imm(mc, 1, RV_T1, (i64)off);
- emit32(mc, rv_add(rd, base, RV_T1));
-}
-
-static void rv_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) {
- u32 sz = ma.size ? ma.size : type_byte_size(addr.type);
- MCEmitter* mc = t->mc;
-
- if (addr.kind == OPK_GLOBAL) {
- u32 sec = mc->section_id;
- ObjSymId sym = addr.v.global.sym;
- i64 add = addr.v.global.addend;
- /* Extern-via-GOT path: load &sym from GOT, then load the value at
- * +addend (addend baked into the data load's imm12; relies on the
- * common case of `add` fitting ±2047 — larger addends would need a
- * follow-on ADD). */
- if (rv64_use_got_for_sym(t, sym)) {
- emit_got_load_addr(t, RV_T0, sym);
- i32 ao = (i32)add;
- if (dst.cls == RC_FP) {
- if (sz == 8) emit32(mc, rv_fld(reg_num(dst), RV_T0, ao));
- else emit32(mc, rv_flw(reg_num(dst), RV_T0, ao));
- } else {
- int sx = type_is_signed(addr.type);
- emit32(mc, enc_int_load(sz, sx, reg_num(dst), RV_T0, ao));
- }
- return;
- }
- u32 ap = mc->pos(mc);
- emit32(mc, rv_auipc(RV_T0, 0));
- mc->emit_reloc_at(mc, sec, ap, R_RV_PCREL_HI20, sym, add, 0, 0);
- ObjSymId anchor = emit_pcrel_anchor(t, sec, ap);
- u32 lp = mc->pos(mc);
- if (dst.cls == RC_FP) {
- if (sz == 8) emit32(mc, rv_fld(reg_num(dst), RV_T0, 0));
- else emit32(mc, rv_flw(reg_num(dst), RV_T0, 0));
- } else {
- int sx = type_is_signed(addr.type);
- emit32(mc, enc_int_load(sz, sx, reg_num(dst), RV_T0, 0));
- }
- mc->emit_reloc_at(mc, sec, lp, R_RV_PCREL_LO12_I, anchor, 0, 0, 0);
- return;
- }
-
- i32 off;
- u32 base = addr_base(t, addr, &off, RV_T0);
- if (dst.cls == RC_FP) {
- if (sz == 8) emit32(mc, rv_fld(reg_num(dst), base, off));
- else emit32(mc, rv_flw(reg_num(dst), base, off));
- } else {
- int sx = type_is_signed(addr.type);
- emit32(mc, enc_int_load(sz, sx, reg_num(dst), base, off));
- }
-}
-
- /* Store `src` to the memory location described by `addr`.
-  *
-  * The access width is ma.size, or type_byte_size(addr.type) when ma.size is
-  * zero. `src` may be an immediate (materialized into a scratch register),
-  * an FP-class register (fsd/fsw) or an integer register.
-  *
-  * OPK_GLOBAL addresses are handled in one of two ways:
-  *   - GOT: emit_got_load_addr() puts &sym in t0 and the addend becomes the
-  *     store displacement. A displacement only has 12 signed bits, so an
-  *     addend outside [-2048, 2047] is added into t0 first and the store
-  *     then uses displacement 0.
-  *   - PC-relative: auipc t0 + store, linked by R_RV_PCREL_HI20 on the auipc
-  *     and R_RV_PCREL_LO12_S (against the anchor symbol) on the store.
-  * All other address kinds are resolved through addr_base(). */
- static void rv_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) {
-   u32 sz = ma.size ? ma.size : type_byte_size(addr.type);
-   MCEmitter* mc = t->mc;
-
-   if (addr.kind == OPK_GLOBAL) {
-     u32 sec = mc->section_id;
-     ObjSymId sym = addr.v.global.sym;
-     i64 add = addr.v.global.addend;
-     u32 src_reg;
-     int src_fp = 0;
-     if (src.kind == OPK_IMM) {
-       /* Immediate source lives in t1 so t0 stays free for the address. */
-       u32 sf = (sz == 8) ? 1u : 0u;
-       emit_load_imm(mc, sf, RV_T1, src.v.imm);
-       src_reg = RV_T1;
-     } else if (src.cls == RC_FP) {
-       src_reg = reg_num(src);
-       src_fp = 1;
-     } else {
-       src_reg = reg_num(src);
-     }
-
-     if (rv64_use_got_for_sym(t, sym)) {
-       emit_got_load_addr(t, RV_T0, sym);
-       i32 ao = 0;
-       if (add >= -2048 && add <= 2047) {
-         ao = (i32)add;
-       } else {
-         /* Addend does not fit the store's imm12: fold it into t0. Pick a
-          * scratch that cannot be the integer source register. */
-         u32 tmp = (!src_fp && src_reg == RV_T2) ? RV_T3 : RV_T2;
-         emit_load_imm(mc, 1, tmp, add);
-         emit32(mc, rv_add(RV_T0, RV_T0, tmp));
-       }
-       if (src_fp) {
-         if (sz == 8) emit32(mc, rv_fsd(src_reg, RV_T0, ao));
-         else emit32(mc, rv_fsw(src_reg, RV_T0, ao));
-       } else {
-         emit32(mc, enc_int_store(sz, src_reg, RV_T0, ao));
-       }
-       return;
-     }
-
-     /* PC-relative pair; the addend travels on the HI20 relocation. */
-     u32 ap = mc->pos(mc);
-     emit32(mc, rv_auipc(RV_T0, 0));
-     mc->emit_reloc_at(mc, sec, ap, R_RV_PCREL_HI20, sym, add, 0, 0);
-     ObjSymId anchor = emit_pcrel_anchor(t, sec, ap);
-     u32 sp_pos = mc->pos(mc);
-     if (src_fp) {
-       if (sz == 8) emit32(mc, rv_fsd(src_reg, RV_T0, 0));
-       else emit32(mc, rv_fsw(src_reg, RV_T0, 0));
-     } else {
-       emit32(mc, enc_int_store(sz, src_reg, RV_T0, 0));
-     }
-     mc->emit_reloc_at(mc, sec, sp_pos, R_RV_PCREL_LO12_S, anchor, 0, 0, 0);
-     return;
-   }
-
-   /* Non-global address. With an immediate source t0 holds the value, so the
-    * address scratch handed to addr_base() becomes t1 instead. */
-   i32 off;
-   u32 base = addr_base(t, addr, &off,
-                        (src.kind == OPK_IMM) ? RV_T1 : RV_T0);
-   if (src.kind == OPK_IMM) {
-     u32 sf = (sz == 8) ? 1u : 0u;
-     emit_load_imm(mc, sf, RV_T0, src.v.imm);
-     emit32(mc, enc_int_store(sz, RV_T0, base, off));
-     return;
-   }
-   if (src.cls == RC_FP) {
-     if (sz == 8) emit32(mc, rv_fsd(reg_num(src), base, off));
-     else emit32(mc, rv_fsw(reg_num(src), base, off));
-   } else {
-     emit32(mc, enc_int_store(sz, reg_num(src), base, off));
-   }
- }
-
- /* Materialize the address of lvalue `lv` into the integer register `dst`.
-  *
-  *   OPK_LOCAL    : s0 minus the slot's frame offset.
-  *   OPK_INDIRECT : base register plus displacement.
-  *   OPK_GLOBAL   : either a GOT load followed by an addend fix-up, or an
-  *                  auipc/addi pair carrying R_RV_PCREL_HI20 and
-  *                  R_RV_PCREL_LO12_I relocations.
-  *
-  * Displacements outside the 12-bit signed range are built with
-  * emit_load_imm() and added. Any other operand kind goes to rv_panic(). */
- static void rv_addr_of(CGTarget* t, Operand dst, Operand lv) {
-   RImpl* a = impl_of(t);
-   MCEmitter* mc = t->mc;
-   u32 rd = reg_num(dst);
-
-   if (lv.kind == OPK_LOCAL) {
-     RvSlot* s = slot_get(a, lv.v.frame_slot);
-     if (!s) compiler_panic(t->c, a->loc, "rv64 addr_of: bad slot");
-     i32 off = -(i32)s->off;
-     if (off >= -2048 && off <= 2047) {
-       emit32(mc, rv_addi(rd, RV_S0, off));
-     } else {
-       emit_load_imm(mc, 1, rd, (i64)off);
-       emit32(mc, rv_add(rd, RV_S0, rd));
-     }
-     return;
-   }
-
-   if (lv.kind == OPK_INDIRECT) {
-     i32 ofs = lv.v.ind.ofs;
-     u32 base = lv.v.ind.base & 0x1f;
-     if (ofs >= -2048 && ofs <= 2047) {
-       emit32(mc, rv_addi(rd, base, ofs));
-     } else {
-       /* When dst aliases the base register, loading the offset into dst
-        * would destroy the base before the add reads it; stage the offset
-        * in t0 for that case. */
-       u32 tmp = (rd == base) ? RV_T0 : rd;
-       emit_load_imm(mc, 1, tmp, (i64)ofs);
-       emit32(mc, rv_add(rd, base, tmp));
-     }
-     return;
-   }
-
-   if (lv.kind == OPK_GLOBAL) {
-     ObjSymId sym = lv.v.global.sym;
-     i64 addend = lv.v.global.addend;
-     /* Extern-via-GOT path: the GOT load yields &sym directly; any addend is
-      * applied afterwards (GOT relocs disallow addends). */
-     if (rv64_use_got_for_sym(t, sym)) {
-       emit_got_load_addr(t, rd, sym);
-       if (addend) emit_addr_adjust(mc, rd, rd, (i32)addend);
-       return;
-     }
-     u32 sec = mc->section_id;
-     u32 ap = mc->pos(mc);
-     emit32(mc, rv_auipc(rd, 0));
-     mc->emit_reloc_at(mc, sec, ap, R_RV_PCREL_HI20, sym, addend, 0, 0);
-     /* The low-part relocation refers to the anchor placed at the auipc. */
-     ObjSymId anchor = emit_pcrel_anchor(t, sec, ap);
-     u32 ip = mc->pos(mc);
-     emit32(mc, rv_addi(rd, rd, 0));
-     mc->emit_reloc_at(mc, sec, ip, R_RV_PCREL_LO12_I, anchor, 0, 0, 0);
-     return;
-   }
-
-   rv_panic(t, "addr_of");
- }
-
- /* TLS Local-Exec address of `sym` + `addend`, written to `dst`:
-  *
-  *     lui   t0, %tprel_hi(sym)      ; R_RV_TPREL_HI20
-  *     add   t0, tp, t0
-  *     addi  dst, t0, %tprel_lo(sym) ; R_RV_TPREL_LO12_I
-  *
-  * Both relocations name the symbol and addend directly. */
- static void rv_tls_addr_of(CGTarget* t, Operand dst, ObjSymId sym, i64 addend) {
-   MCEmitter* mc = t->mc;
-   const u32 section = mc->section_id;
-   const u32 out = reg_num(dst);
-
-   /* High part, then fold in the thread pointer. */
-   const u32 hi_pos = mc->pos(mc);
-   emit32(mc, rv_lui(RV_T0, 0));
-   mc->emit_reloc_at(mc, section, hi_pos, R_RV_TPREL_HI20, sym, addend, 0, 0);
-   emit32(mc, rv_add(RV_T0, RV_TP, RV_T0));
-
-   /* Low part lands in the destination register. */
-   const u32 lo_pos = mc->pos(mc);
-   emit32(mc, rv_addi(out, RV_T0, 0));
-   mc->emit_reloc_at(mc, section, lo_pos, R_RV_TPREL_LO12_I, sym, addend, 0, 0);
- }
-
-/* ---- aggregate ops ---- */
-
- /* Return a register number holding the address described by `op`.
-  *
-  * An OPK_REG operand is used as-is. For OPK_LOCAL the slot address
-  * (s0 - slot offset) is computed into `scratch`, which is then returned.
-  * Every other operand kind is a compiler panic. */
- static u32 agg_addr_reg(CGTarget* t, Operand op, u32 scratch) {
-   RImpl* impl = impl_of(t);
-
-   switch (op.kind) {
-     case OPK_REG:
-       return reg_num(op);
-
-     case OPK_LOCAL: {
-       RvSlot* slot = slot_get(impl, op.v.frame_slot);
-       if (!slot) compiler_panic(t->c, impl->loc, "rv64 agg: bad slot");
-       i32 disp = -(i32)slot->off;
-       if (disp < -2048 || disp > 2047) {
-         /* Too far for addi: build the offset, then add the frame pointer. */
-         emit_load_imm(t->mc, 1, scratch, (i64)disp);
-         emit32(t->mc, rv_add(scratch, RV_S0, scratch));
-       } else {
-         emit32(t->mc, rv_addi(scratch, RV_S0, disp));
-       }
-       return scratch;
-     }
-
-     default:
-       break;
-   }
-   compiler_panic(t->c, impl->loc, "rv64 agg: address kind %d unsupported",
-                  (int)op.kind);
- }
-
- /* Emit an unrolled copy of agg.size bytes from `src_addr` to `dst_addr`.
-  *
-  * Each step moves the widest chunk (8, 4, 2, then 1 bytes) that still fits
-  * in the remaining length, going through t3. The destination address uses
-  * t0 as scratch; the source uses t1, or t2 when the destination register
-  * is already t1.
-  * NOTE(review): the running byte offset is handed straight to the load and
-  * store encoders; confirm callers keep agg.size within their displacement
-  * range. */
- static void rv_copy_bytes(CGTarget* t, Operand dst_addr, Operand src_addr,
-                           AggregateAccess agg) {
-   MCEmitter* mc = t->mc;
-   u32 to = agg_addr_reg(t, dst_addr, RV_T0);
-   u32 from = agg_addr_reg(t, src_addr, (to == RV_T1) ? RV_T2 : RV_T1);
-   u32 total = agg.size;
-
-   u32 done = 0;
-   while (done < total) {
-     u32 left = total - done;
-     i32 at = (i32)done;
-     if (left >= 8) {
-       emit32(mc, rv_ld(RV_T3, from, at));
-       emit32(mc, rv_sd(RV_T3, to, at));
-       done += 8;
-     } else if (left >= 4) {
-       emit32(mc, rv_lwu(RV_T3, from, at));
-       emit32(mc, rv_sw(RV_T3, to, at));
-       done += 4;
-     } else if (left >= 2) {
-       emit32(mc, rv_lhu(RV_T3, from, at));
-       emit32(mc, rv_sh(RV_T3, to, at));
-       done += 2;
-     } else {
-       emit32(mc, rv_lbu(RV_T3, from, at));
-       emit32(mc, rv_sb(RV_T3, to, at));
-       done += 1;
-     }
-   }
- }
-
- /* Emit an unrolled fill of agg.size bytes at `dst_addr` with a constant
-  * byte.
-  *
-  * Only an immediate `byte_value` is supported; anything else panics. A zero
-  * byte is stored straight from the zero register; otherwise the byte is
-  * replicated across all eight lanes of t3. Stores use the widest width that
-  * fits the remaining length (8, 4, 2, then 1 bytes). */
- static void rv_set_bytes(CGTarget* t, Operand dst_addr, Operand byte_value,
-                          AggregateAccess agg) {
-   MCEmitter* mc = t->mc;
-   u32 to = agg_addr_reg(t, dst_addr, RV_T0);
-
-   if (byte_value.kind != OPK_IMM) {
-     compiler_panic(t->c, impl_of(t)->loc,
-                    "rv64 set_bytes: REG byte NYI");
-   }
-   u32 fill = (u32)(byte_value.v.imm & 0xffu);
-
-   u32 value_reg = RV_ZERO;
-   if (fill != 0) {
-     /* 0xNN -> 0xNNNNNNNNNNNNNNNN */
-     u64 pattern = (u64)fill * 0x0101010101010101ull;
-     emit_load_imm(mc, 1, RV_T3, (i64)pattern);
-     value_reg = RV_T3;
-   }
-
-   u32 total = agg.size;
-   u32 done = 0;
-   while (done < total) {
-     u32 left = total - done;
-     i32 at = (i32)done;
-     if (left >= 8) {
-       emit32(mc, rv_sd(value_reg, to, at));
-       done += 8;
-     } else if (left >= 4) {
-       emit32(mc, rv_sw(value_reg, to, at));
-       done += 4;
-     } else if (left >= 2) {
-       emit32(mc, rv_sh(value_reg, to, at));
-       done += 2;
-     } else {
-       emit32(mc, rv_sb(value_reg, to, at));
-       done += 1;
-     }
-   }
- }
-
- /* Load a bit-field from the record at `record_addr` into `dst`.
-  *
-  * The storage unit (bf.storage.size bytes, 4 when that is zero) is loaded
-  * zero-extended from bf.storage_offset. The field is then isolated with a
-  * pair of 64-bit shifts: left so its top bit lands in bit 63, then right by
-  * (64 - width), arithmetic for signed fields and logical otherwise. A
-  * bit_width of zero is treated as 1. */
- static void rv_bitfield_load(CGTarget* t, Operand dst, Operand record_addr,
-                              BitFieldAccess bf) {
-   MCEmitter* mc = t->mc;
-   u32 rec = agg_addr_reg(t, record_addr, RV_T0);
-   u32 unit_bytes = bf.storage.size ? bf.storage.size : 4u;
-   u32 out = reg_num(dst);
-
-   emit32(mc, enc_int_load(unit_bytes, 0, out, rec, (i32)bf.storage_offset));
-
-   u32 nbits = bf.bit_width ? bf.bit_width : 1u;
-   u32 up = 64u - (bf.bit_offset + nbits);
-   u32 down = 64u - nbits;
-   emit32(mc, rv_slli(out, out, up));
-   if (!bf.signed_) {
-     emit32(mc, rv_srli(out, out, down));
-   } else {
-     emit32(mc, rv_srai(out, out, down));
-   }
- }
-
- /* Store `src` into a bit-field of the record at `record_addr`.
-  *
-  * Emitted as read-modify-write on the storage unit (bf.storage.size bytes,
-  * 4 when that is zero) at bf.storage_offset:
-  *   t1 = current unit
-  *   t3 = (src & field_mask) << bit_offset
-  *   t1 = (t1 & ~(field_mask << bit_offset)) | t3
-  *   unit = t1
-  * `src` may be an immediate (materialized in t2) or a register; other
-  * kinds panic. A bit_width of zero is treated as 1. */
- static void rv_bitfield_store(CGTarget* t, Operand record_addr, Operand src,
-                               BitFieldAccess bf) {
-   MCEmitter* mc = t->mc;
-   u32 base = agg_addr_reg(t, record_addr, RV_T0);
-   u32 storage_bytes = bf.storage.size ? bf.storage.size : 4u;
-   /* Load current value into t1. */
-   emit32(mc, enc_int_load(storage_bytes, 0, RV_T1, base,
-                           (i32)bf.storage_offset));
-   u32 src_reg;
-   if (src.kind == OPK_IMM) {
-     emit_load_imm(mc, 1, RV_T2, src.v.imm);
-     src_reg = RV_T2;
-   } else if (src.kind == OPK_REG) {
-     src_reg = reg_num(src);
-   } else {
-     compiler_panic(t->c, impl_of(t)->loc,
-                    "rv64 bitfield_store: src kind %d NYI", (int)src.kind);
-   }
-   u32 lsb = bf.bit_offset;
-   u32 width = bf.bit_width ? bf.bit_width : 1u;
-   /* Low `width` bits set. Shifting a 64-bit value by 64 is undefined in C,
-    * so a full-width field gets the all-ones mask directly. */
-   u64 mask = (width >= 64u) ? ~(u64)0 : (((u64)1 << width) - 1u);
-   /* t3 = src & mask, then moved up to the field position. */
-   emit_load_imm(mc, 1, RV_T3, (i64)mask);
-   emit32(mc, rv_and(RV_T3, src_reg, RV_T3));
-   if (lsb) emit32(mc, rv_slli(RV_T3, RV_T3, lsb));
-   /* Clear the field bits in t1 (t2 is free again: src was consumed above),
-    * then merge in the new value. */
-   u64 mask_in = mask << lsb;
-   emit_load_imm(mc, 1, RV_T2, (i64)~mask_in);
-   emit32(mc, rv_and(RV_T1, RV_T1, RV_T2));
-   emit32(mc, rv_or(RV_T1, RV_T1, RV_T3));
-   emit32(mc, enc_int_store(storage_bytes, RV_T1, base,
-                            (i32)bf.storage_offset));
- }
-
-/* ---- arithmetic ---- */
-
- /* Emit `dst = a_op <op> b_op`.
-  *
-  * FP add/sub/mul/div are encoded directly from the three register operands,
-  * in double or single format according to dst.type. Integer operations
-  * choose the 64-bit encoding or the W-variant from type_is_64(dst.type).
-  *
-  * For integers an immediate on the left of a commutative op (add, and, or,
-  * xor) is first moved to the right. A right-hand immediate then uses an
-  * immediate-form instruction when it can:
-  *   - add/and/or/xor: value in [-2048, 2047];
-  *   - sub: emitted as an add of the negated value, for values in
-  *     [-2047, 2048];
-  *   - shifts: always, with the amount masked to 0..63 (64-bit) or 0..31 (W).
-  * Everything else goes through force_reg_int() and the register forms;
-  * ops with no register form panic. */
- static void rv_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op,
-                      Operand b_op) {
-   MCEmitter* mc = t->mc;
-
-   if (op == BO_FADD || op == BO_FSUB || op == BO_FMUL || op == BO_FDIV) {
-     u32 fmt = type_is_fp_double(dst.type) ? RV_FMT_D : RV_FMT_S;
-     u32 fd = reg_num(dst);
-     u32 fl = reg_num(a_op);
-     u32 fr = reg_num(b_op);
-     u32 word;
-     if (op == BO_FADD) word = rv_fadd(fmt, fd, fl, fr);
-     else if (op == BO_FSUB) word = rv_fsub(fmt, fd, fl, fr);
-     else if (op == BO_FMUL) word = rv_fmul(fmt, fd, fl, fr);
-     else word = rv_fdiv(fmt, fd, fl, fr);
-     emit32(mc, word);
-     return;
-   }
-
-   u32 wide = type_is_64(dst.type) ? 1u : 0u;
-   u32 out = reg_num(dst);
-
-   /* Put a lone immediate on the right for commutative ops. */
-   int commutes =
-       (op == BO_IADD || op == BO_AND || op == BO_OR || op == BO_XOR);
-   if (commutes && a_op.kind == OPK_IMM && b_op.kind != OPK_IMM) {
-     Operand held = a_op;
-     a_op = b_op;
-     b_op = held;
-   }
-
-   /* Immediate-form encodings. */
-   if (b_op.kind == OPK_IMM && a_op.kind != OPK_IMM) {
-     u32 lhs = reg_num(a_op);
-     i64 imm = b_op.v.imm;
-
-     if (op == BO_SHL || op == BO_SHR_U || op == BO_SHR_S) {
-       u32 amount = (u32)((u64)imm & (wide ? 63u : 31u));
-       u32 word;
-       if (op == BO_SHL) {
-         word = wide ? rv_slli(out, lhs, amount) : rv_slliw(out, lhs, amount);
-       } else if (op == BO_SHR_U) {
-         word = wide ? rv_srli(out, lhs, amount) : rv_srliw(out, lhs, amount);
-       } else {
-         word = wide ? rv_srai(out, lhs, amount) : rv_sraiw(out, lhs, amount);
-       }
-       emit32(mc, word);
-       return;
-     }
-
-     if (op == BO_ISUB) {
-       /* No SUBI: add the negated literal when that fits imm12. */
-       if (imm >= -2047 && imm <= 2048) {
-         emit32(mc, wide ? rv_addi(out, lhs, (i32)-imm)
-                         : rv_addiw(out, lhs, (i32)-imm));
-         return;
-       }
-     } else if (imm >= -2048 && imm <= 2047) {
-       switch (op) {
-         case BO_IADD:
-           emit32(mc, wide ? rv_addi(out, lhs, (i32)imm)
-                           : rv_addiw(out, lhs, (i32)imm));
-           return;
-         case BO_AND:
-           emit32(mc, rv_andi(out, lhs, (i32)imm));
-           return;
-         case BO_OR:
-           emit32(mc, rv_ori(out, lhs, (i32)imm));
-           return;
-         case BO_XOR:
-           emit32(mc, rv_xori(out, lhs, (i32)imm));
-           return;
-         default:
-           break;
-       }
-     }
-   }
-
-   /* Register forms; immediates are materialized into t0/t1. */
-   u32 lhs = force_reg_int(t, a_op, RV_T0);
-   u32 rhs = force_reg_int(t, b_op, (lhs == RV_T0) ? RV_T1 : RV_T0);
-
-   u32 word;
-   switch (op) {
-     case BO_IADD:
-       word = wide ? rv_add(out, lhs, rhs) : rv_addw(out, lhs, rhs);
-       break;
-     case BO_ISUB:
-       word = wide ? rv_sub(out, lhs, rhs) : rv_subw(out, lhs, rhs);
-       break;
-     case BO_IMUL:
-       word = wide ? rv_mul(out, lhs, rhs) : rv_mulw(out, lhs, rhs);
-       break;
-     case BO_AND:
-       word = rv_and(out, lhs, rhs);
-       break;
-     case BO_OR:
-       word = rv_or(out, lhs, rhs);
-       break;
-     case BO_XOR:
-       word = rv_xor(out, lhs, rhs);
-       break;
-     case BO_SHL:
-       word = wide ? rv_sll(out, lhs, rhs) : rv_sllw(out, lhs, rhs);
-       break;
-     case BO_SHR_U:
-       word = wide ? rv_srl(out, lhs, rhs) : rv_srlw(out, lhs, rhs);
-       break;
-     case BO_SHR_S:
-       word = wide ? rv_sra(out, lhs, rhs) : rv_sraw(out, lhs, rhs);
-       break;
-     case BO_SDIV:
-       word = wide ? rv_div(out, lhs, rhs) : rv_divw(out, lhs, rhs);
-       break;
-     case BO_UDIV:
-       word = wide ? rv_divu(out, lhs, rhs) : rv_divuw(out, lhs, rhs);
-       break;
-     case BO_SREM:
-       word = wide ? rv_rem(out, lhs, rhs) : rv_remw(out, lhs, rhs);
-       break;
-     case BO_UREM:
-       word = wide ? rv_remu(out, lhs, rhs) : rv_remuw(out, lhs, rhs);
-       break;
-     default:
-       compiler_panic(t->c, impl_of(t)->loc, "rv64 binop: op %d unimpl", (int)op);
-   }
-   emit32(mc, word);
- }
-
- /* Emit `dst = <op> a_op` for integer unary operators.
-  *
-  *   UO_NEG  : sub/subw from the zero register.
-  *   UO_BNOT : xori with -1.
-  *   UO_NOT  : sltiu dst, src, 1 (1 when the source is zero, else 0).
-  *
-  * A non-register operand is materialized into t0 by force_reg_int().
-  * Other operators panic. */
- static void rv_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) {
-   MCEmitter* mc = t->mc;
-   u32 wide = type_is_64(dst.type) ? 1u : 0u;
-   u32 out = reg_num(dst);
-   u32 in = force_reg_int(t, a_op, RV_T0);
-
-   if (op == UO_NEG) {
-     emit32(mc, wide ? rv_sub(out, RV_ZERO, in) : rv_subw(out, RV_ZERO, in));
-   } else if (op == UO_BNOT) {
-     emit32(mc, rv_xori(out, in, -1));
-   } else if (op == UO_NOT) {
-     emit32(mc, rv_sltiu(out, in, 1));
-   } else {
-     compiler_panic(t->c, impl_of(t)->loc, "rv64 unop: op %d unimpl", (int)op);
-   }
- }
-
- /* Emit a value conversion of kind `k` from register `src` to register
-  * `dst`.
-  *
-  *   CV_SEXT            : addiw (sext.w) for a 32-bit source, otherwise
-  *                        slli + srai by (64 - source bits).
-  *   CV_ZEXT            : slli + srli by (64 - source bits).
-  *   CV_TRUNC           : addiw dst, src, 0 (low 32 bits, sign-extended).
-  *   CV_ITOF_* / FTOI_* : fcvt variant chosen by integer width (W or L),
-  *                        signedness, and FP format (S or D).
-  *   CV_FEXT / FTRUNC   : fcvt.d.s / fcvt.s.d.
-  *   CV_BITCAST         : fmv between the integer and FP files; a
-  *                        same-class bitcast panics.
-  * Unknown kinds panic. */
- static void rv_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) {
-   RImpl* impl = impl_of(t);
-   MCEmitter* mc = t->mc;
-   u32 out = reg_num(dst);
-   u32 in = reg_num(src);
-
-   switch (k) {
-     case CV_SEXT: {
-       u32 bits = type_byte_size(src.type) * 8u;
-       if (bits != 32u) {
-         u32 sh = 64u - bits;
-         emit32(mc, rv_slli(out, in, sh));
-         emit32(mc, rv_srai(out, out, sh));
-       } else {
-         emit32(mc, rv_addiw(out, in, 0));
-       }
-       return;
-     }
-     case CV_ZEXT: {
-       /* A 32-bit source gives sh == 32, i.e. the zext.w shift pair. */
-       u32 sh = 64u - type_byte_size(src.type) * 8u;
-       emit32(mc, rv_slli(out, in, sh));
-       emit32(mc, rv_srli(out, out, sh));
-       return;
-     }
-     case CV_TRUNC:
-       /* Narrower-than-32 truncation is left to the consuming store. */
-       emit32(mc, rv_addiw(out, in, 0));
-       return;
-     case CV_ITOF_S: {
-       int from64 = type_is_64(src.type);
-       u32 word = type_is_fp_double(dst.type)
-                      ? (from64 ? rv_fcvt_d_l(out, in) : rv_fcvt_d_w(out, in))
-                      : (from64 ? rv_fcvt_s_l(out, in) : rv_fcvt_s_w(out, in));
-       emit32(mc, word);
-       return;
-     }
-     case CV_ITOF_U: {
-       int from64 = type_is_64(src.type);
-       u32 word = type_is_fp_double(dst.type)
-                      ? (from64 ? rv_fcvt_d_lu(out, in) : rv_fcvt_d_wu(out, in))
-                      : (from64 ? rv_fcvt_s_lu(out, in) : rv_fcvt_s_wu(out, in));
-       emit32(mc, word);
-       return;
-     }
-     case CV_FTOI_S: {
-       int to64 = type_is_64(dst.type);
-       u32 word = type_is_fp_double(src.type)
-                      ? (to64 ? rv_fcvt_l_d(out, in) : rv_fcvt_w_d(out, in))
-                      : (to64 ? rv_fcvt_l_s(out, in) : rv_fcvt_w_s(out, in));
-       emit32(mc, word);
-       return;
-     }
-     case CV_FTOI_U: {
-       int to64 = type_is_64(dst.type);
-       u32 word = type_is_fp_double(src.type)
-                      ? (to64 ? rv_fcvt_lu_d(out, in) : rv_fcvt_wu_d(out, in))
-                      : (to64 ? rv_fcvt_lu_s(out, in) : rv_fcvt_wu_s(out, in));
-       emit32(mc, word);
-       return;
-     }
-     case CV_FEXT:
-       emit32(mc, rv_fcvt_d_s(out, in));
-       return;
-     case CV_FTRUNC:
-       emit32(mc, rv_fcvt_s_d(out, in));
-       return;
-     case CV_BITCAST: {
-       int int_to_fp = (src.cls == RC_INT && dst.cls == RC_FP);
-       int fp_to_int = (src.cls == RC_FP && dst.cls == RC_INT);
-       if (int_to_fp) {
-         u32 bytes = type_byte_size(dst.type);
-         emit32(mc, bytes == 8 ? rv_fmv_d_x(out, in) : rv_fmv_w_x(out, in));
-       } else if (fp_to_int) {
-         u32 bytes = type_byte_size(src.type);
-         emit32(mc, bytes == 8 ? rv_fmv_x_d(out, in) : rv_fmv_x_w(out, in));
-       } else {
-         compiler_panic(t->c, impl->loc, "rv64 BITCAST: same-class NYI");
-       }
-       return;
-     }
-     default:
-       compiler_panic(t->c, impl->loc, "rv64 convert kind %d unimpl", (int)k);
-   }
- }
-
-/* ---- calls / return ---- */
-
- /* Place one outgoing call argument.
-  *
-  * `next_int` / `next_fp` count the integer (a0..a7) and FP (fa0..fa7)
-  * argument registers already used; `stack_off` is the byte offset from sp
-  * of the next outgoing stack slot. All three are advanced as the argument
-  * is placed.
-  *
-  *   - av->abi == NULL (variadic argument): treated as a single INT-class
-  *     DIRECT part of the argument's byte size, so FP values travel in
-  *     integer registers.
-  *   - ABI_ARG_IGNORE: nothing is emitted.
-  *   - ABI_ARG_INDIRECT: the address of the storage goes into the next
-  *     integer slot.
-  *   - otherwise each part goes to the next register of its class, or to an
-  *     8-byte stack slot once the eight registers of that class are used.
-  *     Stack-bound integer values are staged in t0, FP values loaded from
-  *     memory in ft0. */
- static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
-                            u32* next_fp, u32* stack_off) {
-   RImpl* impl = impl_of(t);
-   MCEmitter* mc = t->mc;
-
-   /* Synthesized one-part shape for variadic arguments. */
-   ABIArgInfo vararg_info;
-   ABIArgPart vararg_part;
-   const ABIArgInfo* info = av->abi;
-   if (!info) {
-     u32 bytes = type_byte_size(av->type);
-     memset(&vararg_part, 0, sizeof vararg_part);
-     vararg_part.cls = ABI_CLASS_INT;
-     vararg_part.size = bytes;
-     vararg_part.align = bytes;
-     vararg_part.src_offset = 0;
-     memset(&vararg_info, 0, sizeof vararg_info);
-     vararg_info.kind = ABI_ARG_DIRECT;
-     vararg_info.parts = &vararg_part;
-     vararg_info.nparts = 1;
-     info = &vararg_info;
-   }
-
-   if (info->kind == ABI_ARG_IGNORE) return;
-
-   if (info->kind == ABI_ARG_INDIRECT) {
-     int spill = (*next_int >= 8);
-     u32 target = spill ? RV_T0 : (RV_A0 + (*next_int)++);
-     u32 base;
-     i32 disp;
-     if (av->storage.kind == OPK_LOCAL) {
-       RvSlot* slot = slot_get(impl, av->storage.v.frame_slot);
-       if (!slot) compiler_panic(t->c, impl->loc, "rv64 call: bad byval slot");
-       base = RV_S0;
-       disp = -(i32)slot->off;
-     } else if (av->storage.kind == OPK_INDIRECT) {
-       base = av->storage.v.ind.base & 0x1fu;
-       disp = av->storage.v.ind.ofs;
-     } else {
-       compiler_panic(t->c, impl->loc,
-                      "rv64 call: INDIRECT storage kind %d NYI",
-                      (int)av->storage.kind);
-     }
-     /* target = base + disp */
-     if (disp < -2048 || disp > 2047) {
-       emit_load_imm(mc, 1, target, (i64)disp);
-       emit32(mc, rv_add(target, base, target));
-     } else {
-       emit32(mc, rv_addi(target, base, disp));
-     }
-     if (spill) {
-       emit32(mc, rv_sd(target, RV_SP, (i32)*stack_off));
-       *stack_off += 8;
-     }
-     return;
-   }
-
-   for (u16 pi = 0; pi < info->nparts; ++pi) {
-     const ABIArgPart* part = &info->parts[pi];
-     u32 bytes = part->size;
-
-     if (part->cls != ABI_CLASS_INT && part->cls != ABI_CLASS_FP) {
-       compiler_panic(t->c, impl->loc, "rv64 call: ABI class %d unimpl",
-                      (int)part->cls);
-     }
-
-     if (part->cls == ABI_CLASS_INT) {
-       int spill = (*next_int >= 8);
-       u32 target = spill ? RV_T0 : (RV_A0 + (*next_int)++);
-       switch (av->storage.kind) {
-         case OPK_IMM:
-           emit_load_imm(mc, (bytes == 8) ? 1u : 0u, target,
-                         av->storage.v.imm);
-           break;
-         case OPK_REG: {
-           u32 from = reg_num(av->storage);
-           if (av->storage.cls != RC_FP) {
-             emit32(mc, rv_addi(target, from, 0));
-           } else if (bytes == 8) {
-             /* FP value pinned to an integer register: move the bits. */
-             emit32(mc, rv_fmv_x_d(target, from));
-           } else {
-             emit32(mc, rv_fmv_x_w(target, from));
-           }
-           break;
-         }
-         case OPK_LOCAL: {
-           RvSlot* slot = slot_get(impl, av->storage.v.frame_slot);
-           if (!slot) compiler_panic(t->c, impl->loc, "rv64 call: bad arg slot");
-           i32 disp = -(i32)slot->off + (i32)part->src_offset;
-           emit32(mc, enc_int_load(bytes, 0, target, RV_S0, disp));
-           break;
-         }
-         case OPK_INDIRECT: {
-           /* cg holds INDIRECT base regs in s2..s11, disjoint from a0..a7
-            * and the t0 staging register. */
-           u32 base = av->storage.v.ind.base & 0x1fu;
-           i32 disp = av->storage.v.ind.ofs + (i32)part->src_offset;
-           emit32(mc, enc_int_load(bytes, 0, target, base, disp));
-           break;
-         }
-         default:
-           compiler_panic(t->c, impl->loc,
-                          "rv64 call: storage kind %d NYI",
-                          (int)av->storage.kind);
-       }
-       if (spill) {
-         emit32(mc, rv_sd(target, RV_SP, (i32)*stack_off));
-         *stack_off += 8;
-       }
-       continue;
-     }
-
-     /* ABI_CLASS_FP from here on. */
-     if (*next_fp < 8) {
-       u32 target = 10u + (*next_fp)++; /* fa0 is FP register 10 */
-       if (av->storage.kind == OPK_REG) {
-         u32 fmt = (bytes == 8) ? RV_FMT_D : RV_FMT_S;
-         u32 from = reg_num(av->storage);
-         emit32(mc, rv_fsgnj(fmt, target, from, from));
-       } else if (av->storage.kind == OPK_INDIRECT) {
-         u32 base = av->storage.v.ind.base & 0x1fu;
-         i32 disp = av->storage.v.ind.ofs + (i32)part->src_offset;
-         emit32(mc, (bytes == 8) ? rv_fld(target, base, disp)
-                                 : rv_flw(target, base, disp));
-       } else {
-         compiler_panic(t->c, impl->loc, "rv64 call: FP storage kind %d NYI",
-                        (int)av->storage.kind);
-       }
-       continue;
-     }
-
-     /* FP argument registers exhausted: write an 8-byte stack slot. */
-     if (av->storage.kind == OPK_REG) {
-       u32 from = reg_num(av->storage);
-       emit32(mc, (bytes == 8) ? rv_fsd(from, RV_SP, (i32)*stack_off)
-                               : rv_fsw(from, RV_SP, (i32)*stack_off));
-     } else if (av->storage.kind == OPK_INDIRECT) {
-       /* Bounce through ft0 (FP register 0), a caller-saved scratch outside
-        * the cg fs2..fs11 pool. */
-       u32 base = av->storage.v.ind.base & 0x1fu;
-       i32 disp = av->storage.v.ind.ofs + (i32)part->src_offset;
-       if (bytes == 8) {
-         emit32(mc, rv_fld(0u, base, disp));
-         emit32(mc, rv_fsd(0u, RV_SP, (i32)*stack_off));
-       } else {
-         emit32(mc, rv_flw(0u, base, disp));
-         emit32(mc, rv_fsw(0u, RV_SP, (i32)*stack_off));
-       }
-     } else {
-       compiler_panic(t->c, impl->loc, "rv64 call: FP stack-arg NYI");
-     }
-     *stack_off += 8;
-   }
- }
-
- /* Emit a call described by `d`.
-  *
-  * Steps:
-  *   1. With an sret ABI, the address of the (LOCAL) return slot is placed
-  *      in a0 and integer argument numbering starts at 1.
-  *   2. Each argument is placed by emit_arg_value(); the outgoing stack
-  *      bytes used, rounded up to 16, raise a->max_outgoing if larger.
-  *   3. The callee is reached by auipc+jalr with an R_RV_CALL relocation on
-  *      the auipc (OPK_GLOBAL), or by jalr through a register (OPK_REG).
-  *   4. Return parts are moved from a0.. / fa0.. into d->ret.storage: a
-  *      register (single part only), or a LOCAL / INDIRECT memory location
-  *      at each part's src_offset.
-  *
-  * When d->abi is NULL there is no return description, so step 4 is
-  * skipped. */
- static void rv_call(CGTarget* t, const CGCallDesc* d) {
-   RImpl* a = impl_of(t);
-   MCEmitter* mc = t->mc;
-
-   u32 next_int = 0, next_fp = 0, stack_off = 0;
-
-   /* sret: caller passes destination pointer in a0. */
-   if (d->abi && d->abi->has_sret) {
-     if (d->ret.storage.kind != OPK_LOCAL) {
-       compiler_panic(t->c, a->loc, "rv64 call: sret dst must be LOCAL");
-     }
-     RvSlot* s = slot_get(a, d->ret.storage.v.frame_slot);
-     if (!s) compiler_panic(t->c, a->loc, "rv64 call: bad sret slot");
-     i32 off = -(i32)s->off;
-     if (off >= -2048 && off <= 2047) {
-       emit32(mc, rv_addi(RV_A0, RV_S0, off));
-     } else {
-       emit_load_imm(mc, 1, RV_A0, (i64)off);
-       emit32(mc, rv_add(RV_A0, RV_S0, RV_A0));
-     }
-     next_int = 1;
-   }
-
-   for (u32 i = 0; i < d->nargs; ++i) {
-     emit_arg_value(t, &d->args[i], &next_int, &next_fp, &stack_off);
-   }
-   u32 needed = (stack_off + 15u) & ~15u;
-   if (needed > a->max_outgoing) a->max_outgoing = needed;
-
-   if (d->callee.kind == OPK_GLOBAL) {
-     /* AUIPC ra, 0 ; JALR ra, ra, 0 with R_RV_CALL on AUIPC */
-     u32 sec = mc->section_id;
-     u32 pos = mc->pos(mc);
-     emit32(mc, rv_auipc(RV_RA, 0));
-     emit32(mc, rv_jalr(RV_RA, RV_RA, 0));
-     mc->emit_reloc_at(mc, sec, pos, R_RV_CALL,
-                       d->callee.v.global.sym, d->callee.v.global.addend, 0, 0);
-   } else if (d->callee.kind == OPK_REG) {
-     emit32(mc, rv_jalr(RV_RA, reg_num(d->callee), 0));
-   } else {
-     compiler_panic(t->c, a->loc, "rv64 call: callee kind %d unsupported",
-                    (int)d->callee.kind);
-   }
-
-   /* Receive return value. The sret check above tolerates a NULL abi, so
-    * do the same here instead of dereferencing it. */
-   if (!d->abi) return;
-   const ABIArgInfo* ri = &d->abi->ret;
-   if (ri->kind == ABI_ARG_IGNORE || ri->kind == ABI_ARG_INDIRECT) return;
-   if (ri->nparts == 0) return;
-
-   Operand rs = d->ret.storage;
-   u32 nir = 0, nfr = 0;
-   for (u16 i = 0; i < ri->nparts; ++i) {
-     const ABIArgPart* p = &ri->parts[i];
-     /* INT parts come back in a0, a1, ...; FP parts in fa0 (f10), fa1, ... */
-     u32 src_reg = (p->cls == ABI_CLASS_INT) ? (RV_A0 + nir++) : (10u + nfr++);
-
-     if (rs.kind == OPK_REG) {
-       if (ri->nparts != 1) {
-         compiler_panic(t->c, a->loc, "rv64 call: REG ret with %u parts",
-                        (unsigned)ri->nparts);
-       }
-       if (p->cls == ABI_CLASS_INT) {
-         emit32(mc, rv_addi(reg_num(rs), src_reg, 0));
-       } else {
-         u32 fmt = (p->size == 8) ? RV_FMT_D : RV_FMT_S;
-         emit32(mc, rv_fsgnj(fmt, reg_num(rs), src_reg, src_reg));
-       }
-     } else if (rs.kind == OPK_LOCAL || rs.kind == OPK_INDIRECT) {
-       u32 base_reg;
-       i32 base_off;
-       if (rs.kind == OPK_LOCAL) {
-         RvSlot* s = slot_get(a, rs.v.frame_slot);
-         if (!s) compiler_panic(t->c, a->loc, "rv64 call: bad ret slot");
-         base_reg = RV_S0;
-         base_off = -(i32)s->off;
-       } else {
-         base_reg = rs.v.ind.base & 0x1fu;
-         base_off = rs.v.ind.ofs;
-       }
-       i32 off = base_off + (i32)p->src_offset;
-       if (p->cls == ABI_CLASS_INT) {
-         emit32(mc, enc_int_store(p->size, src_reg, base_reg, off));
-       } else {
-         if (p->size == 8) emit32(mc, rv_fsd(src_reg, base_reg, off));
-         else emit32(mc, rv_fsw(src_reg, base_reg, off));
-       }
-     } else if (rs.kind == OPK_IMM && rs.type && rs.type->kind == TY_VOID) {
-       /* void return placeholder — nothing to do. */
-     } else {
-       compiler_panic(t->c, a->loc, "rv64 call: ret_storage kind %d unsupported",
-                      (int)rs.kind);
-     }
-   }
- }
-
- /* Emit a function return: move `val` (if any) into the return location,
-  * then jump to the epilogue label.
-  *
-  *   - ABI_ARG_INDIRECT (sret): the destination pointer is reloaded from
-  *     a->sret_ptr_slot into t0 and the aggregate is copied to it through
-  *     t1, widest chunks first. The size is the slot size for LOCAL storage
-  *     and val->size for INDIRECT storage.
-  *   - REG storage: moved to a0, or fa0 for FP-class registers.
-  *   - IMM storage: materialized into a0.
-  *   - LOCAL / INDIRECT storage: each ABI part is loaded into the next
-  *     a-register (INT) or fa-register (FP).
-  *
-  * A non-empty sret copy with no sret pointer slot is a compiler panic:
-  * t0 would never have been loaded. */
- static void rv_ret(CGTarget* t, const CGABIValue* val) {
-   RImpl* a = impl_of(t);
-   MCEmitter* mc = t->mc;
-
-   if (val) {
-     const ABIArgInfo* ri = val->abi;
-     if (ri && ri->kind == ABI_ARG_INDIRECT) {
-       u32 src_base;
-       i32 src_base_off;
-       u32 nbytes;
-       if (val->storage.kind == OPK_LOCAL) {
-         RvSlot* s = slot_get(a, val->storage.v.frame_slot);
-         if (!s) compiler_panic(t->c, a->loc, "rv64 ret: bad sret slot");
-         src_base = RV_S0;
-         src_base_off = -(i32)s->off;
-         nbytes = s->size;
-       } else if (val->storage.kind == OPK_INDIRECT) {
-         src_base = val->storage.v.ind.base & 0x1fu;
-         src_base_off = val->storage.v.ind.ofs;
-         nbytes = val->size;
-         if (!nbytes) {
-           compiler_panic(t->c, a->loc,
-                          "rv64 ret indirect: missing aggregate size");
-         }
-       } else {
-         compiler_panic(t->c, a->loc,
-                        "rv64 ret indirect: storage kind %d NYI",
-                        (int)val->storage.kind);
-       }
-       RvSlot* sp = (a->sret_ptr_slot != FRAME_SLOT_NONE)
-                        ? slot_get(a, a->sret_ptr_slot)
-                        : NULL;
-       if (sp) {
-         emit32(mc, rv_ld(RV_T0, RV_S0, -(i32)sp->off));
-       } else if (nbytes) {
-         /* Copying through an unloaded t0 would write to an arbitrary
-          * address in the generated program. */
-         compiler_panic(t->c, a->loc,
-                        "rv64 ret indirect: missing sret pointer slot");
-       }
-       u32 i = 0;
-       while (i + 8 <= nbytes) {
-         emit32(mc, rv_ld(RV_T1, src_base, src_base_off + (i32)i));
-         emit32(mc, rv_sd(RV_T1, RV_T0, (i32)i));
-         i += 8;
-       }
-       while (i + 4 <= nbytes) {
-         emit32(mc, rv_lwu(RV_T1, src_base, src_base_off + (i32)i));
-         emit32(mc, rv_sw(RV_T1, RV_T0, (i32)i));
-         i += 4;
-       }
-       while (i + 2 <= nbytes) {
-         emit32(mc, rv_lhu(RV_T1, src_base, src_base_off + (i32)i));
-         emit32(mc, rv_sh(RV_T1, RV_T0, (i32)i));
-         i += 2;
-       }
-       while (i < nbytes) {
-         emit32(mc, rv_lbu(RV_T1, src_base, src_base_off + (i32)i));
-         emit32(mc, rv_sb(RV_T1, RV_T0, (i32)i));
-         i += 1;
-       }
-     } else if (val->storage.kind == OPK_REG) {
-       if (val->storage.cls == RC_FP) {
-         u32 fmt = type_is_fp_double(val->storage.type) ? RV_FMT_D : RV_FMT_S;
-         u32 r = reg_num(val->storage);
-         emit32(mc, rv_fsgnj(fmt, 10u, r, r)); /* fa0 = freg 10 */
-       } else {
-         emit32(mc, rv_addi(RV_A0, reg_num(val->storage), 0));
-       }
-     } else if (val->storage.kind == OPK_IMM) {
-       u32 sf = type_is_64(val->storage.type) ? 1u : 0u;
-       emit_load_imm(mc, sf, RV_A0, val->storage.v.imm);
-     } else if (val->storage.kind == OPK_LOCAL ||
-                val->storage.kind == OPK_INDIRECT) {
-       u32 base_reg;
-       i32 base_off;
-       if (val->storage.kind == OPK_LOCAL) {
-         RvSlot* s = slot_get(a, val->storage.v.frame_slot);
-         if (!s) compiler_panic(t->c, a->loc, "rv64 ret: bad local slot");
-         base_reg = RV_S0;
-         base_off = -(i32)s->off;
-       } else {
-         base_reg = val->storage.v.ind.base & 0x1fu;
-         base_off = val->storage.v.ind.ofs;
-       }
-       /* Without ABI info there are no parts and nothing is loaded. */
-       const ABIArgInfo* ri2 = val->abi;
-       u32 nir = 0, nfr = 0;
-       for (u16 i = 0; i < (ri2 ? ri2->nparts : 0); ++i) {
-         const ABIArgPart* pt = &ri2->parts[i];
-         i32 off = base_off + (i32)pt->src_offset;
-         if (pt->cls == ABI_CLASS_INT) {
-           emit32(mc, enc_int_load(pt->size, 0, RV_A0 + nir++, base_reg, off));
-         } else if (pt->cls == ABI_CLASS_FP) {
-           u32 freg = 10u + nfr++;
-           if (pt->size == 8) emit32(mc, rv_fld(freg, base_reg, off));
-           else emit32(mc, rv_flw(freg, base_reg, off));
-         } else {
-           compiler_panic(t->c, a->loc, "rv64 ret: part cls %d unimpl",
-                          (int)pt->cls);
-         }
-       }
-     }
-   }
-   /* Jump to epilogue: a jal with zero displacement, fixed up through the
-    * label reference recorded right after it. */
-   emit32(mc, rv_jal(RV_ZERO, 0));
-   mc->emit_label_ref(mc, a->epilogue_label, R_RV_JAL, 4, 0);
- }
-
- /* ---- alloca and varargs ---- */
-
- /* Emit a dynamic stack allocation of `sz` bytes; the block's address ends
-  * up in register `d`.
-  *
-  * The size is rounded up to a multiple of 16 and subtracted from sp. A
-  * constant size of zero still reserves 16 bytes. Constants that fit addi's
-  * immediate are applied directly; larger constants and register sizes go
-  * through t0. Alignments above 16 are not supported and panic.
-  *
-  * The result is produced by `addi d, sp, 0`. The position and destination
-  * register of that instruction are appended to a->add_patches so its
-  * immediate can be patched later (to max_outgoing, at func_end). Sets
-  * a->has_alloca. */
- static void rv_alloca_(CGTarget* t, Operand d, Operand sz, u32 align) {
-   RImpl* a = impl_of(t);
-   MCEmitter* mc = t->mc;
-   if (d.kind != OPK_REG) {
-     compiler_panic(t->c, a->loc, "rv64 alloca: dst must be REG");
-   }
-   if (align > 16) {
-     compiler_panic(t->c, a->loc,
-                    "rv64 alloca: align %u > 16 not yet supported", align);
-   }
-   if (sz.kind == OPK_IMM) {
-     i64 v = sz.v.imm;
-     if (v < 0) compiler_panic(t->c, a->loc, "rv64 alloca: negative size");
-     u64 aligned = ((u64)v + 15u) & ~(u64)15u;
-     if (aligned == 0) aligned = 16;
-     if (aligned <= 2048u) {
-       /* -aligned fits the 12-bit signed immediate of addi. */
-       emit32(mc, rv_addi(RV_SP, RV_SP, -(i32)aligned));
-     } else {
-       /* Too large for addi: same shape as the register-size path. */
-       emit_load_imm(mc, 1, RV_T0, (i64)aligned);
-       emit32(mc, rv_sub(RV_SP, RV_SP, RV_T0));
-     }
-   } else if (sz.kind == OPK_REG) {
-     u32 sz_reg = reg_num(sz);
-     /* t0 = (sz + 15) & ~15; sp -= t0 */
-     emit32(mc, rv_addi(RV_T0, sz_reg, 15));
-     emit32(mc, rv_andi(RV_T0, RV_T0, -16));
-     emit32(mc, rv_sub(RV_SP, RV_SP, RV_T0));
-   } else {
-     compiler_panic(t->c, a->loc, "rv64 alloca: size kind %d unsupported",
-                    (int)sz.kind);
-   }
-
-   /* Grow the patch list (doubling, starting at 4) when it is full. */
-   if (a->nadd_patches == a->add_patches_cap) {
-     u32 ncap = a->add_patches_cap ? a->add_patches_cap * 2 : 4;
-     struct RvAllocaPatch* nb = arena_array(t->c->tu, struct RvAllocaPatch, ncap);
-     if (a->add_patches)
-       memcpy(nb, a->add_patches, sizeof(*nb) * a->nadd_patches);
-     a->add_patches = nb;
-     a->add_patches_cap = ncap;
-   }
-   /* Placeholder: addi dst, sp, max_outgoing (imm patched at func_end). */
-   u32 dst_reg = reg_num(d);
-   a->add_patches[a->nadd_patches].pos = mc->pos(mc);
-   a->add_patches[a->nadd_patches].dst_reg = dst_reg;
-   a->nadd_patches++;
-   emit32(mc, rv_addi(dst_reg, RV_SP, 0));
-   a->has_alloca = 1;
- }
- /* va_start for RV64 LP64D.
-  *
-  * va_list is a single `void*` naming the next argument slot. The prologue
-  * spills a_{nparams_int}..a7 into a save area at [s0 + 16], at the top of
-  * the callee frame just above the saved s0/ra pair, so save_area[8] lines
-  * up with the caller's first stack argument and one 8-byte stride walks
-  * register and stack arguments alike.
-  *
-  * Stores s0 + 16 + 8 * next_param_int through the register `ap_op` (the
-  * named integer slots are skipped). Panics when the current function is
-  * not variadic. */
- static void rv_va_start_(CGTarget* t, Operand ap_op) {
-   RImpl* impl = impl_of(t);
-   MCEmitter* mc = t->mc;
-   if (!impl->is_variadic) {
-     compiler_panic(t->c, impl->loc, "rv64 va_start: function not variadic");
-   }
-   u32 list_reg = reg_num(ap_op);
-   i32 first_slot = 16 + (i32)(impl->next_param_int * 8u);
-   emit32(mc, rv_addi(RV_T0, RV_S0, first_slot));
-   emit32(mc, rv_sd(RV_T0, list_reg, 0));
- }
-
- /* va_arg: fetch the next variadic argument of type `ty` into `dst` and
-  * advance the list.
-  *
-  * t1 receives the current slot pointer loaded through `ap_op`. FP-class
-  * destinations are read as integer bits into t2 and moved across with
-  * fmv.d.x (8 bytes) or fmv.w.x; integer destinations use a load of the
-  * type's size, sign-extending for signed types. The pointer is then bumped
-  * by one 8-byte slot and written back. */
- static void rv_va_arg_(CGTarget* t, Operand dst, Operand ap_op,
-                        const Type* ty) {
-   MCEmitter* mc = t->mc;
-   u32 list_reg = reg_num(ap_op);
-   u32 bytes = type_byte_size(ty);
-
-   emit32(mc, rv_ld(RV_T1, list_reg, 0));
-
-   if (dst.cls != RC_FP) {
-     int is_signed = type_is_signed(ty);
-     emit32(mc, enc_int_load(bytes, is_signed, reg_num(dst), RV_T1, 0));
-   } else if (bytes == 8) {
-     emit32(mc, rv_ld(RV_T2, RV_T1, 0));
-     emit32(mc, rv_fmv_d_x(reg_num(dst), RV_T2));
-   } else {
-     emit32(mc, rv_lw(RV_T2, RV_T1, 0));
-     emit32(mc, rv_fmv_w_x(reg_num(dst), RV_T2));
-   }
-
-   /* Step to the next slot. */
-   emit32(mc, rv_addi(RV_T1, RV_T1, 8));
-   emit32(mc, rv_sd(RV_T1, list_reg, 0));
- }
-
- /* va_end: emits no code; both arguments are unused. */
- static void rv_va_end_(CGTarget* t, Operand a) {
-   (void)a;
-   (void)t;
- }
-
- /* va_copy: va_list is one 8-byte pointer, so copy that doubleword from the
-  * list addressed by register `s` to the list addressed by register `d`,
-  * going through t0. */
- static void rv_va_copy_(CGTarget* t, Operand d, Operand s) {
-   MCEmitter* mc = t->mc;
-   u32 to = reg_num(d);
-   u32 from = reg_num(s);
-   emit32(mc, rv_ld(RV_T0, from, 0));
-   emit32(mc, rv_sd(RV_T0, to, 0));
- }
-
-/* ---- atomics (LL/SC + AMO) ---- */
-
- /* Nonzero when ordering `o` includes acquire semantics: ACQUIRE, ACQ_REL,
-  * SEQ_CST, and CONSUME (treated like acquire). */
- static int mem_order_is_acquire(MemOrder o) {
-   switch (o) {
-     case MO_ACQUIRE:
-     case MO_ACQ_REL:
-     case MO_SEQ_CST:
-     case MO_CONSUME:
-       return 1;
-     default:
-       return 0;
-   }
- }
- /* Nonzero when ordering `o` includes release semantics: RELEASE, ACQ_REL,
-  * or SEQ_CST. */
- static int mem_order_is_release(MemOrder o) {
-   switch (o) {
-     case MO_RELEASE:
-     case MO_ACQ_REL:
-     case MO_SEQ_CST:
-       return 1;
-     default:
-       return 0;
-   }
- }
-
- /* Emit an atomic load of ma.size bytes from `addr` into `dst` with memory
-  * ordering `o`.
-  *
-  * The address must be a register or a frame local (resolved through
-  * addr_base(), with a non-zero offset folded into t0); other kinds panic.
-  *
-  *   - Orderings without acquire semantics: a plain zero-extending load.
-  *   - Acquire orderings, 1- or 2-byte access: lr.w/lr.d only exist for
-  *     words and doublewords, so a plain load of the right width is used,
-  *     followed by a full fence (and preceded by one for SEQ_CST).
-  *   - Acquire orderings, any other size: lr.d for 8 bytes, lr.w otherwise,
-  *     with the aq bit set. */
- static void rv_atomic_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma,
-                            MemOrder o) {
-   MCEmitter* mc = t->mc;
-   u32 sf = (ma.size == 8) ? 1u : 0u;
-   /* Resolve address to a register. */
-   u32 base;
-   if (addr.kind == OPK_REG) {
-     base = reg_num(addr);
-   } else if (addr.kind == OPK_LOCAL) {
-     i32 off;
-     base = addr_base(t, addr, &off, RV_T0);
-     if (off) {
-       emit32(mc, rv_addi(RV_T0, base, off));
-       base = RV_T0;
-     }
-   } else {
-     compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_load: addr kind %d NYI",
-                    (int)addr.kind);
-   }
-
-   int acquire = mem_order_is_acquire(o);
-   int subword = (ma.size == 1 || ma.size == 2);
-   if (acquire && subword) {
-     /* lr.w here would read four bytes and require word alignment. */
-     if (o == MO_SEQ_CST) emit32(mc, rv_fence_rw_rw());
-     emit32(mc, enc_int_load(ma.size, 0, reg_num(dst), base, 0));
-     emit32(mc, rv_fence_rw_rw());
-   } else if (acquire) {
-     /* lr.w/d as ordered load (aq=1, rl=0). */
-     emit32(mc, sf ? rv_lr_d(reg_num(dst), base, 1, 0)
-                   : rv_lr_w(reg_num(dst), base, 1, 0));
-   } else {
-     emit32(mc, enc_int_load(ma.size, 0, reg_num(dst), base, 0));
-   }
- }
-
- /* Emit an atomic store of ma.size bytes of `src` to `addr` with memory
-  * ordering `o`.
-  *
-  * `src` is an immediate (materialized into t1) or a register. `addr` is a
-  * register or a frame local (resolved through addr_base(), with a non-zero
-  * offset folded into t0). Other kinds panic.
-  *
-  * Orderings with release semantics get a full rw,rw fence before the
-  * store; SEQ_CST gets a second one after it. Other orderings emit only the
-  * store. */
- static void rv_atomic_store(CGTarget* t, Operand addr, Operand src,
-                             MemAccess ma, MemOrder o) {
-   MCEmitter* mc = t->mc;
-   u32 wide = (ma.size == 8) ? 1u : 0u;
-
-   u32 value_reg;
-   switch (src.kind) {
-     case OPK_IMM:
-       emit_load_imm(mc, wide, RV_T1, src.v.imm);
-       value_reg = RV_T1;
-       break;
-     case OPK_REG:
-       value_reg = reg_num(src);
-       break;
-     default:
-       compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_store: src kind %d NYI",
-                      (int)src.kind);
-   }
-
-   u32 ptr_reg;
-   switch (addr.kind) {
-     case OPK_REG:
-       ptr_reg = reg_num(addr);
-       break;
-     case OPK_LOCAL: {
-       i32 disp;
-       ptr_reg = addr_base(t, addr, &disp, RV_T0);
-       if (disp) {
-         emit32(mc, rv_addi(RV_T0, ptr_reg, disp));
-         ptr_reg = RV_T0;
-       }
-       break;
-     }
-     default:
-       compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_store: addr kind %d NYI",
-                      (int)addr.kind);
-   }
-
-   if (!mem_order_is_release(o)) {
-     emit32(mc, enc_int_store(ma.size, value_reg, ptr_reg, 0));
-     return;
-   }
-   emit32(mc, rv_fence_rw_rw());
-   emit32(mc, enc_int_store(ma.size, value_reg, ptr_reg, 0));
-   if (o == MO_SEQ_CST) {
-     emit32(mc, rv_fence_rw_rw());
-   }
- }
-
-static void rv_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr,
- Operand val, MemAccess ma, MemOrder o) {
- MCEmitter* mc = t->mc;
- u32 sf = (ma.size == 8) ? 1u : 0u;
- u32 base = RV_T0;
- if (addr.kind == OPK_REG) {
- emit32(mc, rv_addi(base, reg_num(addr), 0));
- } else if (addr.kind == OPK_LOCAL) {
- i32 off;
- u32 b = addr_base(t, addr, &off, RV_T0);
- if (b != RV_T0 || off) {
- emit32(mc, rv_addi(base, b, off));
- }
- } else {
- compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_rmw: addr NYI");
- }
- u32 vreg = RV_T1;
- if (val.kind == OPK_IMM) emit_load_imm(mc, sf, vreg, val.v.imm);
- else if (val.kind == OPK_REG) emit32(mc, rv_addi(vreg, reg_num(val), 0));
- else compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_rmw: val kind NYI");
-
- int aq = mem_order_is_acquire(o);
- int rl = mem_order_is_release(o);
-
- /* LR/SC loop for any op (simpler than per-op AMO encodings, but AMO is
- * preferred for the cases the corpus exercises). */
- MCLabel L_retry = mc->label_new(mc);
- mc->label_place(mc, L_retry);
- emit32(mc, sf ? rv_lr_d(reg_num(dst), base, (u32)aq, 0)
- : rv_lr_w(reg_num(dst), base, (u32)aq, 0));
- u32 new_r = RV_T2;
- switch (op) {
- case AO_XCHG: emit32(mc, rv_addi(new_r, vreg, 0)); break;
- case AO_ADD: emit32(mc, sf ? rv_add(new_r, reg_num(dst), vreg) : rv_addw(new_r, reg_num(dst), vreg)); break;
- case AO_SUB: emit32(mc, sf ? rv_sub(new_r, reg_num(dst), vreg) : rv_subw(new_r, reg_num(dst), vreg)); break;
- case AO_AND: emit32(mc, rv_and(new_r, reg_num(dst), vreg)); break;
- case AO_OR: emit32(mc, rv_or(new_r, reg_num(dst), vreg)); break;
- case AO_XOR: emit32(mc, rv_xor(new_r, reg_num(dst), vreg)); break;
- case AO_NAND:
- emit32(mc, rv_and(new_r, reg_num(dst), vreg));
- emit32(mc, rv_xori(new_r, new_r, -1));
- break;
- default: emit32(mc, rv_addi(new_r, vreg, 0)); break;
- }
- /* sc.w/d t3, new_r, (base); bnez t3, retry. */
- emit32(mc, sf ? rv_sc_d(RV_T3, base, new_r, 0, (u32)rl)
- : rv_sc_w(RV_T3, base, new_r, 0, (u32)rl));
- emit32(mc, rv_bne(RV_T3, RV_ZERO, 0));
- mc->emit_label_ref(mc, L_retry, R_RV_BRANCH, 4, 0);
-}
-
-static void rv_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr,
- Operand exp, Operand des, MemAccess ma,
- MemOrder succ, MemOrder fail) {
- MCEmitter* mc = t->mc;
- u32 sf = (ma.size == 8) ? 1u : 0u;
- (void)fail;
- u32 base = RV_T0;
- if (addr.kind == OPK_REG) emit32(mc, rv_addi(base, reg_num(addr), 0));
- else if (addr.kind == OPK_LOCAL) {
- i32 off; u32 b = addr_base(t, addr, &off, RV_T0);
- if (b != RV_T0 || off) emit32(mc, rv_addi(base, b, off));
- } else compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_cas: addr NYI");
- u32 ereg = RV_T1, dreg = RV_T2;
- if (exp.kind == OPK_IMM) emit_load_imm(mc, sf, ereg, exp.v.imm);
- else emit32(mc, rv_addi(ereg, reg_num(exp), 0));
- if (des.kind == OPK_IMM) emit_load_imm(mc, sf, dreg, des.v.imm);
- else emit32(mc, rv_addi(dreg, reg_num(des), 0));
-
- int aq = mem_order_is_acquire(succ);
- int rl = mem_order_is_release(succ);
-
- MCLabel L_retry = mc->label_new(mc);
- MCLabel L_fail = mc->label_new(mc);
- MCLabel L_done = mc->label_new(mc);
-
- mc->label_place(mc, L_retry);
- emit32(mc, sf ? rv_lr_d(reg_num(prior), base, (u32)aq, 0)
- : rv_lr_w(reg_num(prior), base, (u32)aq, 0));
- /* if (prior != expected) -> fail */
- emit32(mc, rv_bne(reg_num(prior), ereg, 0));
- mc->emit_label_ref(mc, L_fail, R_RV_BRANCH, 4, 0);
- /* sc.w/d t3, des, (base); bnez t3, retry */
- emit32(mc, sf ? rv_sc_d(RV_T3, base, dreg, 0, (u32)rl)
- : rv_sc_w(RV_T3, base, dreg, 0, (u32)rl));
- emit32(mc, rv_bne(RV_T3, RV_ZERO, 0));
- mc->emit_label_ref(mc, L_retry, R_RV_BRANCH, 4, 0);
- /* ok = 1; jump done */
- emit_load_imm(mc, 0, reg_num(ok), 1);
- emit32(mc, rv_jal(RV_ZERO, 0));
- mc->emit_label_ref(mc, L_done, R_RV_JAL, 4, 0);
-
- mc->label_place(mc, L_fail);
- emit_load_imm(mc, 0, reg_num(ok), 0);
-
- mc->label_place(mc, L_done);
-}
-
-static void rv_fence(CGTarget* t, MemOrder o) {
- if (o == MO_RELAXED) return;
- emit32(t->mc, rv_fence_rw_rw());
-}
-
-/* ---- intrinsics: do what we can; panic on the rest. ---- */
-static void rv_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd,
- const Operand* args, u32 na) {
- (void)nd; (void)na;
- MCEmitter* mc = t->mc;
- RImpl* a = impl_of(t);
- switch (kind) {
- case INTRIN_ASSUME_ALIGNED:
- case INTRIN_EXPECT: {
- /* dst = val (hint dropped). */
- Operand val = args[0];
- Operand dst = dsts[0];
- u32 sf = type_is_64(dst.type) ? 1u : 0u;
- if (val.kind == OPK_REG) {
- if (reg_num(val) != reg_num(dst))
- emit32(mc, rv_addi(reg_num(dst), reg_num(val), 0));
- } else if (val.kind == OPK_IMM) {
- emit_load_imm(mc, sf, reg_num(dst), val.v.imm);
- } else {
- compiler_panic(t->c, a->loc, "rv64 intrinsic: val kind %d NYI",
- (int)val.kind);
- }
- return;
- }
- case INTRIN_PREFETCH: return;
- case INTRIN_UNREACHABLE:
- case INTRIN_TRAP:
- emit32(mc, rv_ebreak());
- return;
- case INTRIN_BSWAP16: {
- /* rd = ((rs & 0xff) << 8) | ((rs >> 8) & 0xff) */
- u32 rd = reg_num(dsts[0]);
- u32 rs = reg_num(args[0]);
- emit32(mc, rv_slli(RV_T1, rs, 8)); /* t1 = rs << 8 */
- emit32(mc, rv_andi(RV_T1, RV_T1, 0)); /* placeholder */
- /* Use lui mask approach for portability: build mask 0xff00 in t2. */
- emit32(mc, rv_addi(RV_T2, RV_ZERO, 0));
- /* Simpler: 0xff00 fits in lui+addi pattern but is also small enough:
- * we can build via shift: t2 = 0xff << 8 = (0xff << 8). */
- emit32(mc, rv_addi(RV_T2, RV_ZERO, 0xff));
- emit32(mc, rv_slli(RV_T2, RV_T2, 8));
- /* t1 = (rs << 8) & 0xff00 */
- emit32(mc, rv_slli(RV_T1, rs, 8));
- emit32(mc, rv_and(RV_T1, RV_T1, RV_T2));
- /* t3 = (rs >> 8) & 0xff (use srli on RV64 — high bits zeroed by
- * preceding ANDI mask if input is uint16, but be safe and mask). */
- emit32(mc, rv_srli(RV_T3, rs, 8));
- emit32(mc, rv_andi(RV_T3, RV_T3, 0xff));
- emit32(mc, rv_or(rd, RV_T1, RV_T3));
- return;
- }
- case INTRIN_BSWAP32: {
- u32 rd = reg_num(dsts[0]);
- u32 rs = reg_num(args[0]);
- /* result = (b0<<24)|(b1<<16)|(b2<<8)|b3, where bi = (rs >> (8*i)) & 0xff. */
- /* t1 = ((rs >> 24) & 0xff) */
- emit32(mc, rv_srliw(RV_T1, rs, 24));
- emit32(mc, rv_andi(RV_T1, RV_T1, 0xff));
- /* t2 = ((rs >> 16) & 0xff) << 8 */
- emit32(mc, rv_srliw(RV_T2, rs, 16));
- emit32(mc, rv_andi(RV_T2, RV_T2, 0xff));
- emit32(mc, rv_slli(RV_T2, RV_T2, 8));
- emit32(mc, rv_or(RV_T1, RV_T1, RV_T2));
- /* t2 = ((rs >> 8) & 0xff) << 16 */
- emit32(mc, rv_srliw(RV_T2, rs, 8));
- emit32(mc, rv_andi(RV_T2, RV_T2, 0xff));
- emit32(mc, rv_slli(RV_T2, RV_T2, 16));
- emit32(mc, rv_or(RV_T1, RV_T1, RV_T2));
- /* t2 = (rs & 0xff) << 24 */
- emit32(mc, rv_andi(RV_T2, rs, 0xff));
- emit32(mc, rv_slli(RV_T2, RV_T2, 24));
- emit32(mc, rv_or(rd, RV_T1, RV_T2));
- /* zero-extend to 32 bits if dest is u32 */
- emit32(mc, rv_slli(rd, rd, 32));
- emit32(mc, rv_srli(rd, rd, 32));
- return;
- }
- case INTRIN_BSWAP64: {
- u32 rd = reg_num(dsts[0]);
- u32 rs = reg_num(args[0]);
- /* General bswap64: iterate over the 8 bytes. */
- /* t1 accumulator */
- emit32(mc, rv_addi(RV_T1, RV_ZERO, 0));
- for (int i = 0; i < 8; ++i) {
- /* t2 = (rs >> (8*i)) & 0xff */
- if (i == 0) {
- emit32(mc, rv_andi(RV_T2, rs, 0xff));
- } else {
- emit32(mc, rv_srli(RV_T2, rs, (u32)(8 * i)));
- emit32(mc, rv_andi(RV_T2, RV_T2, 0xff));
- }
- /* t2 <<= (56 - 8*i) (so byte 0 goes to top) */
- int sh = 56 - 8 * i;
- if (sh) emit32(mc, rv_slli(RV_T2, RV_T2, (u32)sh));
- emit32(mc, rv_or(RV_T1, RV_T1, RV_T2));
- }
- emit32(mc, rv_addi(rd, RV_T1, 0));
- return;
- }
- case INTRIN_POPCOUNT: {
- /* Software popcount. Use the bit-twiddling sequence on the
- * appropriate width. dst type drives width. */
- u32 rd = reg_num(dsts[0]);
- u32 rs = reg_num(args[0]);
- int is64 = type_is_64(args[0].type);
- /* Move rs into t1 to avoid clobbering input. */
- emit32(mc, rv_addi(RV_T1, rs, 0));
- if (!is64) {
- /* zext.w t1, t1 */
- emit32(mc, rv_slli(RV_T1, RV_T1, 32));
- emit32(mc, rv_srli(RV_T1, RV_T1, 32));
- }
- /* t1 = t1 - ((t1 >> 1) & 0x5555...) */
- emit32(mc, rv_srli(RV_T2, RV_T1, 1));
- emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x5555555555555555ll
- : (i64)0x55555555);
- emit32(mc, rv_and(RV_T2, RV_T2, RV_T3));
- emit32(mc, rv_sub(RV_T1, RV_T1, RV_T2));
- /* t1 = (t1 & 0x3333...) + ((t1 >> 2) & 0x3333...) */
- emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x3333333333333333ll
- : (i64)0x33333333);
- emit32(mc, rv_and(RV_T2, RV_T1, RV_T3));
- emit32(mc, rv_srli(RV_T1, RV_T1, 2));
- emit32(mc, rv_and(RV_T1, RV_T1, RV_T3));
- emit32(mc, rv_add(RV_T1, RV_T1, RV_T2));
- /* t1 = (t1 + (t1 >> 4)) & 0x0f0f... */
- emit32(mc, rv_srli(RV_T2, RV_T1, 4));
- emit32(mc, rv_add(RV_T1, RV_T1, RV_T2));
- emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x0f0f0f0f0f0f0f0fll
- : (i64)0x0f0f0f0f);
- emit32(mc, rv_and(RV_T1, RV_T1, RV_T3));
- /* t1 *= 0x0101010101... ; result in top byte */
- emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x0101010101010101ll
- : (i64)0x01010101);
- emit32(mc, rv_mul(RV_T1, RV_T1, RV_T3));
- /* shift right by (XLEN - 8) */
- emit32(mc, rv_srli(rd, RV_T1, is64 ? 56u : 24u));
- return;
- }
- case INTRIN_CTZ: {
- /* ctz(x) = popcount((x & -x) - 1) for x != 0. */
- u32 rd = reg_num(dsts[0]);
- u32 rs = reg_num(args[0]);
- int is64 = type_is_64(args[0].type);
- /* t1 = -x */
- emit32(mc, rv_sub(RV_T1, RV_ZERO, rs));
- /* t1 = x & -x */
- emit32(mc, rv_and(RV_T1, RV_T1, rs));
- /* t1 = t1 - 1 */
- emit32(mc, rv_addi(RV_T1, RV_T1, -1));
- if (!is64) {
- emit32(mc, rv_slli(RV_T1, RV_T1, 32));
- emit32(mc, rv_srli(RV_T1, RV_T1, 32));
- }
- /* popcount(t1) into rd */
- emit32(mc, rv_srli(RV_T2, RV_T1, 1));
- emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x5555555555555555ll
- : (i64)0x55555555);
- emit32(mc, rv_and(RV_T2, RV_T2, RV_T3));
- emit32(mc, rv_sub(RV_T1, RV_T1, RV_T2));
- emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x3333333333333333ll
- : (i64)0x33333333);
- emit32(mc, rv_and(RV_T2, RV_T1, RV_T3));
- emit32(mc, rv_srli(RV_T1, RV_T1, 2));
- emit32(mc, rv_and(RV_T1, RV_T1, RV_T3));
- emit32(mc, rv_add(RV_T1, RV_T1, RV_T2));
- emit32(mc, rv_srli(RV_T2, RV_T1, 4));
- emit32(mc, rv_add(RV_T1, RV_T1, RV_T2));
- emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x0f0f0f0f0f0f0f0fll
- : (i64)0x0f0f0f0f);
- emit32(mc, rv_and(RV_T1, RV_T1, RV_T3));
- emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x0101010101010101ll
- : (i64)0x01010101);
- emit32(mc, rv_mul(RV_T1, RV_T1, RV_T3));
- emit32(mc, rv_srli(rd, RV_T1, is64 ? 56u : 24u));
- return;
- }
- case INTRIN_CLZ: {
- /* Software clz: fold the high bit downward, then popcount the
- * inverted result. Standard recipe:
- * x |= x>>1; x |= x>>2; x |= x>>4; x |= x>>8; x |= x>>16;
- * [x |= x>>32;] // 64-bit
- * clz = popcount(~x) [for the appropriate width].
- */
- u32 rd = reg_num(dsts[0]);
- u32 rs = reg_num(args[0]);
- int is64 = type_is_64(args[0].type);
- emit32(mc, rv_addi(RV_T1, rs, 0));
- if (!is64) {
- /* zero-ext to 32 to make srli safe */
- emit32(mc, rv_slli(RV_T1, RV_T1, 32));
- emit32(mc, rv_srli(RV_T1, RV_T1, 32));
- }
- u32 shifts[6] = {1, 2, 4, 8, 16, 32};
- u32 ns = is64 ? 6u : 5u;
- for (u32 i = 0; i < ns; ++i) {
- emit32(mc, rv_srli(RV_T2, RV_T1, shifts[i]));
- emit32(mc, rv_or(RV_T1, RV_T1, RV_T2));
- }
- /* t1 = ~t1, then popcount and we want the (width - popcount) ... wait.
- * Actually clz(x) for the folded x = popcount(~x). Let me verify.
- * If x = 0b00011010, fold => 0b00011111. ~ => 0b11100000.
- * popcount(~folded) = 3 = clz(0b00011010) ✓. */
- emit32(mc, rv_xori(RV_T1, RV_T1, -1));
- if (!is64) {
- emit32(mc, rv_slli(RV_T1, RV_T1, 32));
- emit32(mc, rv_srli(RV_T1, RV_T1, 32));
- }
- /* popcount(t1) into rd */
- emit32(mc, rv_srli(RV_T2, RV_T1, 1));
- emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x5555555555555555ll
- : (i64)0x55555555);
- emit32(mc, rv_and(RV_T2, RV_T2, RV_T3));
- emit32(mc, rv_sub(RV_T1, RV_T1, RV_T2));
- emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x3333333333333333ll
- : (i64)0x33333333);
- emit32(mc, rv_and(RV_T2, RV_T1, RV_T3));
- emit32(mc, rv_srli(RV_T1, RV_T1, 2));
- emit32(mc, rv_and(RV_T1, RV_T1, RV_T3));
- emit32(mc, rv_add(RV_T1, RV_T1, RV_T2));
- emit32(mc, rv_srli(RV_T2, RV_T1, 4));
- emit32(mc, rv_add(RV_T1, RV_T1, RV_T2));
- emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x0f0f0f0f0f0f0f0fll
- : (i64)0x0f0f0f0f);
- emit32(mc, rv_and(RV_T1, RV_T1, RV_T3));
- emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x0101010101010101ll
- : (i64)0x01010101);
- emit32(mc, rv_mul(RV_T1, RV_T1, RV_T3));
- emit32(mc, rv_srli(rd, RV_T1, is64 ? 56u : 24u));
- return;
- }
- case INTRIN_ADD_OVERFLOW:
- case INTRIN_SUB_OVERFLOW: {
- /* dsts: [val, ovf]. Signed overflow check.
- * For ADD: ovf = ((a XOR result) & (b XOR result)) >> (width-1)
- * For SUB: ovf = ((a XOR b) & (a XOR result)) >> (width-1) */
- Operand a_op = args[0], b_op = args[1];
- Operand dval = dsts[0], dovf = dsts[1];
- int is64 = type_is_64(dval.type);
- u32 ra = force_reg_int(t, a_op, RV_T0);
- u32 rb = force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0);
- u32 rd = reg_num(dval);
- u32 rovf = reg_num(dovf);
- /* Compute result into t2 (avoid clobbering rd if rd == ra/rb). */
- if (kind == INTRIN_ADD_OVERFLOW) {
- emit32(mc, is64 ? rv_add(RV_T2, ra, rb) : rv_addw(RV_T2, ra, rb));
- } else {
- emit32(mc, is64 ? rv_sub(RV_T2, ra, rb) : rv_subw(RV_T2, ra, rb));
- }
- /* t3 = a XOR t2 */
- emit32(mc, rv_xor(RV_T3, ra, RV_T2));
- if (kind == INTRIN_ADD_OVERFLOW) {
- /* t4 = b XOR t2 */
- emit32(mc, rv_xor(rovf, rb, RV_T2));
- emit32(mc, rv_and(rovf, rovf, RV_T3));
- } else {
- /* t4 = a XOR b */
- emit32(mc, rv_xor(rovf, ra, rb));
- emit32(mc, rv_and(rovf, rovf, RV_T3));
- }
- /* shift right to extract sign bit */
- u32 sh = is64 ? 63u : 31u;
- emit32(mc, is64 ? rv_srli(rovf, rovf, sh) : rv_srliw(rovf, rovf, sh));
- emit32(mc, rv_andi(rovf, rovf, 1));
- /* Now write the value. */
- emit32(mc, rv_addi(rd, RV_T2, 0));
- return;
- }
- case INTRIN_MUL_OVERFLOW: {
- /* SMULL: full 64-bit signed product of two i32s, then compare
- * with sign-extend of low 32. For i64 inputs we panic for now. */
- Operand a_op = args[0], b_op = args[1];
- Operand dval = dsts[0], dovf = dsts[1];
- int is64 = type_is_64(dval.type);
- if (is64) {
- compiler_panic(t->c, a->loc, "rv64 intrinsic: mul_overflow i64 NYI");
- }
- u32 ra = force_reg_int(t, a_op, RV_T0);
- u32 rb = force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0);
- u32 rd = reg_num(dval);
- u32 rovf = reg_num(dovf);
- /* Sign-extend inputs from 32 to 64. */
- emit32(mc, rv_addiw(RV_T2, ra, 0));
- emit32(mc, rv_addiw(RV_T3, rb, 0));
- /* Full 64-bit product */
- emit32(mc, rv_mul(RV_T2, RV_T2, RV_T3));
- /* sign-ext of low 32 of product */
- emit32(mc, rv_addiw(RV_T3, RV_T2, 0));
- /* ovf = (T2 != T3) */
- emit32(mc, rv_xor(rovf, RV_T2, RV_T3));
- emit32(mc, rv_sltu(rovf, RV_ZERO, rovf));
- /* dval = low 32, sign-extended */
- emit32(mc, rv_addiw(rd, RV_T2, 0));
- return;
- }
- case INTRIN_MEMCPY:
- case INTRIN_MEMMOVE: {
- Operand da = args[0], sa = args[1], nb = args[2];
- if (da.kind != OPK_REG || sa.kind != OPK_REG || nb.kind != OPK_IMM) {
- compiler_panic(t->c, a->loc,
- "rv64 intrinsic: memcpy/memmove non-const NYI");
- }
- u32 dr = reg_num(da), sr = reg_num(sa), n = (u32)nb.v.imm;
- if (kind == INTRIN_MEMCPY) {
- u32 i = 0;
- while (i + 8 <= n) { emit32(mc, rv_ld(RV_T3, sr, (i32)i)); emit32(mc, rv_sd(RV_T3, dr, (i32)i)); i += 8; }
- while (i + 4 <= n) { emit32(mc, rv_lwu(RV_T3, sr, (i32)i)); emit32(mc, rv_sw(RV_T3, dr, (i32)i)); i += 4; }
- while (i + 2 <= n) { emit32(mc, rv_lhu(RV_T3, sr, (i32)i)); emit32(mc, rv_sh(RV_T3, dr, (i32)i)); i += 2; }
- while (i < n) { emit32(mc, rv_lbu(RV_T3, sr, (i32)i)); emit32(mc, rv_sb(RV_T3, dr, (i32)i)); i += 1; }
- } else {
- u32 i = n;
- while (i >= 8) { i -= 8; emit32(mc, rv_ld(RV_T3, sr, (i32)i)); emit32(mc, rv_sd(RV_T3, dr, (i32)i)); }
- while (i >= 4) { i -= 4; emit32(mc, rv_lwu(RV_T3, sr, (i32)i)); emit32(mc, rv_sw(RV_T3, dr, (i32)i)); }
- while (i >= 2) { i -= 2; emit32(mc, rv_lhu(RV_T3, sr, (i32)i)); emit32(mc, rv_sh(RV_T3, dr, (i32)i)); }
- while (i >= 1) { i -= 1; emit32(mc, rv_lbu(RV_T3, sr, (i32)i)); emit32(mc, rv_sb(RV_T3, dr, (i32)i)); }
- }
- return;
- }
- case INTRIN_MEMSET: {
- Operand da = args[0], bv = args[1], nb = args[2];
- if (da.kind != OPK_REG || nb.kind != OPK_IMM) {
- compiler_panic(t->c, a->loc, "rv64 intrinsic: memset non-const NYI");
- }
- u32 dr = reg_num(da), n = (u32)nb.v.imm;
- u32 src;
- if (bv.kind == OPK_IMM) {
- u32 byte = (u32)(bv.v.imm & 0xffu);
- if (byte == 0) src = RV_ZERO;
- else {
- u64 b = byte; b |= b << 8; b |= b << 16; b |= b << 32;
- emit_load_imm(mc, 1, RV_T3, (i64)b);
- src = RV_T3;
- }
- } else {
- compiler_panic(t->c, a->loc, "rv64 intrinsic: memset REG byte NYI");
- }
- u32 i = 0;
- while (i + 8 <= n) { emit32(mc, rv_sd(src, dr, (i32)i)); i += 8; }
- while (i + 4 <= n) { emit32(mc, rv_sw(src, dr, (i32)i)); i += 4; }
- while (i + 2 <= n) { emit32(mc, rv_sh(src, dr, (i32)i)); i += 2; }
- while (i < n) { emit32(mc, rv_sb(src, dr, (i32)i)); i += 1; }
- return;
- }
- default:
- compiler_panic(t->c, a->loc, "rv64 intrinsic kind %d NYI", (int)kind);
- }
-}
-
-static void rv_asm_block(CGTarget* t, const char* tmpl,
- const AsmConstraint* outs, u32 no, Operand* oo,
- const AsmConstraint* ins, u32 ni, const Operand* io,
- const Sym* clobs, u32 nc) {
- (void)tmpl; (void)outs; (void)no; (void)oo;
- (void)ins; (void)ni; (void)io; (void)clobs; (void)nc;
- rv_panic(t, "asm_block");
-}
-
-static void rv_set_loc(CGTarget* t, SrcLoc l) {
- ((RImpl*)t)->loc = l;
- if (t->mc) t->mc->set_loc(t->mc, l);
-}
-
-static void rv_finalize(CGTarget* t) { (void)t; }
-static void rv_destroy(CGTarget* t) { (void)t; }
-
-static void cgt_cleanup(void* arg) { cgtarget_free((CGTarget*)arg); }
-
-CGTarget* rv64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) {
- RImpl* x = arena_new(c->tu, RImpl);
- memset(x, 0, sizeof *x);
-
- CGTarget* t = &x->base;
- t->c = c;
- t->obj = o;
- t->mc = m;
-
- t->func_begin = rv_func_begin;
- t->func_end = rv_func_end;
-
- t->alloc_reg = rv_alloc_reg;
- t->free_reg = rv_free_reg;
- t->frame_slot = rv_frame_slot;
- t->param = rv_param;
- t->clobbers = rv_clobbers;
- t->spill_reg = rv_spill_reg;
- t->reload_reg = rv_reload_reg;
-
- t->label_new = rv_label_new;
- t->label_place = rv_label_place;
- t->jump = rv_jump;
- t->cmp_branch = rv_cmp_branch;
-
- t->scope_begin = rv_scope_begin;
- t->scope_else = rv_scope_else;
- t->scope_end = rv_scope_end;
- t->break_to = rv_break_to;
- t->continue_to = rv_continue_to;
-
- t->load_imm = rv_load_imm;
- t->load_const = rv_load_const;
- t->copy = rv_copy;
- t->load = rv_load;
- t->store = rv_store;
- t->addr_of = rv_addr_of;
- t->tls_addr_of = rv_tls_addr_of;
- t->copy_bytes = rv_copy_bytes;
- t->set_bytes = rv_set_bytes;
- t->bitfield_load = rv_bitfield_load;
- t->bitfield_store = rv_bitfield_store;
-
- t->binop = rv_binop;
- t->unop = rv_unop;
- t->cmp = rv_cmp;
- t->convert = rv_convert;
-
- t->call = rv_call;
- t->ret = rv_ret;
-
- t->alloca_ = rv_alloca_;
- t->va_start_ = rv_va_start_;
- t->va_arg_ = rv_va_arg_;
- t->va_end_ = rv_va_end_;
- t->va_copy_ = rv_va_copy_;
-
- t->setjmp_ = NULL;
- t->longjmp_ = NULL;
-
- t->atomic_load = rv_atomic_load;
- t->atomic_store = rv_atomic_store;
- t->atomic_rmw = rv_atomic_rmw;
- t->atomic_cas = rv_atomic_cas;
- t->fence = rv_fence;
-
- t->intrinsic = rv_intrinsic;
- t->asm_block = rv_asm_block;
-
- t->set_loc = rv_set_loc;
- t->finalize = rv_finalize;
- t->destroy = rv_destroy;
-
- (void)type_is_signed;
- compiler_defer(c, cgt_cleanup, t);
- return t;
-}
diff --git a/src/arch/rv64/alloc.c b/src/arch/rv64/alloc.c
@@ -0,0 +1,394 @@
+/* src/arch/rv64/alloc.c — register pool, spill/reload, labels, control flow. */
+
+#include "arch/rv64/internal.h"
+
+/* ---- regs / frame ---- */
+
+Reg rv_alloc_reg(CGTarget* t, RegClass cls, const Type* ty) {
+ RImpl* a = impl_of(t);
+ (void)ty;
+ if (cls == RC_INT) return regpool_alloc(&a->int_pool);
+ if (cls == RC_FP) return regpool_alloc(&a->fp_pool);
+ compiler_panic(t->c, a->loc, "rv64 alloc_reg: class %d unimpl", (int)cls);
+}
+
+void rv_free_reg(CGTarget* t, Reg r, RegClass cls) {
+ RImpl* a = impl_of(t);
+ RegPool* p;
+ switch (cls) {
+ case RC_INT: p = &a->int_pool; break;
+ case RC_FP: p = &a->fp_pool; break;
+ default:
+ compiler_panic(t->c, a->loc, "rv64 free_reg: class %d unimpl", (int)cls);
+ }
+ int rc = regpool_free(p, r);
+ if (rc == 1) return;
+ if (rc == -1) {
+ compiler_panic(t->c, a->loc, "rv64 free_reg: reg %u already free in %s pool",
+ (unsigned)r, cls == RC_FP ? "fp" : "int");
+ }
+ compiler_panic(t->c, a->loc, "rv64 free_reg: reg %u not in %s pool",
+ (unsigned)r, cls == RC_FP ? "fp" : "int");
+}
+
+FrameSlot rv_frame_slot(CGTarget* t, const FrameSlotDesc* d) {
+ RImpl* a = impl_of(t);
+ if (a->nslots == a->slots_cap) {
+ u32 ncap = a->slots_cap ? a->slots_cap * 2 : 8;
+ RvSlot* nbuf = arena_array(t->c->tu, RvSlot, ncap);
+ if (a->slots) memcpy(nbuf, a->slots, sizeof(RvSlot) * a->nslots);
+ a->slots = nbuf;
+ a->slots_cap = ncap;
+ }
+ u32 size = d->size ? d->size : 8;
+ u32 align = d->align ? d->align : 1;
+ u32 next = a->cum_off + size;
+ u32 mask = align - 1;
+ next = (next + mask) & ~mask;
+
+ RvSlot* s = &a->slots[a->nslots];
+ s->off = next;
+ s->size = size;
+ s->align = align;
+ s->kind = d->kind;
+
+ a->cum_off = next;
+ a->nslots++;
+ return (FrameSlot)(a->nslots);
+}
+
+RvSlot* rv64_slot_get(RImpl* a, FrameSlot fs) {
+ if (fs == FRAME_SLOT_NONE || fs > a->nslots) return NULL;
+ return &a->slots[fs - 1];
+}
+
+/* ---- param ---- */
+
+void rv_param(CGTarget* t, const CGParamDesc* p) {
+ RImpl* a = impl_of(t);
+ MCEmitter* mc = t->mc;
+ RvSlot* s = rv64_slot_get(a, p->slot);
+ if (!s) compiler_panic(t->c, a->loc, "rv64 param: bad slot");
+ const ABIArgInfo* ai = p->abi;
+ /* Caller's stack args start above the saved-s0/ra pair, plus the
+ * 64-byte variadic save area when this function is variadic. */
+ i32 caller_stack_base = 16 + (a->is_variadic ? 64 : 0);
+
+ if (ai->kind == ABI_ARG_IGNORE) return;
+ if (ai->kind == ABI_ARG_INDIRECT) {
+ /* Pointer-to-copy passed in a-register. Copy bytes from there into
+ * the home slot. Source pointer is in a0..a7. */
+ u32 ptr_reg;
+ if (a->next_param_int < 8) {
+ ptr_reg = RV_A0 + a->next_param_int;
+ a->next_param_int++;
+ } else {
+ u32 caller_off = a->next_param_stack;
+ a->next_param_stack += 8;
+ /* Incoming stack args live in the caller's outgoing-arg area,
+ * which is `frame_size - fp_pair_off` (= 16 + the saved-s0/ra
+ * pair) above s0 — same logic as aa64's `16 + caller_off`. */
+ rv64_emit32(mc, rv_ld(RV_T1, RV_S0, caller_stack_base + (i32)caller_off));
+ ptr_reg = RV_T1;
+ }
+ u32 nbytes = s->size;
+ u32 i = 0;
+ while (i + 8 <= nbytes) {
+ rv64_emit32(mc, rv_ld(RV_T2, ptr_reg, (i32)i));
+ rv64_emit32(mc, rv_sd(RV_T2, RV_S0, -(i32)s->off + (i32)i));
+ i += 8;
+ }
+ while (i + 4 <= nbytes) {
+ rv64_emit32(mc, rv_lwu(RV_T2, ptr_reg, (i32)i));
+ rv64_emit32(mc, rv_sw(RV_T2, RV_S0, -(i32)s->off + (i32)i));
+ i += 4;
+ }
+ while (i + 2 <= nbytes) {
+ rv64_emit32(mc, rv_lhu(RV_T2, ptr_reg, (i32)i));
+ rv64_emit32(mc, rv_sh(RV_T2, RV_S0, -(i32)s->off + (i32)i));
+ i += 2;
+ }
+ while (i < nbytes) {
+ rv64_emit32(mc, rv_lbu(RV_T2, ptr_reg, (i32)i));
+ rv64_emit32(mc, rv_sb(RV_T2, RV_S0, -(i32)s->off + (i32)i));
+ i += 1;
+ }
+ return;
+ }
+ /* DIRECT */
+ for (u16 i = 0; i < ai->nparts; ++i) {
+ const ABIArgPart* pt = &ai->parts[i];
+ u32 part_off = pt->src_offset;
+ u32 sz = pt->size;
+
+ if (pt->cls == ABI_CLASS_INT) {
+ if (a->next_param_int < 8) {
+ u32 reg = RV_A0 + a->next_param_int;
+ a->next_param_int++;
+ rv64_emit32(mc, enc_int_store(sz, reg, RV_S0,
+ -(i32)s->off + (i32)part_off));
+ } else {
+ u32 caller_off = a->next_param_stack;
+ a->next_param_stack += 8;
+ rv64_emit32(mc, enc_int_load(sz, 0, RV_T2, RV_S0,
+ caller_stack_base + (i32)caller_off));
+ rv64_emit32(mc, enc_int_store(sz, RV_T2, RV_S0,
+ -(i32)s->off + (i32)part_off));
+ }
+ } else if (pt->cls == ABI_CLASS_FP) {
+ if (a->next_param_fp < 8) {
+ u32 reg = a->next_param_fp; /* fa0..fa7 → freg 10..17 */
+ u32 freg = 10u + reg;
+ a->next_param_fp++;
+ if (sz == 8) {
+ rv64_emit32(mc, rv_fsd(freg, RV_S0, -(i32)s->off + (i32)part_off));
+ } else {
+ rv64_emit32(mc, rv_fsw(freg, RV_S0, -(i32)s->off + (i32)part_off));
+ }
+ } else {
+ u32 caller_off = a->next_param_stack;
+ a->next_param_stack += 8;
+ if (sz == 8) {
+ rv64_emit32(mc, rv_fld(0, RV_S0, caller_stack_base + (i32)caller_off));
+ rv64_emit32(mc, rv_fsd(0, RV_S0, -(i32)s->off + (i32)part_off));
+ } else {
+ rv64_emit32(mc, rv_flw(0, RV_S0, caller_stack_base + (i32)caller_off));
+ rv64_emit32(mc, rv_fsw(0, RV_S0, -(i32)s->off + (i32)part_off));
+ }
+ }
+ } else {
+ compiler_panic(t->c, a->loc, "rv64 param: ABI class %d unimpl",
+ (int)pt->cls);
+ }
+ }
+}
+
+const Reg* rv_clobbers(CGTarget* t, RegClass c, u32* n) {
+ (void)c;
+ (void)n;
+ rv_panic(t, "clobbers");
+}
+
+void rv_spill_reg(CGTarget* t, Operand src, FrameSlot slot,
+ MemAccess ma) {
+ RImpl* a = impl_of(t);
+ if (src.kind != OPK_REG) {
+ compiler_panic(t->c, a->loc, "rv64 spill_reg: src is not OPK_REG");
+ }
+ Operand addr;
+ memset(&addr, 0, sizeof addr);
+ addr.kind = OPK_LOCAL;
+ addr.cls = RC_INT;
+ addr.type = ma.type;
+ addr.v.frame_slot = slot;
+ rv_store(t, addr, src, ma);
+ rv_free_reg(t, src.v.reg, src.cls);
+}
+
+void rv_reload_reg(CGTarget* t, Operand dst, FrameSlot slot,
+ MemAccess ma) {
+ RImpl* a = impl_of(t);
+ if (dst.kind != OPK_REG) {
+ compiler_panic(t->c, a->loc, "rv64 reload_reg: dst is not OPK_REG");
+ }
+ Operand addr;
+ memset(&addr, 0, sizeof addr);
+ addr.kind = OPK_LOCAL;
+ addr.cls = RC_INT;
+ addr.type = ma.type;
+ addr.v.frame_slot = slot;
+ rv_load(t, dst, addr, ma);
+}
+
+/* ---- labels / control flow ---- */
+
+Label rv_label_new(CGTarget* t) {
+ return (Label)t->mc->label_new(t->mc);
+}
+void rv_label_place(CGTarget* t, Label l) {
+ t->mc->label_place(t->mc, (MCLabel)l);
+}
+void rv_jump(CGTarget* t, Label l) {
+ MCEmitter* mc = t->mc;
+ rv64_emit32(mc, rv_jal(RV_ZERO, 0));
+ mc->emit_label_ref(mc, (MCLabel)l, R_RV_JAL, 4, 0);
+}
+
+/* Force an integer Operand into a register; materializes IMM via scratch. */
+u32 rv64_force_reg_int(CGTarget* t, Operand op, u32 scratch) {
+ if (op.kind == OPK_REG) return reg_num(op);
+ if (op.kind == OPK_IMM) {
+ u32 sf = type_is_64(op.type) ? 1u : 0u;
+ rv64_emit_load_imm(t->mc, sf, scratch, op.v.imm);
+ return scratch;
+ }
+ compiler_panic(t->c, impl_of(t)->loc,
+ "rv64: operand kind %d unsupported here", (int)op.kind);
+}
+
+/* Emit a conditional branch (a OP b) → label. Uses BEQ/BNE/BLT/BGE etc. */
+void rv_cmp_branch(CGTarget* t, CmpOp op, Operand a_op, Operand b_op,
+                          Label l) {
+  MCEmitter* mc = t->mc;
+  RImpl* a = impl_of(t);
+  /* FP compares are not lowered to branches here yet: panic (NYI). */
+  if (op == CMP_LT_F || op == CMP_LE_F || op == CMP_GT_F || op == CMP_GE_F) {
+    compiler_panic(t->c, a->loc, "rv64 cmp_branch: FP cmp NYI");
+  }
+  u32 ra = rv64_force_reg_int(t, a_op, RV_T0);
+  u32 rb = rv64_force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0);
+  u32 word = 0;
+  switch (op) {
+    case CMP_EQ:   word = rv_beq(ra, rb, 0); break;
+    case CMP_NE:   word = rv_bne(ra, rb, 0); break;
+    case CMP_LT_S: word = rv_blt(ra, rb, 0); break;
+    case CMP_GE_S: word = rv_bge(ra, rb, 0); break;
+    case CMP_LT_U: word = rv_bltu(ra, rb, 0); break;
+    case CMP_GE_U: word = rv_bgeu(ra, rb, 0); break;
+    /* > and <= reuse BLT/BGE with the operands swapped: a > b ↔ b < a;
+     * a <= b ↔ b >= a. */
+    case CMP_GT_S: word = rv_blt(rb, ra, 0); break;
+    case CMP_LE_S: word = rv_bge(rb, ra, 0); break;
+    case CMP_GT_U: word = rv_bltu(rb, ra, 0); break;
+    case CMP_LE_U: word = rv_bgeu(rb, ra, 0); break;
+    default:
+      compiler_panic(t->c, a->loc, "rv64 cmp_branch: op %d unimpl", (int)op);
+  }
+  rv64_emit32(mc, word);
+  mc->emit_label_ref(mc, (MCLabel)l, R_RV_BRANCH, 4, 0);
+}
+
+/* Materialize 0/1 into dst from a comparison. */
+void rv_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a_op,
+                   Operand b_op) {
+  MCEmitter* mc = t->mc;
+  RImpl* a = impl_of(t);
+  u32 rd = reg_num(dst);
+
+  if (op == CMP_LT_F || op == CMP_LE_F || op == CMP_GT_F || op == CMP_GE_F) {
+    /* FP compare of fa,fb → rd via FLT/FLE; GT/GE swap the operands. */
+    int is_d = type_is_fp_double(a_op.type);
+    u32 fa = reg_num(a_op);
+    u32 fb = reg_num(b_op);
+    switch (op) {
+      case CMP_LT_F: rv64_emit32(mc, is_d ? rv_flt_d(rd, fa, fb) : rv_flt_s(rd, fa, fb)); return;
+      case CMP_LE_F: rv64_emit32(mc, is_d ? rv_fle_d(rd, fa, fb) : rv_fle_s(rd, fa, fb)); return;
+      case CMP_GT_F: rv64_emit32(mc, is_d ? rv_flt_d(rd, fb, fa) : rv_flt_s(rd, fb, fa)); return;
+      case CMP_GE_F: rv64_emit32(mc, is_d ? rv_fle_d(rd, fb, fa) : rv_fle_s(rd, fb, fa)); return;
+      default: break; /* not reached: op is one of the four FP ops above */
+    }
+  }
+  u32 ra = rv64_force_reg_int(t, a_op, RV_T0);
+  u32 rb = rv64_force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0);
+
+  switch (op) {
+    case CMP_EQ:
+      rv64_emit32(mc, rv_sub(rd, ra, rb));
+      rv64_emit32(mc, rv_sltiu(rd, rd, 1));
+      return;
+    case CMP_NE:
+      rv64_emit32(mc, rv_sub(rd, ra, rb));
+      rv64_emit32(mc, rv_sltu(rd, RV_ZERO, rd));
+      return;
+    case CMP_LT_S: rv64_emit32(mc, rv_slt(rd, ra, rb)); return;
+    case CMP_LT_U: rv64_emit32(mc, rv_sltu(rd, ra, rb)); return;
+    case CMP_GT_S: rv64_emit32(mc, rv_slt(rd, rb, ra)); return;
+    case CMP_GT_U: rv64_emit32(mc, rv_sltu(rd, rb, ra)); return;
+    case CMP_GE_S:
+      rv64_emit32(mc, rv_slt(rd, ra, rb));
+      rv64_emit32(mc, rv_xori(rd, rd, 1));
+      return;
+    case CMP_GE_U:
+      rv64_emit32(mc, rv_sltu(rd, ra, rb));
+      rv64_emit32(mc, rv_xori(rd, rd, 1));
+      return;
+    case CMP_LE_S:
+      rv64_emit32(mc, rv_slt(rd, rb, ra));
+      rv64_emit32(mc, rv_xori(rd, rd, 1));
+      return;
+    case CMP_LE_U:
+      rv64_emit32(mc, rv_sltu(rd, rb, ra));
+      rv64_emit32(mc, rv_xori(rd, rd, 1));
+      return;
+    default:
+      compiler_panic(t->c, a->loc, "rv64 cmp: op %d unimpl", (int)op);
+  }
+}
+
+/* ---- structured scopes (SCOPE_IF + SCOPE_LOOP/BLOCK bookkeep) ---- */
+
+CGScope rv_scope_begin(CGTarget* t, const CGScopeDesc* d) {
+ RImpl* a = impl_of(t);
+ if (a->nscopes == a->scopes_cap) {
+ u32 ncap = a->scopes_cap ? a->scopes_cap * 2u : 4u;
+ RvScope* nb = arena_array(t->c->tu, RvScope, ncap);
+ if (a->scopes) memcpy(nb, a->scopes, sizeof(RvScope) * a->nscopes);
+ a->scopes = nb;
+ a->scopes_cap = ncap;
+ }
+ RvScope* sc = &a->scopes[a->nscopes];
+ sc->kind = (u8)d->kind;
+ sc->has_else = 0;
+ sc->else_label = 0;
+ sc->end_label = 0;
+ sc->break_label = d->break_label;
+ sc->continue_label = d->continue_label;
+
+ if (d->kind == SCOPE_IF) {
+ sc->else_label = t->mc->label_new(t->mc);
+ sc->end_label = t->mc->label_new(t->mc);
+ u32 rn = rv64_force_reg_int(t, d->cond, RV_T0);
+ /* beq rn, x0, else_label */
+ rv64_emit32(t->mc, rv_beq(rn, RV_ZERO, 0));
+ t->mc->emit_label_ref(t->mc, sc->else_label, R_RV_BRANCH, 4, 0);
+ } else if (d->kind == SCOPE_LOOP || d->kind == SCOPE_BLOCK) {
+ /* bookkeep only */
+ } else {
+ compiler_panic(t->c, a->loc,
+ "rv64 scope_begin: kind %d not yet implemented",
+ (int)d->kind);
+ }
+ a->nscopes++;
+ return (CGScope)a->nscopes;
+}
+
+void rv_scope_else(CGTarget* t, CGScope s) {
+ RImpl* a = impl_of(t);
+ if (s == CG_SCOPE_NONE || s > a->nscopes) {
+ compiler_panic(t->c, a->loc, "rv64 scope_else: bad scope");
+ }
+ RvScope* sc = &a->scopes[s - 1];
+ /* jump end ; place else */
+ rv64_emit32(t->mc, rv_jal(RV_ZERO, 0));
+ t->mc->emit_label_ref(t->mc, sc->end_label, R_RV_JAL, 4, 0);
+ t->mc->label_place(t->mc, sc->else_label);
+ sc->has_else = 1;
+}
+
+void rv_scope_end(CGTarget* t, CGScope s) {
+ RImpl* a = impl_of(t);
+ if (s == CG_SCOPE_NONE || s > a->nscopes) {
+ compiler_panic(t->c, a->loc, "rv64 scope_end: bad scope");
+ }
+ RvScope* sc = &a->scopes[s - 1];
+ if (sc->kind == SCOPE_IF) {
+ if (!sc->has_else) t->mc->label_place(t->mc, sc->else_label);
+ t->mc->label_place(t->mc, sc->end_label);
+ }
+}
+
+void rv_break_to(CGTarget* t, CGScope s) {
+ RImpl* a = impl_of(t);
+ if (s == CG_SCOPE_NONE || s > a->nscopes) {
+ compiler_panic(t->c, a->loc, "rv64 break_to: bad scope");
+ }
+ rv_jump(t, a->scopes[s - 1].break_label);
+}
+
+void rv_continue_to(CGTarget* t, CGScope s) {
+ RImpl* a = impl_of(t);
+ if (s == CG_SCOPE_NONE || s > a->nscopes) {
+ compiler_panic(t->c, a->loc, "rv64 continue_to: bad scope");
+ }
+ rv_jump(t, a->scopes[s - 1].continue_label);
+}
diff --git a/src/arch/rv64/emit.c b/src/arch/rv64/emit.c
@@ -0,0 +1,332 @@
+/* src/arch/rv64/emit.c — immediate encoding, function lifecycle, frame setup. */
+
+#include "arch/rv64/internal.h"
+
+/* Append one 32-bit instruction word to the current section in
+ * little-endian byte order. When a debug sink is attached, also record a
+ * row mapping the word's section offset to the emitter's current source
+ * location. */
+void rv64_emit32(MCEmitter* mc, u32 word) {
+  /* Capture the offset before emitting so the row points at this word. */
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  u8 le[4];
+  for (u32 k = 0; k < 4u; ++k) {
+    le[k] = (u8)(word >> (8u * k));
+  }
+  mc->emit_bytes(mc, le, 4);
+  if (!mc->debug) return;
+  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+}
+
+/* Overwrite the 4 bytes at offset `ofs` of section `sec_id` with `word`,
+ * stored little-endian. */
+void rv64_patch32(ObjBuilder* obj, u32 sec_id, u32 ofs, u32 word) {
+  u8 le[4];
+  for (u32 k = 0; k < 4u; ++k) {
+    le[k] = (u8)(word >> (8u * k));
+  }
+  obj_patch(obj, sec_id, ofs, le, 4);
+}
+
+/* Abort compilation with an "rv64: <what> not implemented" diagnostic at
+ * the backend's current source location. */
+_Noreturn void rv_panic(CGTarget* t, const char* what) {
+  compiler_panic(t->c, impl_of(t)->loc, "rv64: %s not implemented", what);
+}
+
+/* Return nonzero when `v` is representable as a signed 32-bit integer. */
+int fits_signed32(i64 v) {
+  const i64 lo = -2147483647LL - 1LL; /* -2^31 */
+  const i64 hi = 2147483647LL;        /*  2^31 - 1 */
+  return v >= lo && v <= hi;
+}
+
+/* Load the 32-bit constant `imm` into `rd`, sign-extended to XLEN.
+ *
+ * Small values use a single `addi rd, x0, imm`. Otherwise the value is
+ * split into a 20-bit upper part for LUI and a signed 12-bit lower part
+ * for ADDIW. Because ADDIW sign-extends its immediate, the upper part is
+ * biased by 0x800 so that hi20 << 12 plus the signed lo12 reproduces
+ * `imm` modulo 2^32.
+ *
+ * The split is done in unsigned arithmetic: `imm + 0x800` and
+ * `hi << 12` would overflow a signed 32-bit int for values near
+ * INT32_MAX, which is undefined behavior in C. */
+void emit_li_32(MCEmitter* mc, u32 rd, i32 imm) {
+  if (imm >= -2048 && imm <= 2047) {
+    rv64_emit32(mc, rv_addi(rd, RV_ZERO, imm));
+    return;
+  }
+  u32 bits = (u32)imm;
+  u32 hi20 = ((bits + 0x800u) >> 12) & 0xfffffu;
+  /* Low 12 bits reinterpreted as a signed value in [-2048, 2047]. */
+  i32 lo12 = (i32)(bits & 0xfffu);
+  if (lo12 >= 0x800) lo12 -= 0x1000;
+
+  rv64_emit32(mc, rv_lui(rd, hi20));
+  if (lo12) rv64_emit32(mc, rv_addiw(rd, rd, lo12));
+}
+
+/* Materialize the constant `imm` into integer register `rd`.
+ *
+ *   mc  - emitter receiving the instructions
+ *   sf  - zero for a 32-bit destination (only the low 32 bits of `imm`
+ *         are loaded, sign-extended); nonzero for a full 64-bit load
+ *   rd  - destination register number
+ *   imm - value to load
+ *
+ * Values that fit a signed 32-bit immediate go through emit_li_32().
+ * Anything wider is built in place: the upper 32 bits are loaded first,
+ * then the lower 32 bits are shifted in as three non-negative chunks of
+ * 11, 11 and 10 bits (each fits ADDI's signed 12-bit immediate). Zero
+ * chunks are skipped and their shifts merged, so a constant whose low
+ * half is zero still costs just `li hi; slli rd, rd, 32`.
+ *
+ * The previous implementation computed the high half with borrow
+ * compensation ((imm - sext(lo32)) >> 32) but then zero-extended the low
+ * half and ORed it in, which produced a result 2^32 too large whenever
+ * bit 31 of `imm` was set. It also used t0 as a scratch register, which
+ * broke callers passing rd == t0. This sequence needs no scratch
+ * register and only writes `rd`. */
+void rv64_emit_load_imm(MCEmitter* mc, u32 sf, u32 rd, i64 imm) {
+  if (!sf || fits_signed32(imm)) {
+    emit_li_32(mc, rd, (i32)imm);
+    return;
+  }
+
+  u64 bits = (u64)imm;
+  u32 lo = (u32)bits;
+  i32 hi = (i32)(u32)(bits >> 32);
+
+  /* rd = sext(hi); the sign-extension bits fall off the top once the
+   * total left shift reaches 32. */
+  emit_li_32(mc, rd, hi);
+
+  static const u32 chunk_bits[3] = {11u, 11u, 10u};
+  u32 remaining = 32u; /* low-half bits not yet shifted in */
+  u32 pending = 0;     /* left shift owed to rd but not yet emitted */
+  for (u32 k = 0; k < 3u; ++k) {
+    u32 w = chunk_bits[k];
+    remaining -= w;
+    pending += w;
+    u32 chunk = (lo >> remaining) & ((1u << w) - 1u);
+    if (chunk) {
+      rv64_emit32(mc, rv_slli(rd, rd, pending));
+      rv64_emit32(mc, rv_addi(rd, rd, (i32)chunk));
+      pending = 0;
+    }
+  }
+  if (pending) rv64_emit32(mc, rv_slli(rd, rd, pending));
+}
+
+/* Adjust the stack pointer: sp += imm. A displacement that fits ADDI's
+ * signed 12-bit immediate takes one instruction; anything else is loaded
+ * into t0 first and added with ADD (t0 is clobbered in that case). */
+void emit_sp_addi(MCEmitter* mc, i64 imm) {
+  int fits_imm12 = (imm >= -2048) && (imm <= 2047);
+  if (fits_imm12) {
+    rv64_emit32(mc, rv_addi(RV_SP, RV_SP, (i32)imm));
+  } else {
+    rv64_emit_load_imm(mc, 1, RV_T0, imm);
+    rv64_emit32(mc, rv_add(RV_SP, RV_SP, RV_T0));
+  }
+}
+
+/* ---- function lifecycle ---- */
+
+/* Start emitting function `fd`: select and align its text section, reset
+ * all per-function backend state, open the CFI region, and reserve a
+ * NOP-filled prologue placeholder that rv_func_end() patches once the
+ * frame layout is known. */
+void rv_func_begin(CGTarget* t, const CGFuncDesc* fd) {
+  MCEmitter* mc = t->mc;
+  RImpl* impl = impl_of(t);
+
+  mc->set_section(mc, fd->text_section_id);
+  mc->emit_align(mc, 4, 0);
+
+  impl->fd = fd;
+  impl->func_start = mc->pos(mc);
+
+  /* Incoming-parameter cursors and ABI-derived flags. */
+  impl->next_param_int = 0;
+  impl->next_param_fp = 0;
+  impl->next_param_stack = 0;
+  impl->has_sret = (u8)(fd->abi && fd->abi->has_sret);
+  impl->is_variadic = (u8)(fd->abi && fd->abi->variadic);
+
+  /* Frame bookkeeping. */
+  impl->cum_off = 0;
+  impl->max_outgoing = 0;
+  impl->fp_pair_off = 0;
+  impl->nslots = 0;
+  impl->nscopes = 0;
+  impl->has_alloca = 0;
+  impl->nadd_patches = 0;
+  impl->gp_save_slot = FRAME_SLOT_NONE;
+  impl->sret_ptr_slot = FRAME_SLOT_NONE;
+
+  /* Allocatable registers: s2..s11 and fs2..fs11 (register numbers 18..27). */
+  regpool_init(&impl->int_pool, /*base=*/18u, /*nregs=*/10u);
+  regpool_init(&impl->fp_pool, /*base=*/18u, /*nregs=*/10u);
+
+  impl->epilogue_label = mc->label_new(mc);
+
+  mc->cfi_startproc(mc);
+
+  /* Prologue placeholder: RV_PROLOGUE_WORDS NOPs, rewritten by func_end. */
+  impl->prologue_pos = mc->pos(mc);
+  u32 remaining = RV_PROLOGUE_WORDS;
+  while (remaining > 0) {
+    rv64_emit32(mc, RV_NOP);
+    --remaining;
+  }
+
+  /* Variadic functions need no slot here: their 64-byte GP save area is
+   * implicit at [s0 + 16], directly above the saved s0/ra pair, and is
+   * filled by the patched prologue. */
+  if (!impl->has_sret) return;
+
+  /* sret: the caller passes the result pointer in a0. Reserve a hidden
+   * 8-byte spill slot for it so the body may reuse a0; the store itself is
+   * part of the patched prologue, once the slot offset is final. */
+  FrameSlotDesc hidden = {
+      .size = 8,
+      .align = 8,
+      .kind = FS_SPILL,
+  };
+  impl->sret_ptr_slot = rv_frame_slot(t, &hidden);
+  /* a0 is taken, so the first named integer parameter starts at a1. */
+  impl->next_param_int = 1;
+}
+
+/* Finish the current function: compute the final frame layout, emit the
+ * epilogue, patch the prologue placeholder reserved by rv_func_begin(),
+ * patch alloca placeholders, and define the function symbol.
+ *
+ * Frame layout (s0 = sp + fp_pair_off after the prologue):
+ *
+ *   s0 + 16 .. s0 + 80      variadic GP save area (only if is_variadic);
+ *                           its end is sp + frame_size
+ *   s0 + 8                  saved ra
+ *   s0 + 0                  saved caller s0
+ *   s0 - cum_off .. s0      locals and spill slots
+ *   below the locals        fp saves, 8 bytes each, fs2 highest
+ *   below the fp saves      int saves, 8 bytes each, s2 highest
+ *   (alignment padding, if any)
+ *   sp + 0 .. sp + max_out  outgoing-argument area
+ *
+ * Callee-saved registers are addressed relative to s0 so they do not
+ * depend on the final frame_size encoding and survive alloca-induced sp
+ * changes. Panics when an offset cannot be encoded or when the prologue
+ * does not fit in RV_PROLOGUE_WORDS. */
+void rv_func_end(CGTarget* t) {
+  RImpl* a = impl_of(t);
+  MCEmitter* mc = t->mc;
+  ObjBuilder* obj = t->obj;
+  u32 sec = a->fd->text_section_id;
+
+  u32 n_int_saves = a->int_pool.hwm; /* s2..s2+hwm-1 */
+  u32 n_fp_saves = a->fp_pool.hwm;
+  u32 max_out = (a->max_outgoing + 15u) & ~15u;
+  u32 int_saves_sz = n_int_saves * 8u;
+  u32 fp_saves_sz = n_fp_saves * 8u;
+
+  /* Variadic functions reserve a 64-byte save area at the very top of
+   * the frame so the save area and the caller's stack args form a single
+   * contiguous byte stream walked by the va_list pointer. */
+  u32 va_save_sz = a->is_variadic ? 64u : 0u;
+  u32 locals_off = max_out + int_saves_sz + fp_saves_sz; /* from sp */
+  u32 fp_pair_off = locals_off + a->cum_off;
+  u32 frame_size = fp_pair_off + 16u + va_save_sz;
+  frame_size = (frame_size + 15u) & ~15u;
+  /* Re-derive from the rounded size so the s0/ra pair stays directly
+   * below the variadic area (or the frame top). */
+  fp_pair_off = frame_size - 16u - va_save_sz;
+  a->fp_pair_off = fp_pair_off;
+
+  /* Place the epilogue label at current pos. */
+  mc->label_place(mc, a->epilogue_label);
+
+  /* s0-relative offsets of the first fp save and the first int save;
+   * each subsequent save of the same class is 8 bytes lower. */
+  i32 fp_save_base = -(i32)a->cum_off - 8;
+  i32 int_save_base = fp_save_base - (i32)fp_saves_sz;
+
+  /* Restore callee-saved registers, walking each class from its last
+   * saved register back to its first. */
+  for (i32 i = (i32)n_int_saves - 1; i >= 0; --i) {
+    u32 r = 18u + (u32)i; /* s2 + i */
+    i32 off = int_save_base - 8 * (i32)i;
+    rv64_emit32(mc, rv_ld(r, RV_S0, off));
+  }
+  for (i32 i = (i32)n_fp_saves - 1; i >= 0; --i) {
+    u32 r = 18u + (u32)i; /* fs2 + i (fp reg number) */
+    i32 off = fp_save_base - 8 * (i32)i;
+    rv64_emit32(mc, rv_fld(r, RV_S0, off));
+  }
+  /* With alloca, sp no longer equals its post-prologue value; recompute
+   * it from s0 before the sp-relative restores below. */
+  if (a->has_alloca) {
+    if ((i32)fp_pair_off > 2047) {
+      compiler_panic(t->c, a->loc, "rv64: fp_pair_off too large for alloca");
+    }
+    rv64_emit32(mc, rv_addi(RV_SP, RV_S0, -(i32)fp_pair_off));
+  }
+  rv64_emit32(mc, rv_ld(RV_S0, RV_SP, (i32)fp_pair_off));
+  rv64_emit32(mc, rv_ld(RV_RA, RV_SP, (i32)fp_pair_off + 8));
+  emit_sp_addi(mc, (i64)frame_size);
+  rv64_emit32(mc, rv_ret_());
+
+  /* Now patch the prologue placeholder. Unused words stay NOPs. */
+  u32 pos = a->prologue_pos;
+  u32 words[RV_PROLOGUE_WORDS];
+  for (u32 i = 0; i < RV_PROLOGUE_WORDS; ++i) words[i] = RV_NOP;
+  u32 wi = 0;
+
+  /* addi sp, sp, -frame_size (or lui/addiw/add via t0 if too large) */
+  if ((i64)frame_size <= 2048) {
+    words[wi++] = rv_addi(RV_SP, RV_SP, -(i32)frame_size);
+  } else {
+    i64 neg = -(i64)frame_size;
+    if (fits_signed32(neg)) {
+      /* Same hi20/lo12 split as emit_li_32, done in unsigned arithmetic
+       * so the shift cannot overflow a signed int. */
+      u32 nbits = (u32)(i32)neg;
+      u32 hi = ((nbits + 0x800u) >> 12) & 0xfffffu;
+      i32 lo = (i32)(nbits & 0xfffu);
+      if (lo >= 0x800) lo -= 0x1000;
+      words[wi++] = rv_lui(RV_T0, hi);
+      if (lo) words[wi++] = rv_addiw(RV_T0, RV_T0, lo);
+      words[wi++] = rv_add(RV_SP, RV_SP, RV_T0);
+    } else {
+      compiler_panic(t->c, a->loc, "rv64: frame_size too large to patch");
+    }
+  }
+  /* sd s0, fp_pair_off(sp); sd ra, fp_pair_off+8(sp); addi s0, sp, fp_pair_off */
+  if ((i32)fp_pair_off > 2047 || (i32)(fp_pair_off + 8) > 2047) {
+    compiler_panic(t->c, a->loc, "rv64: fp_pair_off out of imm12 range");
+  }
+  words[wi++] = rv_sd(RV_S0, RV_SP, (i32)fp_pair_off);
+  words[wi++] = rv_sd(RV_RA, RV_SP, (i32)fp_pair_off + 8);
+  words[wi++] = rv_addi(RV_S0, RV_SP, (i32)fp_pair_off);
+
+  /* If sret, spill incoming a0 into the hidden slot. */
+  if (a->has_sret && a->sret_ptr_slot != FRAME_SLOT_NONE) {
+    RvSlot* s = rv64_slot_get(a, a->sret_ptr_slot);
+    if (s) {
+      if (wi >= RV_PROLOGUE_WORDS) goto overflow;
+      words[wi++] = rv_sd(RV_A0, RV_S0, -(i32)s->off);
+    }
+  }
+  /* Variadic: spill the still-unconsumed a-regs (a_{next_param_int}..a7)
+   * into the save area at [s0 + 16 + i*8]. The save area sits between
+   * the saved-s0/ra pair and the caller's stack args, so save_area[8]
+   * == caller's first stack arg. */
+  if (a->is_variadic) {
+    for (u32 i = a->next_param_int; i < 8; ++i) {
+      if (wi >= RV_PROLOGUE_WORDS) goto overflow;
+      words[wi++] = rv_sd(RV_A0 + i, RV_S0, 16 + (i32)i * 8);
+    }
+  }
+  /* int saves */
+  for (u32 i = 0; i < n_int_saves; ++i) {
+    u32 r = 18u + i;
+    i32 off = int_save_base - 8 * (i32)i;
+    if (wi >= RV_PROLOGUE_WORDS) goto overflow;
+    words[wi++] = rv_sd(r, RV_S0, off);
+  }
+  /* fp saves */
+  for (u32 i = 0; i < n_fp_saves; ++i) {
+    u32 r = 18u + i;
+    i32 off = fp_save_base - 8 * (i32)i;
+    if (wi >= RV_PROLOGUE_WORDS) goto overflow;
+    words[wi++] = rv_fsd(r, RV_S0, off);
+  }
+  /* Shared overflow exit for the bounds checks above; never entered by
+   * falling through. */
+  if (0) {
+  overflow:
+    compiler_panic(t->c, a->loc,
+                   "rv64: prologue placeholder too small (used %u of %u)", wi,
+                   RV_PROLOGUE_WORDS);
+  }
+
+  for (u32 i = 0; i < RV_PROLOGUE_WORDS; ++i) {
+    rv64_patch32(obj, sec, pos + i * 4u, words[i]);
+  }
+
+  /* Patch alloca placeholders with `addi dst, sp, max_out`. The imm12
+   * limit only matters when there is a placeholder to patch, so a
+   * function without alloca may use a larger outgoing-argument area. */
+  if (a->nadd_patches && max_out > 2047u) {
+    compiler_panic(t->c, a->loc,
+                   "rv64: max_outgoing %u out of imm12 for alloca patch",
+                   max_out);
+  }
+  for (u32 i = 0; i < a->nadd_patches; ++i) {
+    u32 dr = a->add_patches[i].dst_reg;
+    u32 word = rv_addi(dr, RV_SP, (i32)max_out);
+    rv64_patch32(obj, sec, a->add_patches[i].pos, word);
+  }
+
+  /* Define the function symbol over [func_start, end). */
+  u32 end = mc->pos(mc);
+  obj_symbol_define(obj, a->fd->sym, sec, (u64)a->func_start,
+                    (u64)(end - a->func_start));
+
+  mc->cfi_endproc(mc);
+  a->fd = NULL;
+}
+
diff --git a/src/arch/rv64/internal.h b/src/arch/rv64/internal.h
@@ -0,0 +1,222 @@
+/* src/arch/rv64/internal.h — private header shared by emit.c, alloc.c, ops.c.
+ * Do not include from outside src/arch/rv64/. */
+#pragma once
+
+#include <string.h>
+
+#include "arch/arch.h"
+#include "arch/rv64.h"
+#include "arch/rv64_isa.h"
+#include "core/arena.h"
+#include "obj/obj.h"
+#include "type/type.h"
+
+#define RV_PROLOGUE_WORDS 32u
+
+/* ---- RegPool ----
+ * Bitmap allocator over `nregs` consecutive hardware registers starting
+ * at register number `base`. */
+typedef struct RegPool {
+ u32 free; /* bit i set => register base+i is currently available */
+ u32 hwm; /* high-water mark: 1 + highest pool index ever handed out */
+ u8 base; /* hardware number of the pool's first register */
+ u8 nregs; /* number of registers managed by this pool */
+ u8 pad[2];
+} RegPool;
+
+/* ---- RvSlot / RvScope ----
+ * RvSlot: one stack-frame slot, addressed relative to the frame pointer. */
+typedef struct RvSlot {
+ u32 off; /* bytes below s0 (positive); address = s0 - off */
+ u32 size; /* slot size in bytes — presumably FrameSlotDesc.size; confirm in alloc.c */
+ u32 align; /* slot alignment — presumably FrameSlotDesc.align; confirm in alloc.c */
+ u8 kind; /* presumably the FrameSlotDesc kind (e.g. FS_SPILL); confirm in alloc.c */
+ u8 pad[3];
+} RvSlot;
+
+typedef struct RvScope { /* one structured-control-flow scope; handles are index+1 */
+ u8 kind; /* SCOPE_* kind; only SCOPE_IF owns else/end labels */
+ u8 has_else; /* set by rv_scope_else(); tells rv_scope_end() the else label is already placed */
+ u8 pad[2];
+ MCLabel else_label; /* SCOPE_IF: target of the "condition is zero" branch */
+ MCLabel end_label; /* SCOPE_IF: placed by rv_scope_end(); jump target after the then arm */
+ Label break_label; /* copied from the scope descriptor; used by rv_break_to() */
+ Label continue_label; /* copied from the scope descriptor; used by rv_continue_to() */
+} RvScope;
+
+/* ---- RImpl ----
+ * rv64 backend state. `base` must remain the first member: impl_of()
+ * casts a CGTarget* directly to RImpl*. The per-function fields are reset
+ * by rv_func_begin() and consumed by rv_func_end(). */
+typedef struct RImpl {
+ CGTarget base;
+ SrcLoc loc; /* source location reported by compiler_panic() diagnostics */
+ const CGFuncDesc* fd; /* function being emitted; NULL after rv_func_end() */
+
+ u32 func_start; /* section position where the function begins */
+ u32 prologue_pos; /* position of the NOP placeholder patched by rv_func_end() */
+ MCLabel epilogue_label; /* placed at the start of the epilogue */
+
+ RvSlot* slots; /* frame-slot table, looked up via rv64_slot_get() */
+ u32 nslots;
+ u32 slots_cap;
+ u32 cum_off; /* bytes of locals/spills allocated below s0 so far */
+ u32 max_outgoing; /* outgoing-argument area size; rounded up to 16 in rv_func_end() */
+ u32 fp_pair_off; /* sp-relative offset of the saved s0/ra pair; set by rv_func_end() */
+
+ u32 next_param_int; /* next integer argument register index (starts at 1 when sret) */
+ u32 next_param_fp;
+ u32 next_param_stack;
+ u8 has_sret; /* nonzero when the ABI descriptor requests an sret return */
+ FrameSlot sret_ptr_slot; /* hidden slot holding the incoming a0, or FRAME_SLOT_NONE */
+
+ RegPool int_pool; /* s2..s11 */
+ RegPool fp_pool; /* fs2..fs11 */
+
+ RvScope* scopes; /* scope table; a CGScope handle is index + 1 */
+ u32 nscopes;
+ u32 scopes_cap;
+
+ u8 has_alloca; /* when set, the epilogue recomputes sp from s0 */
+ struct RvAllocaPatch {
+ u32 pos; /* section offset of the placeholder instruction */
+ u32 dst_reg; /* patched to `addi dst_reg, sp, max_out` */
+ }* add_patches;
+ u32 nadd_patches;
+ u32 add_patches_cap;
+
+ u8 is_variadic; /* nonzero: 64-byte GP save area at [s0 + 16] */
+ FrameSlot gp_save_slot; /* reset to FRAME_SLOT_NONE per function; no other use visible in emit.c */
+} RImpl;
+
+/* ---- impl_of ----
+ * Recover the rv64 backend state from the generic target handle. The cast
+ * relies on `CGTarget base` being the first member of RImpl. */
+static inline RImpl* impl_of(CGTarget* t) { return (RImpl*)t; }
+
+/* ---- type helpers ---- */
+/* Return 1 when `t` is held in a 64-bit register value (long / long long
+ * in either signedness, pointers, double); 0 otherwise, including for a
+ * NULL type. */
+static inline int type_is_64(const Type* t) {
+  if (t == NULL) return 0;
+  return t->kind == TY_LONG || t->kind == TY_ULONG || t->kind == TY_LLONG ||
+         t->kind == TY_ULLONG || t->kind == TY_PTR || t->kind == TY_DOUBLE;
+}
+/* Return 1 when `t` is double or long double; 0 otherwise or for NULL. */
+static inline int type_is_fp_double(const Type* t) {
+  if (!t) return 0;
+  return t->kind == TY_DOUBLE || t->kind == TY_LDOUBLE;
+}
+/* Memory access width in bytes for scalar type `t`. A NULL type is
+ * treated as 4 bytes; any kind not listed below is treated as 8 bytes. */
+static inline u32 type_byte_size(const Type* t) {
+  if (!t) return 4;
+  switch (t->kind) {
+    case TY_CHAR:
+    case TY_SCHAR:
+    case TY_UCHAR:
+    case TY_BOOL:
+      return 1;
+    case TY_SHORT:
+    case TY_USHORT:
+      return 2;
+    case TY_INT:
+    case TY_UINT:
+    case TY_FLOAT:
+      return 4;
+    default:
+      /* long, long long, pointers, double and everything else. */
+      return 8;
+  }
+}
+/* Return 1 when loads of type `t` should sign-extend: char, signed char,
+ * short, int, long, long long. Returns 0 for every other kind and for a
+ * NULL type.
+ * NOTE(review): plain TY_CHAR is treated as signed here, while the RISC-V
+ * psABI makes plain `char` unsigned — confirm the front end's convention. */
+static inline int type_is_signed(const Type* t) {
+  if (t == NULL) return 0;
+  return t->kind == TY_CHAR || t->kind == TY_SCHAR || t->kind == TY_SHORT ||
+         t->kind == TY_INT || t->kind == TY_LONG || t->kind == TY_LLONG;
+}
+
+/* Extract the 5-bit hardware register number from a register operand. */
+static inline u32 reg_num(Operand op) {
+  u32 raw = (u32)op.v.reg;
+  return raw & 0x1fu;
+}
+
+/* ---- RegPool ops (inlined — identical in each caller) ---- */
+/* Reset pool `p` to manage `nregs` registers starting at hardware number
+ * `base`, all marked free, with the high-water mark cleared. */
+static inline void regpool_init(RegPool* p, u8 base, u8 nregs) {
+  /* A shift by 32 or more would be undefined, so a full pool is special-cased. */
+  u32 all_free = 0xFFFFFFFFu;
+  if (nregs < 32u) all_free = (1u << nregs) - 1u;
+
+  p->free = all_free;
+  p->hwm = 0;
+  p->base = base;
+  p->nregs = nregs;
+}
+/* Hand out the lowest-numbered free register of pool `p`, or REG_NONE
+ * when the pool is exhausted. Updates the high-water mark. */
+static inline Reg regpool_alloc(RegPool* p) {
+  u32 avail = p->free;
+  if (!avail) return (Reg)REG_NONE;
+
+  u32 idx = (u32)__builtin_ctz(avail); /* index of the lowest set bit */
+  p->free = avail & ~(1u << idx);
+
+  u32 used = idx + 1u;
+  if (p->hwm < used) p->hwm = used;
+  return (Reg)(p->base + idx);
+}
+/* Return register `r` to pool `p`.
+ * Returns 1 on success, 0 when `r` does not belong to this pool, and -1
+ * when `r` was already free (double free). */
+static inline int regpool_free(RegPool* p, Reg r) {
+  u32 rn = (u32)r;
+  u32 first = p->base;
+  u32 limit = (u32)(p->base + p->nregs);
+  if (rn < first || rn >= limit) return 0;
+
+  u32 mask = 1u << (rn - first);
+  if ((p->free & mask) != 0) return -1;
+  p->free |= mask;
+  return 1;
+}
+
+/* ---- emit.c: function lifecycle (referenced by ops.c vtable) ---- */
+void rv_func_begin(CGTarget* t, const CGFuncDesc* fd);
+void rv_func_end(CGTarget* t);
+
+/* ---- emit helpers used cross-file (defined in emit.c, except debug_emit_row, which is only declared here) ---- */
+extern void debug_emit_row(Debug*, ObjSecId text_section, u32 offset, SrcLoc);
+
+void rv64_emit32(MCEmitter* mc, u32 word);
+void rv64_patch32(ObjBuilder* obj, u32 sec_id, u32 ofs, u32 word);
+int fits_signed32(i64 v);
+void emit_li_32(MCEmitter* mc, u32 rd, i32 imm);
+void rv64_emit_load_imm(MCEmitter* mc, u32 sf, u32 rd, i64 imm);
+void emit_sp_addi(MCEmitter* mc, i64 imm);
+_Noreturn void rv_panic(CGTarget* t, const char* what);
+
+/* ---- alloc.c: all functions (non-static; referenced by ops.c vtable) ---- */
+Reg rv_alloc_reg(CGTarget* t, RegClass cls, const Type* ty);
+void rv_free_reg(CGTarget* t, Reg r, RegClass cls);
+FrameSlot rv_frame_slot(CGTarget* t, const FrameSlotDesc* d);
+RvSlot* rv64_slot_get(RImpl* a, FrameSlot fs);
+void rv_param(CGTarget* t, const CGParamDesc* p);
+const Reg* rv_clobbers(CGTarget* t, RegClass c, u32* n);
+void rv_spill_reg(CGTarget* t, Operand src, FrameSlot slot, MemAccess ma);
+void rv_reload_reg(CGTarget* t, Operand dst, FrameSlot slot, MemAccess ma);
+Label rv_label_new(CGTarget* t);
+void rv_label_place(CGTarget* t, Label l);
+void rv_jump(CGTarget* t, Label l);
+u32 rv64_force_reg_int(CGTarget* t, Operand op, u32 scratch);
+void rv_cmp_branch(CGTarget* t, CmpOp op, Operand a_op, Operand b_op, Label l);
+void rv_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a_op, Operand b_op);
+CGScope rv_scope_begin(CGTarget* t, const CGScopeDesc* d);
+void rv_scope_else(CGTarget* t, CGScope s);
+void rv_scope_end(CGTarget* t, CGScope s);
+void rv_break_to(CGTarget* t, CGScope s);
+void rv_continue_to(CGTarget* t, CGScope s);
+
+/* ---- ops.c: functions used cross-file ---- */
+void rv_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma);
+void rv_store(CGTarget* t, Operand addr, Operand src, MemAccess ma);
+u32 enc_int_store(u32 nbytes, u32 src, u32 base, i32 off);
+u32 enc_int_load(u32 nbytes, int sign_ext, u32 rd, u32 base, i32 off);
+u32 addr_base(CGTarget* t, Operand addr, i32* out_off, u32 tmp_reg);
+void rv64_emit_addr_adjust(MCEmitter* mc, u32 rd, u32 base, i32 off);
+ObjSymId emit_pcrel_anchor(CGTarget* t, u32 sec, u32 auipc_pos);
+void rv64_emit_got_load_addr(CGTarget* t, u32 dst_reg, ObjSymId sym);
+u32 agg_addr_reg(CGTarget* t, Operand op, u32 scratch);
+int rv64_use_got_for_sym(CGTarget* t, ObjSymId sym);
+int mem_order_is_acquire(MemOrder o);
+int mem_order_is_release(MemOrder o);
diff --git a/src/arch/rv64/ops.c b/src/arch/rv64/ops.c
@@ -0,0 +1,1840 @@
+/* src/arch/rv64/ops.c — data movement, arithmetic, calls, atomics, vtable. */
+
+#include "arch/rv64/internal.h"
+
+/* ---- Encode the integer store matching an access of `nbytes` bytes:
+ * SB/SH/SW for 1/2/4, SD for any other width. ---- */
+u32 enc_int_store(u32 nbytes, u32 src, u32 base, i32 off) {
+  if (nbytes == 1) return rv_sb(src, base, off);
+  if (nbytes == 2) return rv_sh(src, base, off);
+  if (nbytes == 4) return rv_sw(src, base, off);
+  return rv_sd(src, base, off);
+}
+/* Encode the integer load matching an access of `nbytes` bytes. For 1, 2
+ * and 4 bytes `sign_ext` selects LB/LH/LW over LBU/LHU/LWU; any other
+ * width encodes LD. */
+u32 enc_int_load(u32 nbytes, int sign_ext, u32 rd, u32 base, i32 off) {
+  if (nbytes == 1) {
+    if (sign_ext) return rv_lb(rd, base, off);
+    return rv_lbu(rd, base, off);
+  }
+  if (nbytes == 2) {
+    if (sign_ext) return rv_lh(rd, base, off);
+    return rv_lhu(rd, base, off);
+  }
+  if (nbytes == 4) {
+    if (sign_ext) return rv_lw(rd, base, off);
+    return rv_lwu(rd, base, off);
+  }
+  return rv_ld(rd, base, off);
+}
+
+/* ---- data movement ---- */
+
+/* Load integer constant `imm` into the register operand `dst`, as a
+ * 64-bit value when dst's type is 64-bit and as a 32-bit value otherwise. */
+static void rv_load_imm(CGTarget* t, Operand dst, i64 imm) {
+  u32 rd = reg_num(dst);
+  u32 wide = 0u;
+  if (type_is_64(dst.type)) wide = 1u;
+  rv64_emit_load_imm(t->mc, wide, rd, imm);
+}
+
+/* Load a floating-point literal into FP register `dst`.
+ *
+ * The literal bytes `cb` are appended to .rodata under a fresh local
+ * symbol `.LCFP<n>`, then loaded PC-relatively:
+ *
+ *   auipc t0, %pcrel_hi(sym)
+ *   fld/flw dst, %pcrel_lo(anchor)(t0)
+ *
+ * An 8-byte literal uses FLD, anything else FLW. Only RC_FP destinations
+ * are supported; others panic. t0 is clobbered.
+ *
+ * The PCREL_LO12 anchor comes from emit_pcrel_anchor(), like every other
+ * AUIPC site in this file. An inline copy with its own counter would
+ * generate the same `.LpcrelHi<n>` names as the shared helper. */
+static void rv_load_const(CGTarget* t, Operand dst, ConstBytes cb) {
+  RImpl* a = impl_of(t);
+  MCEmitter* mc = t->mc;
+  if (dst.cls != RC_FP) {
+    compiler_panic(t->c, a->loc, "rv64 load_const: only FP supported in v1");
+  }
+  Sym ro_name = pool_intern_cstr(t->c->global, ".rodata");
+  ObjSecId ro = obj_section(t->obj, ro_name, SEC_RODATA, SF_ALLOC, 1u);
+
+  /* Emit the literal into .rodata, then switch back to the code section. */
+  u32 cur_section = mc->section_id;
+  mc->set_section(mc, ro);
+  u32 ro_off = obj_align_to(t->obj, ro, cb.align ? cb.align : 4);
+  mc->emit_bytes(mc, cb.bytes, cb.size);
+
+  /* Build the symbol name ".LCFP<decimal sequence number>". */
+  char namebuf[64];
+  static u32 lit_seq = 0;
+  int len = 0;
+  {
+    const char* prefix = ".LCFP";
+    for (; prefix[len]; ++len) namebuf[len] = prefix[len];
+    u32 v = lit_seq++;
+    char tmp[16];
+    int tn = 0;
+    if (v == 0) {
+      tmp[tn++] = '0';
+    } else {
+      /* Digits come out least-significant first; reversed below. */
+      while (v) {
+        tmp[tn++] = '0' + (char)(v % 10);
+        v /= 10;
+      }
+    }
+    for (int i = tn - 1; i >= 0; --i) namebuf[len++] = tmp[i];
+    namebuf[len] = 0;
+  }
+  Sym sname = pool_intern_cstr(t->c->global, namebuf);
+  ObjSymId sym = obj_symbol(t->obj, sname, SB_LOCAL, SK_OBJ, ro, (u64)ro_off,
+                            (u64)cb.size);
+  mc->set_section(mc, cur_section);
+
+  /* auipc t0, %pcrel_hi(sym) */
+  u32 sec = mc->section_id;
+  u32 auipc_pos = mc->pos(mc);
+  rv64_emit32(mc, rv_auipc(RV_T0, 0));
+  mc->emit_reloc_at(mc, sec, auipc_pos, R_RV_PCREL_HI20, sym, 0, 0, 0);
+
+  /* The LO12 reloc refers to a local symbol placed at the AUIPC site. */
+  ObjSymId anchor = emit_pcrel_anchor(t, sec, auipc_pos);
+
+  /* fld/flw dst, %pcrel_lo(anchor)(t0) */
+  u32 lpos = mc->pos(mc);
+  if (cb.size == 8) {
+    rv64_emit32(mc, rv_fld(reg_num(dst), RV_T0, 0));
+  } else {
+    rv64_emit32(mc, rv_flw(reg_num(dst), RV_T0, 0));
+  }
+  mc->emit_reloc_at(mc, sec, lpos, R_RV_PCREL_LO12_I, anchor, 0, 0, 0);
+}
+
+/* Register-to-register move. When either operand is in the FP class the
+ * move is `fsgnj.fmt rd, rs, rs` (the fmv.fmt idiom), using double format
+ * when dst's type is double/long double and single format otherwise.
+ * Integer moves are `addi rd, rs, 0`, valid for 32- and 64-bit values. */
+static void rv_copy(CGTarget* t, Operand dst, Operand src) {
+  MCEmitter* mc = t->mc;
+  u32 rd = reg_num(dst);
+  u32 rs = reg_num(src);
+  int is_fp = (dst.cls == RC_FP) || (src.cls == RC_FP);
+
+  if (!is_fp) {
+    rv64_emit32(mc, rv_addi(rd, rs, 0));
+    return;
+  }
+  u32 fmt = RV_FMT_S;
+  if (type_is_fp_double(dst.type)) fmt = RV_FMT_D;
+  rv64_emit32(mc, rv_fsgnj(fmt, rd, rs, rs));
+}
+
+/* ---- address resolution ---- */
+
+/* Resolve the memory operand `addr` (OPK_LOCAL or OPK_INDIRECT) to a
+ * base register plus a signed 12-bit displacement.
+ *
+ * When the operand's offset fits imm12, the natural base (s0 for locals,
+ * the operand's base register for indirects) is returned and the offset
+ * is stored in *out_off. Otherwise the full address is computed into
+ * `tmp_reg`, which is returned with *out_off set to 0.
+ *
+ * Any other operand kind — including OPK_GLOBAL, which callers handle
+ * themselves — panics. */
+u32 addr_base(CGTarget* t, Operand addr, i32* out_off, u32 tmp_reg) {
+  RImpl* a = impl_of(t);
+  if (addr.kind != OPK_LOCAL && addr.kind != OPK_INDIRECT) {
+    compiler_panic(t->c, a->loc, "rv64 addr_base: kind %d unsupported",
+                   (int)addr.kind);
+  }
+
+  u32 base = RV_S0;
+  i32 disp = 0;
+  if (addr.kind == OPK_LOCAL) {
+    RvSlot* slot = rv64_slot_get(a, addr.v.frame_slot);
+    if (!slot) compiler_panic(t->c, a->loc, "rv64 addr_base: bad slot");
+    disp = -(i32)slot->off; /* locals live below s0 */
+  } else {
+    disp = addr.v.ind.ofs;
+    base = addr.v.ind.base & 0x1f;
+  }
+
+  if (disp >= -2048 && disp <= 2047) {
+    *out_off = disp;
+    return base;
+  }
+  /* Out of imm12 range: tmp = base + disp, access at offset 0. */
+  rv64_emit_load_imm(t->mc, 1, tmp_reg, (i64)disp);
+  rv64_emit32(t->mc, rv_add(tmp_reg, base, tmp_reg));
+  *out_off = 0;
+  return tmp_reg;
+}
+
+/* Return the result of obj_symbol_extern_via_got() for `sym`: nonzero
+ * selects the GOT-based access sequences in this file. */
+int rv64_use_got_for_sym(CGTarget* t, ObjSymId sym) {
+  int via_got = obj_symbol_extern_via_got(t->c, t->obj, sym);
+  return via_got;
+}
+
+/* Create the anchor symbol that a PCREL_LO12_* relocation must reference:
+ * a fresh local symbol named `.LpcrelHi<n>` placed at `auipc_pos` in
+ * section `sec`. `n` is a process-wide sequence number, so every AUIPC
+ * site routed through this helper gets a distinct name. */
+ObjSymId emit_pcrel_anchor(CGTarget* t, u32 sec, u32 auipc_pos) {
+  static u32 seq = 0;
+  static const char prefix[] = ".LpcrelHi";
+
+  char name[64];
+  int len = (int)(sizeof prefix - 1);
+  memcpy(name, prefix, sizeof prefix - 1);
+
+  /* Append the sequence number in decimal: count digits, then fill the
+   * digits from the least significant end. */
+  u32 id = seq++;
+  int ndigits = 1;
+  for (u32 q = id; q >= 10u; q /= 10u) ++ndigits;
+  for (int k = ndigits - 1; k >= 0; --k) {
+    name[len + k] = (char)('0' + (char)(id % 10u));
+    id /= 10u;
+  }
+  len += ndigits;
+  name[len] = 0;
+
+  Sym interned = pool_intern_cstr(t->c->global, name);
+  return obj_symbol(t->obj, interned, SB_LOCAL, SK_OBJ, sec, (u64)auipc_pos,
+                    0);
+}
+
+/* Load the address of `sym` from its GOT entry into `dst_reg`:
+ *
+ *   auipc dst, %got_pcrel_hi(sym)
+ *   ld    dst, %pcrel_lo(anchor)(dst)
+ *
+ * Both relocations carry a zero addend; callers apply any displacement
+ * with a follow-on ADDI/ADD against the loaded address. */
+void rv64_emit_got_load_addr(CGTarget* t, u32 dst_reg, ObjSymId sym) {
+  MCEmitter* mc = t->mc;
+  u32 sec = mc->section_id;
+
+  u32 auipc_at = mc->pos(mc);
+  rv64_emit32(mc, rv_auipc(dst_reg, 0));
+  mc->emit_reloc_at(mc, sec, auipc_at, R_RV_GOT_HI20, sym, 0, 0, 0);
+
+  /* The LO12 reloc points at a local symbol sitting on the AUIPC. */
+  ObjSymId anchor = emit_pcrel_anchor(t, sec, auipc_at);
+
+  u32 load_at = mc->pos(mc);
+  rv64_emit32(mc, rv_ld(dst_reg, dst_reg, 0));
+  mc->emit_reloc_at(mc, sec, load_at, R_RV_PCREL_LO12_I, anchor, 0, 0, 0);
+}
+
+/* Compute rd = base + off.
+ * A displacement in [-2048, 2047] uses one ADDI (nothing is emitted when
+ * off == 0 and rd == base). Larger displacements are loaded into t1 and
+ * added with ADD, so t1 is clobbered in that case. */
+void rv64_emit_addr_adjust(MCEmitter* mc, u32 rd, u32 base, i32 off) {
+  if (off < -2048 || off > 2047) {
+    rv64_emit_load_imm(mc, 1, RV_T1, (i64)off);
+    rv64_emit32(mc, rv_add(rd, base, RV_T1));
+    return;
+  }
+  /* In range: skip only the pure no-op (same register, zero offset). */
+  if (off != 0 || rd != base) {
+    rv64_emit32(mc, rv_addi(rd, base, off));
+  }
+}
+
+/* Emit one load of `sz` bytes from [base + off] into `dst`. FP
+ * destinations use FLD (sz == 8) or FLW; integer destinations pick the
+ * width via enc_int_load() and sign-extend according to `mem_ty`. */
+static void rv64_emit_typed_load(MCEmitter* mc, Operand dst,
+                                 const Type* mem_ty, u32 sz, u32 base,
+                                 i32 off) {
+  u32 rd = reg_num(dst);
+  if (dst.cls == RC_FP) {
+    if (sz == 8) rv64_emit32(mc, rv_fld(rd, base, off));
+    else rv64_emit32(mc, rv_flw(rd, base, off));
+    return;
+  }
+  int sx = type_is_signed(mem_ty);
+  rv64_emit32(mc, enc_int_load(sz, sx, rd, base, off));
+}
+
+/* Load a value from memory operand `addr` into register operand `dst`.
+ *
+ * The access width is ma.size, or the size of addr's type when ma.size is
+ * zero. Three addressing forms are handled:
+ *   - OPK_GLOBAL via the GOT: the symbol address is loaded into t0 and the
+ *     addend is applied as the load's displacement. An addend outside the
+ *     signed 12-bit range is first added to t0 (using t1 as scratch)
+ *     instead of being truncated into the imm12 field.
+ *   - OPK_GLOBAL direct: auipc t0 + load with PCREL_HI20/LO12_I relocs.
+ *   - everything else: resolved through addr_base() with t0 as scratch.
+ * t0 is clobbered on the global paths. */
+void rv_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) {
+  u32 sz = ma.size ? ma.size : type_byte_size(addr.type);
+  MCEmitter* mc = t->mc;
+
+  if (addr.kind != OPK_GLOBAL) {
+    i32 off;
+    u32 base = addr_base(t, addr, &off, RV_T0);
+    rv64_emit_typed_load(mc, dst, addr.type, sz, base, off);
+    return;
+  }
+
+  u32 sec = mc->section_id;
+  ObjSymId sym = addr.v.global.sym;
+  i64 add = addr.v.global.addend;
+
+  if (rv64_use_got_for_sym(t, sym)) {
+    /* GOT relocs carry no addend, so the displacement is applied here. */
+    rv64_emit_got_load_addr(t, RV_T0, sym);
+    i32 ao = (i32)add;
+    if (ao < -2048 || ao > 2047) {
+      rv64_emit_addr_adjust(mc, RV_T0, RV_T0, ao);
+      ao = 0;
+    }
+    rv64_emit_typed_load(mc, dst, addr.type, sz, RV_T0, ao);
+    return;
+  }
+
+  /* auipc t0, %pcrel_hi(sym+add) ; load dst, %pcrel_lo(anchor)(t0) */
+  u32 ap = mc->pos(mc);
+  rv64_emit32(mc, rv_auipc(RV_T0, 0));
+  mc->emit_reloc_at(mc, sec, ap, R_RV_PCREL_HI20, sym, add, 0, 0);
+  ObjSymId anchor = emit_pcrel_anchor(t, sec, ap);
+  u32 lp = mc->pos(mc);
+  rv64_emit_typed_load(mc, dst, addr.type, sz, RV_T0, 0);
+  mc->emit_reloc_at(mc, sec, lp, R_RV_PCREL_LO12_I, anchor, 0, 0, 0);
+}
+
+/* Emit one store of `sz` bytes from `src_reg` to [base + off]. FP sources
+ * use FSD (sz == 8) or FSW; integer sources use enc_int_store(). */
+static void rv64_emit_typed_store(MCEmitter* mc, u32 src_reg, int src_fp,
+                                  u32 sz, u32 base, i32 off) {
+  if (src_fp) {
+    if (sz == 8) rv64_emit32(mc, rv_fsd(src_reg, base, off));
+    else rv64_emit32(mc, rv_fsw(src_reg, base, off));
+    return;
+  }
+  rv64_emit32(mc, enc_int_store(sz, src_reg, base, off));
+}
+
+/* Store `src` (a register or an immediate) to memory operand `addr`.
+ *
+ * The access width is ma.size, or the size of addr's type when ma.size is
+ * zero. Immediates are materialized into a scratch register first.
+ *   - OPK_GLOBAL: an immediate goes into t1 and the address into t0. On
+ *     the GOT path the addend becomes the store's displacement; an addend
+ *     outside the signed 12-bit range is first added to t0 using t2 as
+ *     scratch (t1 may hold the immediate) instead of being truncated into
+ *     the imm12 field. The direct path uses PCREL_HI20/LO12_S relocs.
+ *   - everything else: addr_base() resolves the address; with an
+ *     immediate source the address scratch is t1 and the value goes in t0. */
+void rv_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) {
+  u32 sz = ma.size ? ma.size : type_byte_size(addr.type);
+  MCEmitter* mc = t->mc;
+
+  if (addr.kind == OPK_GLOBAL) {
+    u32 sec = mc->section_id;
+    ObjSymId sym = addr.v.global.sym;
+    i64 add = addr.v.global.addend;
+    u32 src_reg;
+    int src_fp = 0;
+    if (src.kind == OPK_IMM) {
+      u32 sf = (sz == 8) ? 1u : 0u;
+      rv64_emit_load_imm(mc, sf, RV_T1, src.v.imm);
+      src_reg = RV_T1;
+    } else {
+      src_reg = reg_num(src);
+      src_fp = (src.cls == RC_FP) ? 1 : 0;
+    }
+
+    if (rv64_use_got_for_sym(t, sym)) {
+      /* GOT relocs carry no addend, so the displacement is applied here. */
+      rv64_emit_got_load_addr(t, RV_T0, sym);
+      i32 ao = (i32)add;
+      if (ao < -2048 || ao > 2047) {
+        rv64_emit_load_imm(mc, 1, RV_T2, (i64)ao);
+        rv64_emit32(mc, rv_add(RV_T0, RV_T0, RV_T2));
+        ao = 0;
+      }
+      rv64_emit_typed_store(mc, src_reg, src_fp, sz, RV_T0, ao);
+      return;
+    }
+
+    /* auipc t0, %pcrel_hi(sym+add) ; store src, %pcrel_lo(anchor)(t0) */
+    u32 ap = mc->pos(mc);
+    rv64_emit32(mc, rv_auipc(RV_T0, 0));
+    mc->emit_reloc_at(mc, sec, ap, R_RV_PCREL_HI20, sym, add, 0, 0);
+    ObjSymId anchor = emit_pcrel_anchor(t, sec, ap);
+    u32 sp_pos = mc->pos(mc);
+    rv64_emit_typed_store(mc, src_reg, src_fp, sz, RV_T0, 0);
+    mc->emit_reloc_at(mc, sec, sp_pos, R_RV_PCREL_LO12_S, anchor, 0, 0, 0);
+    return;
+  }
+
+  /* Keep the address scratch distinct from the value scratch (t0). */
+  i32 off;
+  u32 base = addr_base(t, addr, &off,
+                       (src.kind == OPK_IMM) ? RV_T1 : RV_T0);
+  if (src.kind == OPK_IMM) {
+    u32 sf = (sz == 8) ? 1u : 0u;
+    rv64_emit_load_imm(mc, sf, RV_T0, src.v.imm);
+    rv64_emit_typed_store(mc, RV_T0, 0, sz, base, off);
+    return;
+  }
+  rv64_emit_typed_store(mc, reg_num(src), src.cls == RC_FP, sz, base, off);
+}
+
+/* Compute the address of lvalue `lv` into register operand `dst`.
+ *
+ *   OPK_LOCAL    - s0 minus the slot offset.
+ *   OPK_INDIRECT - base register plus displacement.
+ *   OPK_GLOBAL   - GOT load plus a follow-on adjust for the addend when
+ *                  the symbol goes through the GOT, otherwise
+ *                  auipc + addi with PCREL_HI20/LO12_I relocs.
+ * Any other kind panics.
+ *
+ * A displacement outside imm12 is normally built in `dst` itself and then
+ * added to the base. For OPK_INDIRECT that would destroy the base when
+ * dst and base are the same register (yielding 2*ofs), so in that case
+ * the displacement is built in t1 instead. */
+static void rv_addr_of(CGTarget* t, Operand dst, Operand lv) {
+  RImpl* a = impl_of(t);
+  MCEmitter* mc = t->mc;
+  u32 rd = reg_num(dst);
+  if (lv.kind == OPK_LOCAL) {
+    RvSlot* s = rv64_slot_get(a, lv.v.frame_slot);
+    if (!s) compiler_panic(t->c, a->loc, "rv64 addr_of: bad slot");
+    i32 off = -(i32)s->off;
+    if (off >= -2048 && off <= 2047) {
+      rv64_emit32(mc, rv_addi(rd, RV_S0, off));
+    } else {
+      rv64_emit_load_imm(mc, 1, rd, (i64)off);
+      rv64_emit32(mc, rv_add(rd, RV_S0, rd));
+    }
+    return;
+  }
+  if (lv.kind == OPK_INDIRECT) {
+    i32 ofs = lv.v.ind.ofs;
+    u32 base = lv.v.ind.base & 0x1f;
+    if (ofs >= -2048 && ofs <= 2047) {
+      rv64_emit32(mc, rv_addi(rd, base, ofs));
+    } else if (rd != base) {
+      rv64_emit_load_imm(mc, 1, rd, (i64)ofs);
+      rv64_emit32(mc, rv_add(rd, base, rd));
+    } else {
+      /* rd aliases base: keep the base intact while the offset is built. */
+      rv64_emit_load_imm(mc, 1, RV_T1, (i64)ofs);
+      rv64_emit32(mc, rv_add(rd, base, RV_T1));
+    }
+    return;
+  }
+  if (lv.kind == OPK_GLOBAL) {
+    ObjSymId sym = lv.v.global.sym;
+    i64 addend = lv.v.global.addend;
+    /* Extern-via-GOT path: the GOT load yields &sym directly; the addend
+     * is applied afterwards because the GOT relocs carry none. */
+    if (rv64_use_got_for_sym(t, sym)) {
+      rv64_emit_got_load_addr(t, rd, sym);
+      if (addend) rv64_emit_addr_adjust(mc, rd, rd, (i32)addend);
+      return;
+    }
+    /* auipc rd, %pcrel_hi(sym+addend) ; addi rd, rd, %pcrel_lo(anchor) */
+    u32 sec = mc->section_id;
+    u32 ap = mc->pos(mc);
+    rv64_emit32(mc, rv_auipc(rd, 0));
+    mc->emit_reloc_at(mc, sec, ap, R_RV_PCREL_HI20, sym, addend, 0, 0);
+    ObjSymId anchor = emit_pcrel_anchor(t, sec, ap);
+    u32 ip = mc->pos(mc);
+    rv64_emit32(mc, rv_addi(rd, rd, 0));
+    mc->emit_reloc_at(mc, sec, ip, R_RV_PCREL_LO12_I, anchor, 0, 0, 0);
+    return;
+  }
+  rv_panic(t, "addr_of");
+}
+
+/* Compute the address of thread-local `sym` (+ addend) into `dst` using
+ * the Local-Exec sequence:
+ *
+ *   lui  t0, %tprel_hi(sym)        R_RV_TPREL_HI20
+ *   add  t0, tp, t0
+ *   addi dst, t0, %tprel_lo(sym)   R_RV_TPREL_LO12_I
+ *
+ * t0 is clobbered. */
+static void rv_tls_addr_of(CGTarget* t, Operand dst, ObjSymId sym, i64 addend) {
+  MCEmitter* mc = t->mc;
+  u32 sec = mc->section_id;
+
+  u32 hi_at = mc->pos(mc);
+  rv64_emit32(mc, rv_lui(RV_T0, 0));
+  mc->emit_reloc_at(mc, sec, hi_at, R_RV_TPREL_HI20, sym, addend, 0, 0);
+
+  rv64_emit32(mc, rv_add(RV_T0, RV_TP, RV_T0));
+
+  u32 lo_at = mc->pos(mc);
+  rv64_emit32(mc, rv_addi(reg_num(dst), RV_T0, 0));
+  mc->emit_reloc_at(mc, sec, lo_at, R_RV_TPREL_LO12_I, sym, addend, 0, 0);
+}
+
+/* ---- aggregate ops ---- */
+
+/* Return a register holding the address described by `op`, for the
+ * aggregate helpers. An OPK_REG operand is returned as-is (no code
+ * emitted). For OPK_LOCAL the slot address (s0 - slot offset) is computed
+ * into `scratch`, which is returned. Any other kind panics. */
+u32 agg_addr_reg(CGTarget* t, Operand op, u32 scratch) {
+  RImpl* a = impl_of(t);
+  MCEmitter* mc = t->mc;
+
+  if (op.kind == OPK_REG) return reg_num(op);
+  if (op.kind != OPK_LOCAL) {
+    compiler_panic(t->c, a->loc, "rv64 agg: address kind %d unsupported",
+                   (int)op.kind);
+  }
+
+  RvSlot* slot = rv64_slot_get(a, op.v.frame_slot);
+  if (!slot) compiler_panic(t->c, a->loc, "rv64 agg: bad slot");
+
+  i32 disp = -(i32)slot->off;
+  int fits_imm12 = (disp >= -2048) && (disp <= 2047);
+  if (fits_imm12) {
+    rv64_emit32(mc, rv_addi(scratch, RV_S0, disp));
+  } else {
+    rv64_emit_load_imm(mc, 1, scratch, (i64)disp);
+    rv64_emit32(mc, rv_add(scratch, RV_S0, scratch));
+  }
+  return scratch;
+}
+
+/* Copy agg.size bytes from src_addr to dst_addr with a fully unrolled
+ * load/store sequence through t3: as many 8-byte moves as fit, then 4-,
+ * 2- and 1-byte moves for the tail. The destination address uses t0 as
+ * scratch; the source uses t1, or t2 when the destination already lives
+ * in t1.
+ * NOTE(review): the byte index is used directly as the load/store
+ * displacement, so aggregates larger than the imm12 reach (2047 bytes)
+ * are not handled here — confirm callers bound agg.size. */
+static void rv_copy_bytes(CGTarget* t, Operand dst_addr, Operand src_addr,
+                          AggregateAccess agg) {
+  static const u32 widths[4] = {8u, 4u, 2u, 1u};
+  MCEmitter* mc = t->mc;
+
+  u32 dst_base = agg_addr_reg(t, dst_addr, RV_T0);
+  u32 src_scratch = (dst_base == RV_T1) ? RV_T2 : RV_T1;
+  u32 src_base = agg_addr_reg(t, src_addr, src_scratch);
+
+  u32 total = agg.size;
+  u32 done = 0;
+  for (u32 k = 0; k < 4u; ++k) {
+    u32 w = widths[k];
+    while (done + w <= total) {
+      /* Zero-extending loads: only the low `w` bytes are stored back. */
+      rv64_emit32(mc, enc_int_load(w, 0, RV_T3, src_base, (i32)done));
+      rv64_emit32(mc, enc_int_store(w, RV_T3, dst_base, (i32)done));
+      done += w;
+    }
+  }
+}
+
+/* Emit an unrolled fill of `agg.size` bytes at `dst_addr` with the constant
+ * byte in `byte_value` (only OPK_IMM is supported; anything else panics).
+ * A zero fill stores from x0; otherwise the byte is replicated across 64 bits
+ * into t3 and stored with 8/4/2/1-byte stores.
+ *
+ * Store displacements are 12-bit signed immediates, so for fills larger than
+ * 2048 bytes the base is advanced every 2040 bytes. A base that came from an
+ * OPK_REG operand is never modified; it is copied into t0 (or t1 if the
+ * operand itself lives in t0) first. For sizes <= 2048 the emitted code is
+ * exactly the plain unrolled sequence. */
+static void rv_set_bytes(CGTarget* t, Operand dst_addr, Operand byte_value,
+                         AggregateAccess agg) {
+  MCEmitter* mc = t->mc;
+  u32 dr = agg_addr_reg(t, dst_addr, RV_T0);
+  if (byte_value.kind != OPK_IMM) {
+    compiler_panic(t->c, impl_of(t)->loc,
+                   "rv64 set_bytes: REG byte NYI");
+  }
+  u32 byte = (u32)(byte_value.v.imm & 0xffu);
+  const u32 n = agg.size;
+
+  u32 src = RV_ZERO;
+  if (byte != 0) {
+    /* Replicate the byte into all eight lanes of t3. */
+    u64 b = byte;
+    b |= b << 8;
+    b |= b << 16;
+    b |= b << 32;
+    rv64_emit_load_imm(mc, 1, RV_T3, (i64)b);
+    src = RV_T3;
+  }
+
+  int dst_private = (dst_addr.kind != OPK_REG);
+  u32 bias = 0; /* bytes already folded into the base register */
+  u32 i = 0;
+  while (i < n) {
+    u32 left = n - i;
+    u32 width = left >= 8 ? 8u : left >= 4 ? 4u : left >= 2 ? 2u : 1u;
+
+    /* Rebase only when the rest of the fill cannot be reached with 12-bit
+     * offsets from the current base. The step is always 2040 here. */
+    if (i - bias >= 2040u && n - bias > 2048u) {
+      u32 cursor = dr;
+      if (!dst_private) cursor = (dr == RV_T0) ? RV_T1 : RV_T0;
+      rv64_emit32(mc, rv_addi(cursor, dr, (i32)(i - bias)));
+      dr = cursor;
+      dst_private = 1;
+      bias = i;
+    }
+
+    i32 off = (i32)(i - bias);
+    switch (width) {
+      case 8: rv64_emit32(mc, rv_sd(src, dr, off)); break;
+      case 4: rv64_emit32(mc, rv_sw(src, dr, off)); break;
+      case 2: rv64_emit32(mc, rv_sh(src, dr, off)); break;
+      default: rv64_emit32(mc, rv_sb(src, dr, off)); break;
+    }
+    i += width;
+  }
+}
+
+/* Read the bit-field described by `bf` from the record at `record_addr` into
+ * `dst`. The whole storage unit is loaded first (storage size 0 is treated as
+ * 4 bytes, width 0 as 1 bit); the field is then isolated by shifting it up to
+ * bit 63 and back down, arithmetically for signed fields and logically
+ * otherwise. May clobber t0 when the record address has to be materialized. */
+static void rv_bitfield_load(CGTarget* t, Operand dst, Operand record_addr,
+                             BitFieldAccess bf) {
+  MCEmitter* mc = t->mc;
+  u32 rec = agg_addr_reg(t, record_addr, RV_T0);
+  u32 unit_bytes = bf.storage.size ? bf.storage.size : 4u;
+  u32 out = reg_num(dst);
+
+  /* Fetch the storage unit with the sign flag cleared. */
+  rv64_emit32(mc, enc_int_load(unit_bytes, 0, out, rec,
+                               (i32)bf.storage_offset));
+
+  /* All shifts are 64-bit: up by 64 - (lsb + width), down by 64 - width. */
+  u32 lsb = bf.bit_offset;
+  u32 field_bits = bf.bit_width ? bf.bit_width : 1u;
+  u32 up = 64u - (lsb + field_bits);
+  u32 down = 64u - field_bits;
+  rv64_emit32(mc, rv_slli(out, out, up));
+  if (bf.signed_) {
+    rv64_emit32(mc, rv_srai(out, out, down));
+  } else {
+    rv64_emit32(mc, rv_srli(out, out, down));
+  }
+}
+
+/* Write `src` into the bit-field described by `bf` inside the record at
+ * `record_addr`, as a read-modify-write of the storage unit:
+ *     t1 = storage unit
+ *     t3 = (src & field_mask) << lsb
+ *     t1 = (t1 & ~(field_mask << lsb)) | t3
+ *     storage unit = t1
+ * `src` may be an immediate (materialized in t2) or a register; other kinds
+ * panic. Storage size 0 is treated as 4 bytes and width 0 as 1 bit.
+ * Clobbers t1, t2, t3, plus t0 when the record address is materialized. */
+static void rv_bitfield_store(CGTarget* t, Operand record_addr, Operand src,
+                              BitFieldAccess bf) {
+  MCEmitter* mc = t->mc;
+  u32 base = agg_addr_reg(t, record_addr, RV_T0);
+  u32 storage_bytes = bf.storage.size ? bf.storage.size : 4u;
+  /* Load the current storage unit into t1. */
+  rv64_emit32(mc, enc_int_load(storage_bytes, 0, RV_T1, base,
+                               (i32)bf.storage_offset));
+  u32 src_reg;
+  if (src.kind == OPK_IMM) {
+    rv64_emit_load_imm(mc, 1, RV_T2, src.v.imm);
+    src_reg = RV_T2;
+  } else if (src.kind == OPK_REG) {
+    src_reg = reg_num(src);
+  } else {
+    compiler_panic(t->c, impl_of(t)->loc,
+                   "rv64 bitfield_store: src kind %d NYI", (int)src.kind);
+  }
+  u32 lsb = bf.bit_offset;
+  u32 width = bf.bit_width ? bf.bit_width : 1u;
+  /* field_mask = low `width` bits set. Shifting a 64-bit value by 64 is
+   * undefined in C, so a full-width field gets an all-ones mask directly. */
+  u64 mask = (width >= 64u) ? ~(u64)0 : (((u64)1 << width) - 1u);
+  /* t3 = src & field_mask, moved up to the field position. */
+  rv64_emit_load_imm(mc, 1, RV_T3, (i64)mask);
+  rv64_emit32(mc, rv_and(RV_T3, src_reg, RV_T3));
+  if (lsb) rv64_emit32(mc, rv_slli(RV_T3, RV_T3, lsb));
+  /* Clear the field bits in t1 (t2 is free again: src was consumed above). */
+  u64 mask_in = mask << lsb;
+  rv64_emit_load_imm(mc, 1, RV_T2, (i64)~mask_in);
+  rv64_emit32(mc, rv_and(RV_T1, RV_T1, RV_T2));
+  /* Merge the new field and write the unit back. */
+  rv64_emit32(mc, rv_or(RV_T1, RV_T1, RV_T3));
+  rv64_emit32(mc, enc_int_store(storage_bytes, RV_T1, base,
+                                (i32)bf.storage_offset));
+}
+
+/* ---- arithmetic ---- */
+
+/* Emit `dst = a_op <op> b_op`.
+ *   - FP add/sub/mul/div use the S or D format chosen from dst's type.
+ *   - Integer ops use the 64-bit instruction when dst is 64-bit, otherwise the
+ *     W-variant where one exists.
+ *   - A register/immediate pair is encoded with the immediate form when the
+ *     instruction set has one and the literal fits; otherwise both operands
+ *     are forced into registers (t0/t1 serve as scratch).
+ * Unhandled ops panic. */
+static void rv_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op,
+                     Operand b_op) {
+  MCEmitter* mc = t->mc;
+
+  /* ---- floating point ---- */
+  switch (op) {
+    case BO_FADD:
+    case BO_FSUB:
+    case BO_FMUL:
+    case BO_FDIV: {
+      u32 fmt = type_is_fp_double(dst.type) ? RV_FMT_D : RV_FMT_S;
+      u32 fd = reg_num(dst);
+      u32 fs1 = reg_num(a_op);
+      u32 fs2 = reg_num(b_op);
+      if (op == BO_FADD) {
+        rv64_emit32(mc, rv_fadd(fmt, fd, fs1, fs2));
+      } else if (op == BO_FSUB) {
+        rv64_emit32(mc, rv_fsub(fmt, fd, fs1, fs2));
+      } else if (op == BO_FMUL) {
+        rv64_emit32(mc, rv_fmul(fmt, fd, fs1, fs2));
+      } else {
+        rv64_emit32(mc, rv_fdiv(fmt, fd, fs1, fs2));
+      }
+      return;
+    }
+    default:
+      break;
+  }
+
+  /* ---- integer ---- */
+  u32 sf = type_is_64(dst.type) ? 1u : 0u;
+  u32 rd = reg_num(dst);
+
+  /* For commutative ops move a left-hand immediate to the right so that
+   * `3 + a` reaches the immediate forms just like `a + 3`. ISUB does not
+   * commute, so an immediate on its left is materialized further down. */
+  int commutes =
+      (op == BO_IADD || op == BO_AND || op == BO_OR || op == BO_XOR);
+  if (commutes && a_op.kind == OPK_IMM && b_op.kind != OPK_IMM) {
+    Operand swapped = a_op;
+    a_op = b_op;
+    b_op = swapped;
+  }
+
+  /* Immediate forms. ADDI/ANDI/ORI/XORI take a signed 12-bit literal
+   * ([-2048, 2047]). There is no SUBI: subtraction becomes ADDI of the
+   * negated literal, which requires the literal itself to lie in
+   * [-2047, 2048]. Shift amounts are masked to 6 bits for the 64-bit forms
+   * and 5 bits for the W-variants. */
+  if (b_op.kind == OPK_IMM && a_op.kind != OPK_IMM) {
+    const u32 lhs = reg_num(a_op);
+    const i64 k = b_op.v.imm;
+    const int simm12_ok = (k >= -2048 && k <= 2047);
+
+    if (op == BO_SHL || op == BO_SHR_U || op == BO_SHR_S) {
+      u32 bits = sf ? 64u : 32u;
+      u32 amt = (u32)((u64)k & (bits - 1u));
+      if (op == BO_SHL) {
+        rv64_emit32(mc, sf ? rv_slli(rd, lhs, amt) : rv_slliw(rd, lhs, amt));
+      } else if (op == BO_SHR_U) {
+        rv64_emit32(mc, sf ? rv_srli(rd, lhs, amt) : rv_srliw(rd, lhs, amt));
+      } else {
+        rv64_emit32(mc, sf ? rv_srai(rd, lhs, amt) : rv_sraiw(rd, lhs, amt));
+      }
+      return;
+    }
+
+    if (op == BO_ISUB) {
+      if (k >= -2047 && k <= 2048) {
+        rv64_emit32(mc, sf ? rv_addi(rd, lhs, (i32)-k)
+                           : rv_addiw(rd, lhs, (i32)-k));
+        return;
+      }
+    } else if (simm12_ok) {
+      switch (op) {
+        case BO_IADD:
+          rv64_emit32(mc, sf ? rv_addi(rd, lhs, (i32)k)
+                             : rv_addiw(rd, lhs, (i32)k));
+          return;
+        case BO_AND:
+          rv64_emit32(mc, rv_andi(rd, lhs, (i32)k));
+          return;
+        case BO_OR:
+          rv64_emit32(mc, rv_ori(rd, lhs, (i32)k));
+          return;
+        case BO_XOR:
+          rv64_emit32(mc, rv_xori(rd, lhs, (i32)k));
+          return;
+        default:
+          break;
+      }
+    }
+    /* No immediate form applies: fall through to register/register. */
+  }
+
+  /* Register/register forms; the second scratch avoids the first. */
+  u32 ra = rv64_force_reg_int(t, a_op, RV_T0);
+  u32 rb = rv64_force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0);
+
+  u32 insn;
+  switch (op) {
+    case BO_IADD:  insn = sf ? rv_add(rd, ra, rb) : rv_addw(rd, ra, rb); break;
+    case BO_ISUB:  insn = sf ? rv_sub(rd, ra, rb) : rv_subw(rd, ra, rb); break;
+    case BO_IMUL:  insn = sf ? rv_mul(rd, ra, rb) : rv_mulw(rd, ra, rb); break;
+    case BO_AND:   insn = rv_and(rd, ra, rb); break;
+    case BO_OR:    insn = rv_or(rd, ra, rb); break;
+    case BO_XOR:   insn = rv_xor(rd, ra, rb); break;
+    case BO_SHL:   insn = sf ? rv_sll(rd, ra, rb) : rv_sllw(rd, ra, rb); break;
+    case BO_SHR_U: insn = sf ? rv_srl(rd, ra, rb) : rv_srlw(rd, ra, rb); break;
+    case BO_SHR_S: insn = sf ? rv_sra(rd, ra, rb) : rv_sraw(rd, ra, rb); break;
+    case BO_SDIV:  insn = sf ? rv_div(rd, ra, rb) : rv_divw(rd, ra, rb); break;
+    case BO_UDIV:  insn = sf ? rv_divu(rd, ra, rb) : rv_divuw(rd, ra, rb); break;
+    case BO_SREM:  insn = sf ? rv_rem(rd, ra, rb) : rv_remw(rd, ra, rb); break;
+    case BO_UREM:  insn = sf ? rv_remu(rd, ra, rb) : rv_remuw(rd, ra, rb); break;
+    default:
+      compiler_panic(t->c, impl_of(t)->loc, "rv64 binop: op %d unimpl", (int)op);
+      return;
+  }
+  rv64_emit32(mc, insn);
+}
+
+/* Emit `dst = <op> a_op` for integer negate, bitwise not and logical not.
+ * The operand is forced into a register first (t0 is the scratch for
+ * non-register operands), since the CGTarget contract allows an immediate
+ * here. Other ops panic. */
+static void rv_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) {
+  MCEmitter* mc = t->mc;
+  u32 sf = type_is_64(dst.type) ? 1u : 0u;
+  u32 rd = reg_num(dst);
+  u32 rs = rv64_force_reg_int(t, a_op, RV_T0);
+
+  if (op == UO_NEG) {
+    /* 0 - rs, using the W-variant for 32-bit results. */
+    if (sf) {
+      rv64_emit32(mc, rv_sub(rd, RV_ZERO, rs));
+    } else {
+      rv64_emit32(mc, rv_subw(rd, RV_ZERO, rs));
+    }
+  } else if (op == UO_BNOT) {
+    /* rs ^ -1 flips every bit. */
+    rv64_emit32(mc, rv_xori(rd, rs, -1));
+  } else if (op == UO_NOT) {
+    /* Unsigned rs < 1 holds exactly when rs == 0. */
+    rv64_emit32(mc, rv_sltiu(rd, rs, 1));
+  } else {
+    compiler_panic(t->c, impl_of(t)->loc, "rv64 unop: op %d unimpl", (int)op);
+  }
+}
+
+/* Emit the conversion `k` from `src` to `dst`; both operands are used as
+ * registers.
+ *   - SEXT/ZEXT: shift the value up to bit 63 and back down (arithmetic or
+ *     logical); a 32-bit sign extension is a single ADDIW.
+ *   - TRUNC: ADDIW, i.e. the low 32 bits sign-extended. Narrower widths are
+ *     left for the consuming store.
+ *   - ITOF/FTOI: FCVT variant chosen by integer width (W/L), signedness and
+ *     FP precision (S/D).
+ *   - FEXT/FTRUNC: single <-> double.
+ *   - BITCAST: FMV between the integer and FP register files; a same-class
+ *     bitcast panics.
+ * Unknown kinds panic. */
+static void rv_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) {
+  RImpl* impl = impl_of(t);
+  MCEmitter* mc = t->mc;
+  u32 rd = reg_num(dst);
+  u32 rs = reg_num(src);
+
+  switch (k) {
+    case CV_SEXT:
+    case CV_ZEXT: {
+      u32 bits = type_byte_size(src.type) * 8u;
+      if (k == CV_SEXT && bits == 32u) {
+        /* sext.w rd, rs == addiw rd, rs, 0 */
+        rv64_emit32(mc, rv_addiw(rd, rs, 0));
+        return;
+      }
+      /* Shift pair by (64 - source width); for a 32-bit zero extension this
+       * is the usual slli 32 / srli 32. */
+      u32 sh = 64u - bits;
+      rv64_emit32(mc, rv_slli(rd, rs, sh));
+      if (k == CV_SEXT) {
+        rv64_emit32(mc, rv_srai(rd, rd, sh));
+      } else {
+        rv64_emit32(mc, rv_srli(rd, rd, sh));
+      }
+      return;
+    }
+    case CV_TRUNC:
+      rv64_emit32(mc, rv_addiw(rd, rs, 0));
+      return;
+    case CV_ITOF_S:
+    case CV_ITOF_U: {
+      int wide_int = type_is_64(src.type);
+      int to_double = type_is_fp_double(dst.type);
+      u32 insn;
+      if (k == CV_ITOF_S) {
+        if (to_double) insn = wide_int ? rv_fcvt_d_l(rd, rs) : rv_fcvt_d_w(rd, rs);
+        else           insn = wide_int ? rv_fcvt_s_l(rd, rs) : rv_fcvt_s_w(rd, rs);
+      } else {
+        if (to_double) insn = wide_int ? rv_fcvt_d_lu(rd, rs) : rv_fcvt_d_wu(rd, rs);
+        else           insn = wide_int ? rv_fcvt_s_lu(rd, rs) : rv_fcvt_s_wu(rd, rs);
+      }
+      rv64_emit32(mc, insn);
+      return;
+    }
+    case CV_FTOI_S:
+    case CV_FTOI_U: {
+      int wide_int = type_is_64(dst.type);
+      int from_double = type_is_fp_double(src.type);
+      u32 insn;
+      if (k == CV_FTOI_S) {
+        if (from_double) insn = wide_int ? rv_fcvt_l_d(rd, rs) : rv_fcvt_w_d(rd, rs);
+        else             insn = wide_int ? rv_fcvt_l_s(rd, rs) : rv_fcvt_w_s(rd, rs);
+      } else {
+        if (from_double) insn = wide_int ? rv_fcvt_lu_d(rd, rs) : rv_fcvt_wu_d(rd, rs);
+        else             insn = wide_int ? rv_fcvt_lu_s(rd, rs) : rv_fcvt_wu_s(rd, rs);
+      }
+      rv64_emit32(mc, insn);
+      return;
+    }
+    case CV_FEXT:
+      rv64_emit32(mc, rv_fcvt_d_s(rd, rs));
+      return;
+    case CV_FTRUNC:
+      rv64_emit32(mc, rv_fcvt_s_d(rd, rs));
+      return;
+    case CV_BITCAST: {
+      int int_to_fp = (src.cls == RC_INT && dst.cls == RC_FP);
+      int fp_to_int = (src.cls == RC_FP && dst.cls == RC_INT);
+      if (int_to_fp) {
+        u32 bytes = type_byte_size(dst.type);
+        rv64_emit32(mc, bytes == 8 ? rv_fmv_d_x(rd, rs) : rv_fmv_w_x(rd, rs));
+      } else if (fp_to_int) {
+        u32 bytes = type_byte_size(src.type);
+        rv64_emit32(mc, bytes == 8 ? rv_fmv_x_d(rd, rs) : rv_fmv_x_w(rd, rs));
+      } else {
+        compiler_panic(t->c, impl->loc, "rv64 BITCAST: same-class NYI");
+      }
+      return;
+    }
+    default:
+      compiler_panic(t->c, impl->loc, "rv64 convert kind %d unimpl", (int)k);
+  }
+}
+
+/* ---- calls / return ---- */
+
+/* rd = base + off. Uses a single ADDI when `off` fits a signed 12-bit
+ * immediate; otherwise builds `off` in rd and adds `base`. */
+static void emit_arg_lea(MCEmitter* mc, u32 rd, u32 base, i32 off) {
+  if (off >= -2048 && off <= 2047) {
+    rv64_emit32(mc, rv_addi(rd, base, off));
+    return;
+  }
+  rv64_emit_load_imm(mc, 1, rd, (i64)off);
+  rv64_emit32(mc, rv_add(rd, base, rd));
+}
+
+/* Place one outgoing call argument.
+ *
+ * `next_int` / `next_fp` count the integer (a0..a7) and FP (fa0..fa7, i.e.
+ * f10..f17) argument registers already used; `stack_off` is the next free
+ * byte in the outgoing stack-argument area at sp. All three are advanced.
+ *
+ *   - No ABI info (variadic argument): treated as one DIRECT integer part of
+ *     the argument's size, so FP values travel in integer registers.
+ *   - IGNORE: nothing is emitted.
+ *   - INDIRECT: the address of the argument's storage is passed in the next
+ *     integer slot.
+ *   - DIRECT: every part goes to the next register of its class; once eight
+ *     registers of that class are used, the part is stored to an 8-byte
+ *     stack slot (integer parts are staged through t0, FP parts loaded from
+ *     memory through ft0). */
+static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
+                           u32* next_fp, u32* stack_off) {
+  RImpl* impl = impl_of(t);
+  MCEmitter* mc = t->mc;
+
+  /* Synthesized one-part shape for variadic arguments. */
+  ABIArgInfo va_ai;
+  ABIArgPart va_pt;
+  const ABIArgInfo* info = av->abi;
+  if (info == NULL) {
+    u32 bytes = type_byte_size(av->type);
+    memset(&va_ai, 0, sizeof va_ai);
+    memset(&va_pt, 0, sizeof va_pt);
+    va_ai.kind = ABI_ARG_DIRECT;
+    va_ai.parts = &va_pt;
+    va_ai.nparts = 1;
+    va_pt.cls = ABI_CLASS_INT;
+    va_pt.size = bytes;
+    va_pt.align = bytes;
+    va_pt.src_offset = 0;
+    info = &va_ai;
+  }
+
+  if (info->kind == ABI_ARG_IGNORE) return;
+
+  if (info->kind == ABI_ARG_INDIRECT) {
+    /* The callee receives a pointer to the storage. */
+    int spill = (*next_int >= 8);
+    u32 ptr_reg = RV_T0;
+    if (!spill) {
+      ptr_reg = RV_A0 + *next_int;
+      *next_int += 1;
+    }
+    if (av->storage.kind == OPK_LOCAL) {
+      RvSlot* slot = rv64_slot_get(impl, av->storage.v.frame_slot);
+      if (!slot) compiler_panic(t->c, impl->loc, "rv64 call: bad byval slot");
+      emit_arg_lea(mc, ptr_reg, RV_S0, -(i32)slot->off);
+    } else if (av->storage.kind == OPK_INDIRECT) {
+      u32 base = av->storage.v.ind.base & 0x1fu;
+      emit_arg_lea(mc, ptr_reg, base, av->storage.v.ind.ofs);
+    } else {
+      compiler_panic(t->c, impl->loc,
+                     "rv64 call: INDIRECT storage kind %d NYI",
+                     (int)av->storage.kind);
+    }
+    if (spill) {
+      rv64_emit32(mc, rv_sd(ptr_reg, RV_SP, (i32)*stack_off));
+      *stack_off += 8;
+    }
+    return;
+  }
+
+  for (u16 pi = 0; pi < info->nparts; ++pi) {
+    const ABIArgPart* part = &info->parts[pi];
+    u32 bytes = part->size;
+
+    if (part->cls == ABI_CLASS_INT) {
+      int spill = (*next_int >= 8);
+      u32 reg = RV_T0;
+      if (!spill) {
+        reg = RV_A0 + *next_int;
+        *next_int += 1;
+      }
+      switch (av->storage.kind) {
+        case OPK_IMM:
+          rv64_emit_load_imm(mc, (bytes == 8) ? 1u : 0u, reg,
+                             av->storage.v.imm);
+          break;
+        case OPK_REG:
+          if (av->storage.cls == RC_FP) {
+            /* FP value routed through an integer register (variadic
+             * argument): move the raw bits with FMV.X.{D,W}. */
+            u32 fsrc = reg_num(av->storage);
+            rv64_emit32(mc, (bytes == 8) ? rv_fmv_x_d(reg, fsrc)
+                                         : rv_fmv_x_w(reg, fsrc));
+          } else {
+            /* Plain register move. */
+            rv64_emit32(mc, rv_addi(reg, reg_num(av->storage), 0));
+          }
+          break;
+        case OPK_LOCAL: {
+          RvSlot* slot = rv64_slot_get(impl, av->storage.v.frame_slot);
+          if (!slot) compiler_panic(t->c, impl->loc, "rv64 call: bad arg slot");
+          i32 disp = -(i32)slot->off + (i32)part->src_offset;
+          rv64_emit32(mc, enc_int_load(bytes, 0, reg, RV_S0, disp));
+          break;
+        }
+        case OPK_INDIRECT: {
+          /* Per the original note, cg keeps INDIRECT base registers in
+           * s2..s11, which are disjoint from a0..a7 and from t0. */
+          u32 base = av->storage.v.ind.base & 0x1fu;
+          i32 disp = av->storage.v.ind.ofs + (i32)part->src_offset;
+          rv64_emit32(mc, enc_int_load(bytes, 0, reg, base, disp));
+          break;
+        }
+        default:
+          compiler_panic(t->c, impl->loc,
+                         "rv64 call: storage kind %d NYI",
+                         (int)av->storage.kind);
+      }
+      if (spill) {
+        rv64_emit32(mc, rv_sd(reg, RV_SP, (i32)*stack_off));
+        *stack_off += 8;
+      }
+      continue;
+    }
+
+    if (part->cls != ABI_CLASS_FP) {
+      compiler_panic(t->c, impl->loc, "rv64 call: ABI class %d unimpl",
+                     (int)part->cls);
+      continue;
+    }
+
+    if (*next_fp < 8) {
+      /* Next FP argument register: fa0 is f10. */
+      u32 freg = 10u + *next_fp;
+      *next_fp += 1;
+      if (av->storage.kind == OPK_REG) {
+        /* fsgnj fd, fs, fs is the FP register move. */
+        u32 fmt = (bytes == 8) ? RV_FMT_D : RV_FMT_S;
+        u32 fsrc = reg_num(av->storage);
+        rv64_emit32(mc, rv_fsgnj(fmt, freg, fsrc, fsrc));
+      } else if (av->storage.kind == OPK_INDIRECT) {
+        u32 base = av->storage.v.ind.base & 0x1fu;
+        i32 disp = av->storage.v.ind.ofs + (i32)part->src_offset;
+        rv64_emit32(mc, (bytes == 8) ? rv_fld(freg, base, disp)
+                                     : rv_flw(freg, base, disp));
+      } else {
+        compiler_panic(t->c, impl->loc, "rv64 call: FP storage kind %d NYI",
+                       (int)av->storage.kind);
+      }
+      continue;
+    }
+
+    /* FP argument registers exhausted: store into the next stack slot. */
+    if (av->storage.kind == OPK_REG) {
+      u32 fsrc = reg_num(av->storage);
+      if (bytes == 8) {
+        rv64_emit32(mc, rv_fsd(fsrc, RV_SP, (i32)*stack_off));
+      } else {
+        rv64_emit32(mc, rv_fsw(fsrc, RV_SP, (i32)*stack_off));
+      }
+    } else if (av->storage.kind == OPK_INDIRECT) {
+      /* Stage through ft0 (f0), which per the original note is outside the
+       * cg fs2..fs11 pool. */
+      const u32 ft0 = 0u;
+      u32 base = av->storage.v.ind.base & 0x1fu;
+      i32 disp = av->storage.v.ind.ofs + (i32)part->src_offset;
+      if (bytes == 8) {
+        rv64_emit32(mc, rv_fld(ft0, base, disp));
+        rv64_emit32(mc, rv_fsd(ft0, RV_SP, (i32)*stack_off));
+      } else {
+        rv64_emit32(mc, rv_flw(ft0, base, disp));
+        rv64_emit32(mc, rv_fsw(ft0, RV_SP, (i32)*stack_off));
+      }
+    } else {
+      compiler_panic(t->c, impl->loc, "rv64 call: FP stack-arg NYI");
+    }
+    *stack_off += 8;
+  }
+}
+
+/* Emit a call described by `d`.
+ *
+ *   1. If the callee returns through memory (sret), a0 receives the address
+ *      of the destination frame slot and integer arguments start at a1.
+ *   2. Arguments are placed by emit_arg_value(); the 16-byte-rounded size of
+ *      the stack-argument area is folded into impl->max_outgoing.
+ *   3. The call itself is AUIPC+JALR with R_RV_CALL for a global callee, or
+ *      JALR through the callee register.
+ *   4. Register return parts (a0.. / fa0..) are moved or stored into
+ *      d->ret.storage.
+ *
+ * `d->abi` may be NULL as far as step 1 is concerned; in that case step 4 has
+ * no description of the return value and is skipped instead of dereferencing
+ * the NULL pointer. */
+static void rv_call(CGTarget* t, const CGCallDesc* d) {
+  RImpl* a = impl_of(t);
+  MCEmitter* mc = t->mc;
+
+  u32 next_int = 0, next_fp = 0, stack_off = 0;
+
+  /* sret: caller passes destination pointer in a0. */
+  if (d->abi && d->abi->has_sret) {
+    if (d->ret.storage.kind != OPK_LOCAL) {
+      compiler_panic(t->c, a->loc, "rv64 call: sret dst must be LOCAL");
+    }
+    RvSlot* s = rv64_slot_get(a, d->ret.storage.v.frame_slot);
+    if (!s) compiler_panic(t->c, a->loc, "rv64 call: bad sret slot");
+    i32 off = -(i32)s->off;
+    if (off >= -2048 && off <= 2047) {
+      rv64_emit32(mc, rv_addi(RV_A0, RV_S0, off));
+    } else {
+      rv64_emit_load_imm(mc, 1, RV_A0, (i64)off);
+      rv64_emit32(mc, rv_add(RV_A0, RV_S0, RV_A0));
+    }
+    next_int = 1;
+  }
+
+  for (u32 i = 0; i < d->nargs; ++i) {
+    emit_arg_value(t, &d->args[i], &next_int, &next_fp, &stack_off);
+  }
+  /* Track the largest outgoing-argument area, rounded up to 16 bytes. */
+  u32 needed = (stack_off + 15u) & ~15u;
+  if (needed > a->max_outgoing) a->max_outgoing = needed;
+
+  if (d->callee.kind == OPK_GLOBAL) {
+    /* AUIPC ra, 0 ; JALR ra, ra, 0 with R_RV_CALL on AUIPC */
+    u32 sec = mc->section_id;
+    u32 pos = mc->pos(mc);
+    rv64_emit32(mc, rv_auipc(RV_RA, 0));
+    rv64_emit32(mc, rv_jalr(RV_RA, RV_RA, 0));
+    mc->emit_reloc_at(mc, sec, pos, R_RV_CALL,
+                      d->callee.v.global.sym, d->callee.v.global.addend, 0, 0);
+  } else if (d->callee.kind == OPK_REG) {
+    rv64_emit32(mc, rv_jalr(RV_RA, reg_num(d->callee), 0));
+  } else {
+    compiler_panic(t->c, a->loc, "rv64 call: callee kind %d unsupported",
+                   (int)d->callee.kind);
+  }
+
+  /* Receive return value. Without ABI info there is nothing to receive. */
+  if (!d->abi) return;
+  const ABIArgInfo* ri = &d->abi->ret;
+  if (ri->kind == ABI_ARG_IGNORE || ri->kind == ABI_ARG_INDIRECT) return;
+  if (ri->nparts == 0) return;
+
+  Operand rs = d->ret.storage;
+  u32 nir = 0, nfr = 0;
+  for (u16 i = 0; i < ri->nparts; ++i) {
+    const ABIArgPart* p = &ri->parts[i];
+    /* Integer parts come back in a0, a1, ...; FP parts in fa0 (f10), ... */
+    u32 src_reg = (p->cls == ABI_CLASS_INT) ? (RV_A0 + nir++) : (10u + nfr++);
+
+    if (rs.kind == OPK_REG) {
+      if (ri->nparts != 1) {
+        compiler_panic(t->c, a->loc, "rv64 call: REG ret with %u parts",
+                       (unsigned)ri->nparts);
+      }
+      if (p->cls == ABI_CLASS_INT) {
+        rv64_emit32(mc, rv_addi(reg_num(rs), src_reg, 0));
+      } else {
+        u32 fmt = (p->size == 8) ? RV_FMT_D : RV_FMT_S;
+        rv64_emit32(mc, rv_fsgnj(fmt, reg_num(rs), src_reg, src_reg));
+      }
+    } else if (rs.kind == OPK_LOCAL || rs.kind == OPK_INDIRECT) {
+      /* Store each part at its source offset within the destination. */
+      u32 base_reg;
+      i32 base_off;
+      if (rs.kind == OPK_LOCAL) {
+        RvSlot* s = rv64_slot_get(a, rs.v.frame_slot);
+        if (!s) compiler_panic(t->c, a->loc, "rv64 call: bad ret slot");
+        base_reg = RV_S0;
+        base_off = -(i32)s->off;
+      } else {
+        base_reg = rs.v.ind.base & 0x1fu;
+        base_off = rs.v.ind.ofs;
+      }
+      i32 off = base_off + (i32)p->src_offset;
+      if (p->cls == ABI_CLASS_INT) {
+        rv64_emit32(mc, enc_int_store(p->size, src_reg, base_reg, off));
+      } else {
+        if (p->size == 8) rv64_emit32(mc, rv_fsd(src_reg, base_reg, off));
+        else rv64_emit32(mc, rv_fsw(src_reg, base_reg, off));
+      }
+    } else if (rs.kind == OPK_IMM && rs.type && rs.type->kind == TY_VOID) {
+      /* void return placeholder — nothing to do. */
+    } else {
+      compiler_panic(t->c, a->loc, "rv64 call: ret_storage kind %d unsupported",
+                     (int)rs.kind);
+    }
+  }
+}
+
+/* Emit a function return: move `val` (if any) into the return location, then
+ * jump to the shared epilogue label.
+ *
+ *   - INDIRECT (sret) return: the destination pointer saved in
+ *     impl->sret_ptr_slot is reloaded into t0 and the aggregate is copied
+ *     from val->storage with 8/4/2/1-byte moves through t1. A missing sret
+ *     slot is a compiler panic; without it the copy would store through
+ *     whatever t0 happens to contain.
+ *   - REG: moved to a0, or fa0 (f10) for FP.
+ *   - IMM: materialized in a0.
+ *   - LOCAL / INDIRECT storage with a direct ABI: each part is loaded into
+ *     the next a-register or fa-register. */
+static void rv_ret(CGTarget* t, const CGABIValue* val) {
+  RImpl* a = impl_of(t);
+  MCEmitter* mc = t->mc;
+
+  if (val) {
+    const ABIArgInfo* ri = val->abi;
+    if (ri && ri->kind == ABI_ARG_INDIRECT) {
+      /* sret: reload destination pointer from sret_ptr_slot into t0,
+       * then memcpy from val->storage into [t0]. */
+      u32 src_base;
+      i32 src_base_off;
+      u32 nbytes;
+      if (val->storage.kind == OPK_LOCAL) {
+        RvSlot* s = rv64_slot_get(a, val->storage.v.frame_slot);
+        if (!s) compiler_panic(t->c, a->loc, "rv64 ret: bad sret slot");
+        src_base = RV_S0;
+        src_base_off = -(i32)s->off;
+        nbytes = s->size;
+      } else if (val->storage.kind == OPK_INDIRECT) {
+        src_base = val->storage.v.ind.base & 0x1fu;
+        src_base_off = val->storage.v.ind.ofs;
+        nbytes = val->size;
+        if (!nbytes) {
+          compiler_panic(t->c, a->loc,
+                         "rv64 ret indirect: missing aggregate size");
+        }
+      } else {
+        compiler_panic(t->c, a->loc,
+                       "rv64 ret indirect: storage kind %d NYI",
+                       (int)val->storage.kind);
+      }
+      RvSlot* sp = (a->sret_ptr_slot != FRAME_SLOT_NONE)
+                       ? rv64_slot_get(a, a->sret_ptr_slot)
+                       : NULL;
+      if (!sp) {
+        compiler_panic(t->c, a->loc, "rv64 ret: missing sret pointer slot");
+      }
+      rv64_emit32(mc, rv_ld(RV_T0, RV_S0, -(i32)sp->off));
+      u32 i = 0;
+      while (i + 8 <= nbytes) {
+        rv64_emit32(mc, rv_ld(RV_T1, src_base, src_base_off + (i32)i));
+        rv64_emit32(mc, rv_sd(RV_T1, RV_T0, (i32)i));
+        i += 8;
+      }
+      while (i + 4 <= nbytes) {
+        rv64_emit32(mc, rv_lwu(RV_T1, src_base, src_base_off + (i32)i));
+        rv64_emit32(mc, rv_sw(RV_T1, RV_T0, (i32)i));
+        i += 4;
+      }
+      while (i + 2 <= nbytes) {
+        rv64_emit32(mc, rv_lhu(RV_T1, src_base, src_base_off + (i32)i));
+        rv64_emit32(mc, rv_sh(RV_T1, RV_T0, (i32)i));
+        i += 2;
+      }
+      while (i < nbytes) {
+        rv64_emit32(mc, rv_lbu(RV_T1, src_base, src_base_off + (i32)i));
+        rv64_emit32(mc, rv_sb(RV_T1, RV_T0, (i32)i));
+        i += 1;
+      }
+    } else if (val->storage.kind == OPK_REG) {
+      if (val->storage.cls == RC_FP) {
+        u32 fmt = type_is_fp_double(val->storage.type) ? RV_FMT_D : RV_FMT_S;
+        u32 r = reg_num(val->storage);
+        rv64_emit32(mc, rv_fsgnj(fmt, 10u, r, r)); /* fa0 = freg 10 */
+      } else {
+        rv64_emit32(mc, rv_addi(RV_A0, reg_num(val->storage), 0));
+      }
+    } else if (val->storage.kind == OPK_IMM) {
+      u32 sf = type_is_64(val->storage.type) ? 1u : 0u;
+      rv64_emit_load_imm(mc, sf, RV_A0, val->storage.v.imm);
+    } else if (val->storage.kind == OPK_LOCAL ||
+               val->storage.kind == OPK_INDIRECT) {
+      u32 base_reg;
+      i32 base_off;
+      if (val->storage.kind == OPK_LOCAL) {
+        RvSlot* s = rv64_slot_get(a, val->storage.v.frame_slot);
+        if (!s) compiler_panic(t->c, a->loc, "rv64 ret: bad local slot");
+        base_reg = RV_S0;
+        base_off = -(i32)s->off;
+      } else {
+        base_reg = val->storage.v.ind.base & 0x1fu;
+        base_off = val->storage.v.ind.ofs;
+      }
+      /* With no ABI info the part count is 0 and nothing is loaded. */
+      const ABIArgInfo* ri2 = val->abi;
+      u32 nir = 0, nfr = 0;
+      for (u16 i = 0; i < (ri2 ? ri2->nparts : 0); ++i) {
+        const ABIArgPart* pt = &ri2->parts[i];
+        i32 off = base_off + (i32)pt->src_offset;
+        if (pt->cls == ABI_CLASS_INT) {
+          rv64_emit32(mc, enc_int_load(pt->size, 0, RV_A0 + nir++, base_reg, off));
+        } else if (pt->cls == ABI_CLASS_FP) {
+          u32 freg = 10u + nfr++;
+          if (pt->size == 8) rv64_emit32(mc, rv_fld(freg, base_reg, off));
+          else rv64_emit32(mc, rv_flw(freg, base_reg, off));
+        } else {
+          compiler_panic(t->c, a->loc, "rv64 ret: part cls %d unimpl",
+                         (int)pt->cls);
+        }
+      }
+    }
+  }
+  /* Jump to epilogue. */
+  rv64_emit32(mc, rv_jal(RV_ZERO, 0));
+  mc->emit_label_ref(mc, a->epilogue_label, R_RV_JAL, 4, 0);
+}
+
+/* ---- alloca / varargs ---- */
+
+/* Dynamic stack allocation: lower sp by the 16-byte-rounded size and put the
+ * address of the new block in `d`.
+ *
+ *   - `d` must be a register and `align` at most 16, otherwise panic.
+ *   - A constant size is rounded up to 16 (minimum 16) and must stay within
+ *     the ADDI immediate (<= 2047 after rounding).
+ *   - A register size is rounded in t0, then subtracted from sp.
+ *
+ * The result is emitted as `addi d, sp, 0`; its position is recorded in
+ * impl->add_patches so the immediate can be patched to max_outgoing at
+ * function end. Sets impl->has_alloca. */
+static void rv_alloca_(CGTarget* t, Operand d, Operand sz, u32 align) {
+  RImpl* impl = impl_of(t);
+  MCEmitter* mc = t->mc;
+
+  if (d.kind != OPK_REG) {
+    compiler_panic(t->c, impl->loc, "rv64 alloca: dst must be REG");
+  }
+  if (align > 16) {
+    compiler_panic(t->c, impl->loc,
+                   "rv64 alloca: align %u > 16 not yet supported", align);
+  }
+
+  switch (sz.kind) {
+    case OPK_IMM: {
+      i64 req = sz.v.imm;
+      if (req < 0) compiler_panic(t->c, impl->loc, "rv64 alloca: negative size");
+      u64 bytes = ((u64)req + 15u) & ~(u64)15u;
+      if (bytes == 0) bytes = 16;
+      if (bytes > 2047u) {
+        compiler_panic(t->c, impl->loc,
+                       "rv64 alloca: const size %llu too large for v1",
+                       (unsigned long long)bytes);
+      }
+      rv64_emit32(mc, rv_addi(RV_SP, RV_SP, -(i32)bytes));
+      break;
+    }
+    case OPK_REG: {
+      /* t0 = (size + 15) & ~15; sp -= t0 */
+      u32 size_reg = reg_num(sz);
+      rv64_emit32(mc, rv_addi(RV_T0, size_reg, 15));
+      rv64_emit32(mc, rv_andi(RV_T0, RV_T0, -16));
+      rv64_emit32(mc, rv_sub(RV_SP, RV_SP, RV_T0));
+      break;
+    }
+    default:
+      compiler_panic(t->c, impl->loc, "rv64 alloca: size kind %d unsupported",
+                     (int)sz.kind);
+  }
+
+  /* Grow the patch list (doubling, starting at 4) when it is full. */
+  if (impl->nadd_patches == impl->add_patches_cap) {
+    u32 grown = impl->add_patches_cap ? impl->add_patches_cap * 2 : 4;
+    struct RvAllocaPatch* fresh =
+        arena_array(t->c->tu, struct RvAllocaPatch, grown);
+    if (impl->add_patches) {
+      memcpy(fresh, impl->add_patches, sizeof(*fresh) * impl->nadd_patches);
+    }
+    impl->add_patches = fresh;
+    impl->add_patches_cap = grown;
+  }
+
+  /* Record and emit the placeholder `addi dst, sp, 0`. */
+  u32 out = reg_num(d);
+  struct RvAllocaPatch* slot = &impl->add_patches[impl->nadd_patches];
+  slot->pos = mc->pos(mc);
+  slot->dst_reg = out;
+  impl->nadd_patches++;
+  rv64_emit32(mc, rv_addi(out, RV_SP, 0));
+  impl->has_alloca = 1;
+}
+/* va_start for RV64 LP64D, where va_list is a single `void*` pointing at the
+ * next argument slot.
+ *
+ * The prologue spills a_{nparams_int}..a7 into a save area at [s0 + 16], at
+ * the top of the callee frame just above the saved s0/ra pair. save_area[8]
+ * therefore coincides with the caller's first stack argument, and one 8-byte
+ * stride walks register and stack arguments alike.
+ *
+ * Stores s0 + 16 + 8 * next_param_int through the register in `ap_op`, which
+ * skips the slots of the named integer parameters. Clobbers t0. Panics when
+ * the current function is not variadic. */
+static void rv_va_start_(CGTarget* t, Operand ap_op) {
+  RImpl* impl = impl_of(t);
+  MCEmitter* mc = t->mc;
+  if (!impl->is_variadic) {
+    compiler_panic(t->c, impl->loc, "rv64 va_start: function not variadic");
+  }
+  u32 ap_reg = reg_num(ap_op);
+  i32 first_vararg = 16 + (i32)(impl->next_param_int * 8u);
+  rv64_emit32(mc, rv_addi(RV_T0, RV_S0, first_vararg));
+  rv64_emit32(mc, rv_sd(RV_T0, ap_reg, 0));
+}
+
+/* va_arg: fetch the next variadic argument of type `ty` into `dst` and
+ * advance the va_list by one 8-byte slot.
+ *
+ * `ap_op` is a register holding the address of the va_list pointer. The
+ * cursor is loaded into t1; integer values are loaded with a width and
+ * signedness taken from `ty`, while FP values are loaded as raw bits into t2
+ * and moved to the FP register with FMV. Clobbers t1 and, for FP, t2. */
+static void rv_va_arg_(CGTarget* t, Operand dst, Operand ap_op,
+                       const Type* ty) {
+  MCEmitter* mc = t->mc;
+  u32 ap_reg = reg_num(ap_op);
+  u32 bytes = type_byte_size(ty);
+
+  /* t1 = current cursor. */
+  rv64_emit32(mc, rv_ld(RV_T1, ap_reg, 0));
+
+  if (dst.cls != RC_FP) {
+    int is_signed = type_is_signed(ty);
+    rv64_emit32(mc, enc_int_load(bytes, is_signed, reg_num(dst), RV_T1, 0));
+  } else if (bytes == 8) {
+    /* Variadic FP values sit in the integer save area as raw bits. */
+    rv64_emit32(mc, rv_ld(RV_T2, RV_T1, 0));
+    rv64_emit32(mc, rv_fmv_d_x(reg_num(dst), RV_T2));
+  } else {
+    rv64_emit32(mc, rv_lw(RV_T2, RV_T1, 0));
+    rv64_emit32(mc, rv_fmv_w_x(reg_num(dst), RV_T2));
+  }
+
+  /* Step to the next slot and write the cursor back. */
+  rv64_emit32(mc, rv_addi(RV_T1, RV_T1, 8));
+  rv64_emit32(mc, rv_sd(RV_T1, ap_reg, 0));
+}
+
+/* va_end: emits nothing; both parameters are intentionally unused. */
+static void rv_va_end_(CGTarget* t, Operand a) {
+  (void)a;
+  (void)t;
+}
+
+/* va_copy: copy the 8-byte va_list pointer stored at the address in `s` to
+ * the address in `d`, going through t0. */
+static void rv_va_copy_(CGTarget* t, Operand d, Operand s) {
+  MCEmitter* mc = t->mc;
+  const u32 to_reg = reg_num(d);
+  const u32 from_reg = reg_num(s);
+  rv64_emit32(mc, rv_ld(RV_T0, from_reg, 0));
+  rv64_emit32(mc, rv_sd(RV_T0, to_reg, 0));
+}
+
+/* ---- atomics (LL/SC + AMO) ---- */
+
+/* Nonzero when `o` carries acquire semantics: ACQUIRE, ACQ_REL, SEQ_CST, or
+ * CONSUME (which is treated like acquire here). */
+int mem_order_is_acquire(MemOrder o) {
+  switch (o) {
+    case MO_ACQUIRE:
+    case MO_ACQ_REL:
+    case MO_SEQ_CST:
+    case MO_CONSUME:
+      return 1;
+    default:
+      return 0;
+  }
+}
+/* Nonzero when `o` carries release semantics: RELEASE, ACQ_REL or SEQ_CST. */
+int mem_order_is_release(MemOrder o) {
+  switch (o) {
+    case MO_RELEASE:
+    case MO_ACQ_REL:
+    case MO_SEQ_CST:
+      return 1;
+    default:
+      return 0;
+  }
+}
+
+/* Atomic load of `ma.size` bytes from `addr` into `dst` with ordering `o`.
+ *
+ * `addr` may be a register or a frame local (whose address is formed in t0
+ * when it has a nonzero offset); other kinds panic.
+ *
+ *   - Non-acquire orderings: a plain sized load.
+ *   - Acquire orderings on 1- or 2-byte objects: a plain sized load followed
+ *     by a full fence, with an additional leading fence for SEQ_CST. LR only
+ *     exists for words and doublewords, so LR.W here would access four bytes
+ *     and require 4-byte alignment.
+ *   - Acquire orderings otherwise: LR.W / LR.D with aq=1 used as an ordered
+ *     load. */
+static void rv_atomic_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma,
+                           MemOrder o) {
+  MCEmitter* mc = t->mc;
+  u32 sf = (ma.size == 8) ? 1u : 0u;
+  /* Resolve address to a register. */
+  u32 base;
+  if (addr.kind == OPK_REG) {
+    base = reg_num(addr);
+  } else if (addr.kind == OPK_LOCAL) {
+    i32 off;
+    base = addr_base(t, addr, &off, RV_T0);
+    if (off) {
+      rv64_emit32(mc, rv_addi(RV_T0, base, off));
+      base = RV_T0;
+    }
+  } else {
+    compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_load: addr kind %d NYI",
+                   (int)addr.kind);
+  }
+
+  if (!mem_order_is_acquire(o)) {
+    rv64_emit32(mc, enc_int_load(ma.size, 0, reg_num(dst), base, 0));
+    return;
+  }
+
+  if (ma.size == 1 || ma.size == 2) {
+    /* Sub-word: sized load bracketed by fences (rw,rw is stronger than the
+     * minimum required, which keeps it correct). */
+    if (o == MO_SEQ_CST) rv64_emit32(mc, rv_fence_rw_rw());
+    rv64_emit32(mc, enc_int_load(ma.size, 0, reg_num(dst), base, 0));
+    rv64_emit32(mc, rv_fence_rw_rw());
+    return;
+  }
+
+  /* lr.w/d as ordered load (aq=1, rl=0). */
+  rv64_emit32(mc, sf ? rv_lr_d(reg_num(dst), base, 1, 0)
+                     : rv_lr_w(reg_num(dst), base, 1, 0));
+}
+
+/* Atomic store of `ma.size` bytes of `src` to `addr` with ordering `o`.
+ *
+ * `src` is an immediate (materialized in t1) or a register; `addr` is a
+ * register or a frame local (address formed in t0 when its offset is
+ * nonzero). Other kinds panic.
+ *
+ * Release orderings put a full `fence rw,rw` before the store; SEQ_CST adds a
+ * second one after it. Other orderings emit only the store. */
+static void rv_atomic_store(CGTarget* t, Operand addr, Operand src,
+                            MemAccess ma, MemOrder o) {
+  MCEmitter* mc = t->mc;
+  u32 sf = (ma.size == 8) ? 1u : 0u;
+
+  /* Value to store. */
+  u32 val_reg;
+  switch (src.kind) {
+    case OPK_IMM:
+      rv64_emit_load_imm(mc, sf, RV_T1, src.v.imm);
+      val_reg = RV_T1;
+      break;
+    case OPK_REG:
+      val_reg = reg_num(src);
+      break;
+    default:
+      compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_store: src kind %d NYI",
+                     (int)src.kind);
+  }
+
+  /* Target address. */
+  u32 ptr_reg;
+  switch (addr.kind) {
+    case OPK_REG:
+      ptr_reg = reg_num(addr);
+      break;
+    case OPK_LOCAL: {
+      i32 disp;
+      ptr_reg = addr_base(t, addr, &disp, RV_T0);
+      if (disp) {
+        rv64_emit32(mc, rv_addi(RV_T0, ptr_reg, disp));
+        ptr_reg = RV_T0;
+      }
+      break;
+    }
+    default:
+      compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_store: addr kind %d NYI",
+                     (int)addr.kind);
+  }
+
+  int releasing = mem_order_is_release(o);
+  if (releasing) rv64_emit32(mc, rv_fence_rw_rw());
+  rv64_emit32(mc, enc_int_store(ma.size, val_reg, ptr_reg, 0));
+  if (releasing && o == MO_SEQ_CST) rv64_emit32(mc, rv_fence_rw_rw());
+}
+
+/* Atomic read-modify-write: `dst` receives the old value at `addr` and the
+ * location is updated with `old <op> val`, using an LR/SC retry loop:
+ *
+ *     retry: lr.{w,d}  dst, (t0)
+ *            <op>      t2, dst, t1
+ *            sc.{w,d}  t3, t2, (t0)
+ *            bnez      t3, retry
+ *
+ * The address is copied into t0 and the operand into t1 before the loop, so
+ * the loop never reads the caller's registers after `dst` is written.
+ * Clobbers t0..t3.
+ *
+ * Ordering: LR carries aq for acquire orderings and SC carries rl for release
+ * orderings. For SEQ_CST the LR is additionally marked rl (lr.aqrl), matching
+ * the ISA manual's recommended mapping for sequentially consistent LR/SC.
+ *
+ * An op outside the handled set is a compiler panic rather than being
+ * emitted as an exchange. */
+static void rv_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr,
+                          Operand val, MemAccess ma, MemOrder o) {
+  MCEmitter* mc = t->mc;
+  u32 sf = (ma.size == 8) ? 1u : 0u;
+  u32 base = RV_T0;
+  if (addr.kind == OPK_REG) {
+    rv64_emit32(mc, rv_addi(base, reg_num(addr), 0));
+  } else if (addr.kind == OPK_LOCAL) {
+    i32 off;
+    u32 b = addr_base(t, addr, &off, RV_T0);
+    if (b != RV_T0 || off) {
+      rv64_emit32(mc, rv_addi(base, b, off));
+    }
+  } else {
+    compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_rmw: addr NYI");
+  }
+  u32 vreg = RV_T1;
+  if (val.kind == OPK_IMM) rv64_emit_load_imm(mc, sf, vreg, val.v.imm);
+  else if (val.kind == OPK_REG) rv64_emit32(mc, rv_addi(vreg, reg_num(val), 0));
+  else compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_rmw: val kind NYI");
+
+  int aq = mem_order_is_acquire(o);
+  int rl = mem_order_is_release(o);
+  /* SEQ_CST: lr.aqrl (aq is already set for SEQ_CST). */
+  u32 lr_rl = (o == MO_SEQ_CST) ? 1u : 0u;
+
+  /* One LR/SC loop serves every op; per-op AMO encodings are not used. */
+  MCLabel L_retry = mc->label_new(mc);
+  mc->label_place(mc, L_retry);
+  rv64_emit32(mc, sf ? rv_lr_d(reg_num(dst), base, (u32)aq, lr_rl)
+                     : rv_lr_w(reg_num(dst), base, (u32)aq, lr_rl));
+  u32 new_r = RV_T2;
+  switch (op) {
+    case AO_XCHG: rv64_emit32(mc, rv_addi(new_r, vreg, 0)); break;
+    case AO_ADD:  rv64_emit32(mc, sf ? rv_add(new_r, reg_num(dst), vreg) : rv_addw(new_r, reg_num(dst), vreg)); break;
+    case AO_SUB:  rv64_emit32(mc, sf ? rv_sub(new_r, reg_num(dst), vreg) : rv_subw(new_r, reg_num(dst), vreg)); break;
+    case AO_AND:  rv64_emit32(mc, rv_and(new_r, reg_num(dst), vreg)); break;
+    case AO_OR:   rv64_emit32(mc, rv_or(new_r, reg_num(dst), vreg)); break;
+    case AO_XOR:  rv64_emit32(mc, rv_xor(new_r, reg_num(dst), vreg)); break;
+    case AO_NAND:
+      rv64_emit32(mc, rv_and(new_r, reg_num(dst), vreg));
+      rv64_emit32(mc, rv_xori(new_r, new_r, -1));
+      break;
+    default:
+      compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_rmw: op %d unimpl",
+                     (int)op);
+  }
+  /* sc.w/d t3, new_r, (base); bnez t3, retry. */
+  rv64_emit32(mc, sf ? rv_sc_d(RV_T3, base, new_r, 0, (u32)rl)
+                     : rv_sc_w(RV_T3, base, new_r, 0, (u32)rl));
+  rv64_emit32(mc, rv_bne(RV_T3, RV_ZERO, 0));
+  mc->emit_label_ref(mc, L_retry, R_RV_BRANCH, 4, 0);
+}
+
+/* Atomic compare-and-swap on `addr`:
+ *
+ *     retry: lr.{w,d}  prior, (t0)
+ *            bne       prior, t1, fail      ; t1 = expected
+ *            sc.{w,d}  t3, t2, (t0)         ; t2 = desired
+ *            bnez      t3, retry
+ *            ok = 1 ; j done
+ *     fail:  ok = 0
+ *     done:
+ *
+ * `prior` receives the value observed by LR and `ok` the success flag. The
+ * address, expected and desired values are copied into t0/t1/t2 before the
+ * loop. Clobbers t0..t3.
+ *
+ * Ordering: the LR carries aq when either the success or the failure ordering
+ * is an acquire ordering, because the failure path consists of the LR alone.
+ * SC carries rl for release success orderings. For a SEQ_CST success ordering
+ * the LR is also marked rl (lr.aqrl), matching the ISA manual's recommended
+ * mapping.
+ *
+ * For 32-bit accesses LR.W yields a sign-extended value, so the expected
+ * value is sign-extended from bit 31 (ADDIW) before the 64-bit BNE compare;
+ * otherwise an expected value held zero-extended would never match. */
+static void rv_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr,
+                          Operand exp, Operand des, MemAccess ma,
+                          MemOrder succ, MemOrder fail) {
+  MCEmitter* mc = t->mc;
+  u32 sf = (ma.size == 8) ? 1u : 0u;
+  u32 base = RV_T0;
+  if (addr.kind == OPK_REG) rv64_emit32(mc, rv_addi(base, reg_num(addr), 0));
+  else if (addr.kind == OPK_LOCAL) {
+    i32 off; u32 b = addr_base(t, addr, &off, RV_T0);
+    if (b != RV_T0 || off) rv64_emit32(mc, rv_addi(base, b, off));
+  } else compiler_panic(t->c, impl_of(t)->loc, "rv64 atomic_cas: addr NYI");
+
+  u32 ereg = RV_T1, dreg = RV_T2;
+  if (exp.kind == OPK_IMM) {
+    rv64_emit_load_imm(mc, sf, ereg, exp.v.imm);
+    /* Canonicalize to the sign-extended form LR.W produces. */
+    if (!sf) rv64_emit32(mc, rv_addiw(ereg, ereg, 0));
+  } else if (sf) {
+    rv64_emit32(mc, rv_addi(ereg, reg_num(exp), 0));
+  } else {
+    /* Copy and sign-extend from bit 31 in one instruction. */
+    rv64_emit32(mc, rv_addiw(ereg, reg_num(exp), 0));
+  }
+  if (des.kind == OPK_IMM) rv64_emit_load_imm(mc, sf, dreg, des.v.imm);
+  else rv64_emit32(mc, rv_addi(dreg, reg_num(des), 0));
+
+  int aq = mem_order_is_acquire(succ) || mem_order_is_acquire(fail);
+  int rl = mem_order_is_release(succ);
+  /* SEQ_CST success: lr.aqrl (aq is already set for SEQ_CST). */
+  u32 lr_rl = (succ == MO_SEQ_CST) ? 1u : 0u;
+
+  MCLabel L_retry = mc->label_new(mc);
+  MCLabel L_fail = mc->label_new(mc);
+  MCLabel L_done = mc->label_new(mc);
+
+  mc->label_place(mc, L_retry);
+  rv64_emit32(mc, sf ? rv_lr_d(reg_num(prior), base, (u32)aq, lr_rl)
+                     : rv_lr_w(reg_num(prior), base, (u32)aq, lr_rl));
+  /* if (prior != expected) -> fail */
+  rv64_emit32(mc, rv_bne(reg_num(prior), ereg, 0));
+  mc->emit_label_ref(mc, L_fail, R_RV_BRANCH, 4, 0);
+  /* sc.w/d t3, des, (base); bnez t3, retry */
+  rv64_emit32(mc, sf ? rv_sc_d(RV_T3, base, dreg, 0, (u32)rl)
+                     : rv_sc_w(RV_T3, base, dreg, 0, (u32)rl));
+  rv64_emit32(mc, rv_bne(RV_T3, RV_ZERO, 0));
+  mc->emit_label_ref(mc, L_retry, R_RV_BRANCH, 4, 0);
+  /* ok = 1; jump done */
+  rv64_emit_load_imm(mc, 0, reg_num(ok), 1);
+  rv64_emit32(mc, rv_jal(RV_ZERO, 0));
+  mc->emit_label_ref(mc, L_done, R_RV_JAL, 4, 0);
+
+  mc->label_place(mc, L_fail);
+  rv64_emit_load_imm(mc, 0, reg_num(ok), 0);
+
+  mc->label_place(mc, L_done);
+}
+
+static void rv_fence(CGTarget* t, MemOrder o) {
+  /* Relaxed ordering emits nothing; every other order gets one rw,rw fence. */
+  if (o != MO_RELAXED) rv64_emit32(t->mc, rv_fence_rw_rw());
+}
+
+/* ---- intrinsics: lower each supported IntrinKind inline (scratch t0..t3); panic on the rest. ---- */
+static void rv_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd,
+                         const Operand* args, u32 na) {
+  (void)nd; (void)na; /* counts are not checked: each case indexes dsts/args directly */
+  MCEmitter* mc = t->mc;
+  RImpl* a = impl_of(t);
+  switch (kind) {
+    case INTRIN_ASSUME_ALIGNED:
+    case INTRIN_EXPECT: {
+      /* dst = val; the hint itself is dropped. */
+      Operand val = args[0];
+      Operand dst = dsts[0];
+      u32 sf = type_is_64(dst.type) ? 1u : 0u;
+      if (val.kind == OPK_REG) {
+        if (reg_num(val) != reg_num(dst))
+          rv64_emit32(mc, rv_addi(reg_num(dst), reg_num(val), 0));
+      } else if (val.kind == OPK_IMM) {
+        rv64_emit_load_imm(mc, sf, reg_num(dst), val.v.imm);
+      } else {
+        compiler_panic(t->c, a->loc, "rv64 intrinsic: val kind %d NYI",
+                       (int)val.kind);
+      }
+      return;
+    }
+    case INTRIN_PREFETCH: return; /* nothing is emitted */
+    case INTRIN_UNREACHABLE:
+    case INTRIN_TRAP:
+      rv64_emit32(mc, rv_ebreak());
+      return;
+    case INTRIN_BSWAP16: {
+      /* rd = ((rs & 0xff) << 8) | ((rs >> 8) & 0xff) */
+      u32 rd = reg_num(dsts[0]);
+      u32 rs = reg_num(args[0]);
+      /* Plan: mask each byte into its swapped position and OR the halves.
+       * The 0xff00 mask does not fit andi's 12-bit signed immediate, so it
+       * is built in t2 as 0xff << 8 and applied with a register AND.
+       * Scratch: t1 = new high byte, t2 = mask, t3 = new low byte; rd is
+       * written last, so rd may be the same register as rs.
+       * t2 = 0xff << 8 = 0xff00 */
+      rv64_emit32(mc, rv_addi(RV_T2, RV_ZERO, 0xff));
+      rv64_emit32(mc, rv_slli(RV_T2, RV_T2, 8));
+      /* t1 = (rs << 8) & 0xff00 */
+      rv64_emit32(mc, rv_slli(RV_T1, rs, 8));
+      rv64_emit32(mc, rv_and(RV_T1, RV_T1, RV_T2));
+      /* t3 = (rs >> 8) & 0xff; the mask also discards anything rs holds
+       * above bit 15. */
+      rv64_emit32(mc, rv_srli(RV_T3, rs, 8));
+      rv64_emit32(mc, rv_andi(RV_T3, RV_T3, 0xff));
+      rv64_emit32(mc, rv_or(rd, RV_T1, RV_T3));
+      return;
+    }
+    case INTRIN_BSWAP32: {
+      u32 rd = reg_num(dsts[0]);
+      u32 rs = reg_num(args[0]);
+      /* result = (b0<<24)|(b1<<16)|(b2<<8)|b3, where bi = (rs >> (8*i)) & 0xff. */
+      /* t1 = ((rs >> 24) & 0xff) */
+      rv64_emit32(mc, rv_srliw(RV_T1, rs, 24));
+      rv64_emit32(mc, rv_andi(RV_T1, RV_T1, 0xff));
+      /* t2 = ((rs >> 16) & 0xff) << 8 */
+      rv64_emit32(mc, rv_srliw(RV_T2, rs, 16));
+      rv64_emit32(mc, rv_andi(RV_T2, RV_T2, 0xff));
+      rv64_emit32(mc, rv_slli(RV_T2, RV_T2, 8));
+      rv64_emit32(mc, rv_or(RV_T1, RV_T1, RV_T2));
+      /* t2 = ((rs >> 8) & 0xff) << 16 */
+      rv64_emit32(mc, rv_srliw(RV_T2, rs, 8));
+      rv64_emit32(mc, rv_andi(RV_T2, RV_T2, 0xff));
+      rv64_emit32(mc, rv_slli(RV_T2, RV_T2, 16));
+      rv64_emit32(mc, rv_or(RV_T1, RV_T1, RV_T2));
+      /* t2 = (rs & 0xff) << 24 */
+      rv64_emit32(mc, rv_andi(RV_T2, rs, 0xff));
+      rv64_emit32(mc, rv_slli(RV_T2, RV_T2, 24));
+      rv64_emit32(mc, rv_or(rd, RV_T1, RV_T2));
+      /* clear bits 63:32 of rd (done unconditionally) */
+      rv64_emit32(mc, rv_slli(rd, rd, 32));
+      rv64_emit32(mc, rv_srli(rd, rd, 32));
+      return;
+    }
+    case INTRIN_BSWAP64: {
+      u32 rd = reg_num(dsts[0]);
+      u32 rs = reg_num(args[0]);
+      /* General bswap64: the loop below unrolls at compile time over the 8 bytes. */
+      /* t1 = accumulator, starts at 0 */
+      rv64_emit32(mc, rv_addi(RV_T1, RV_ZERO, 0));
+      for (int i = 0; i < 8; ++i) {
+        /* t2 = (rs >> (8*i)) & 0xff */
+        if (i == 0) {
+          rv64_emit32(mc, rv_andi(RV_T2, rs, 0xff));
+        } else {
+          rv64_emit32(mc, rv_srli(RV_T2, rs, (u32)(8 * i)));
+          rv64_emit32(mc, rv_andi(RV_T2, RV_T2, 0xff));
+        }
+        /* t2 <<= (56 - 8*i) (so byte 0 goes to top) */
+        int sh = 56 - 8 * i;
+        if (sh) rv64_emit32(mc, rv_slli(RV_T2, RV_T2, (u32)sh));
+        rv64_emit32(mc, rv_or(RV_T1, RV_T1, RV_T2));
+      }
+      rv64_emit32(mc, rv_addi(rd, RV_T1, 0));
+      return;
+    }
+    case INTRIN_POPCOUNT: {
+      /* Software popcount using the usual bit-twiddling reduction. The
+       * width is taken from the argument's type. */
+      u32 rd = reg_num(dsts[0]);
+      u32 rs = reg_num(args[0]);
+      int is64 = type_is_64(args[0].type);
+      /* Work on a copy in t1 so the input register is not clobbered. */
+      rv64_emit32(mc, rv_addi(RV_T1, rs, 0));
+      if (!is64) {
+        /* zext.w t1, t1 */
+        rv64_emit32(mc, rv_slli(RV_T1, RV_T1, 32));
+        rv64_emit32(mc, rv_srli(RV_T1, RV_T1, 32));
+      }
+      /* t1 = t1 - ((t1 >> 1) & 0x5555...) */
+      rv64_emit32(mc, rv_srli(RV_T2, RV_T1, 1));
+      rv64_emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x5555555555555555ll
+                                            : (i64)0x55555555);
+      rv64_emit32(mc, rv_and(RV_T2, RV_T2, RV_T3));
+      rv64_emit32(mc, rv_sub(RV_T1, RV_T1, RV_T2));
+      /* t1 = (t1 & 0x3333...) + ((t1 >> 2) & 0x3333...) */
+      rv64_emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x3333333333333333ll
+                                            : (i64)0x33333333);
+      rv64_emit32(mc, rv_and(RV_T2, RV_T1, RV_T3));
+      rv64_emit32(mc, rv_srli(RV_T1, RV_T1, 2));
+      rv64_emit32(mc, rv_and(RV_T1, RV_T1, RV_T3));
+      rv64_emit32(mc, rv_add(RV_T1, RV_T1, RV_T2));
+      /* t1 = (t1 + (t1 >> 4)) & 0x0f0f... */
+      rv64_emit32(mc, rv_srli(RV_T2, RV_T1, 4));
+      rv64_emit32(mc, rv_add(RV_T1, RV_T1, RV_T2));
+      rv64_emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x0f0f0f0f0f0f0f0fll
+                                            : (i64)0x0f0f0f0f);
+      rv64_emit32(mc, rv_and(RV_T1, RV_T1, RV_T3));
+      /* t1 *= 0x0101...; the byte sums add up in the top byte of the operand width */
+      rv64_emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x0101010101010101ll
+                                            : (i64)0x01010101);
+      rv64_emit32(mc, rv_mul(RV_T1, RV_T1, RV_T3));
+      /* 64-bit: bits 63:56. 32-bit: bits 31:24; srliw ignores the 64-bit product's bits 63:32. */
+      rv64_emit32(mc, is64 ? rv_srli(rd, RV_T1, 56u) : rv_srliw(rd, RV_T1, 24u));
+      return;
+    }
+    case INTRIN_CTZ: {
+      /* ctz(x) = popcount((x & -x) - 1) for x != 0. */
+      u32 rd = reg_num(dsts[0]);
+      u32 rs = reg_num(args[0]);
+      int is64 = type_is_64(args[0].type);
+      /* t1 = -x */
+      rv64_emit32(mc, rv_sub(RV_T1, RV_ZERO, rs));
+      /* t1 = x & -x (isolates the lowest set bit) */
+      rv64_emit32(mc, rv_and(RV_T1, RV_T1, rs));
+      /* t1 = t1 - 1 (ones below the lowest set bit) */
+      rv64_emit32(mc, rv_addi(RV_T1, RV_T1, -1));
+      if (!is64) {
+        rv64_emit32(mc, rv_slli(RV_T1, RV_T1, 32));
+        rv64_emit32(mc, rv_srli(RV_T1, RV_T1, 32));
+      }
+      /* popcount(t1) into rd; same sequence as INTRIN_POPCOUNT */
+      rv64_emit32(mc, rv_srli(RV_T2, RV_T1, 1));
+      rv64_emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x5555555555555555ll
+                                            : (i64)0x55555555);
+      rv64_emit32(mc, rv_and(RV_T2, RV_T2, RV_T3));
+      rv64_emit32(mc, rv_sub(RV_T1, RV_T1, RV_T2));
+      rv64_emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x3333333333333333ll
+                                            : (i64)0x33333333);
+      rv64_emit32(mc, rv_and(RV_T2, RV_T1, RV_T3));
+      rv64_emit32(mc, rv_srli(RV_T1, RV_T1, 2));
+      rv64_emit32(mc, rv_and(RV_T1, RV_T1, RV_T3));
+      rv64_emit32(mc, rv_add(RV_T1, RV_T1, RV_T2));
+      rv64_emit32(mc, rv_srli(RV_T2, RV_T1, 4));
+      rv64_emit32(mc, rv_add(RV_T1, RV_T1, RV_T2));
+      rv64_emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x0f0f0f0f0f0f0f0fll
+                                            : (i64)0x0f0f0f0f);
+      rv64_emit32(mc, rv_and(RV_T1, RV_T1, RV_T3));
+      rv64_emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x0101010101010101ll
+                                            : (i64)0x01010101);
+      rv64_emit32(mc, rv_mul(RV_T1, RV_T1, RV_T3));
+      rv64_emit32(mc, is64 ? rv_srli(rd, RV_T1, 56u) : rv_srliw(rd, RV_T1, 24u)); /* srliw: ignore product bits 63:32 */
+      return;
+    }
+    case INTRIN_CLZ: {
+      /* Software clz: smear the highest set bit into every lower position,
+       * then count the bits that stayed zero. Recipe:
+       *   x |= x>>1; x |= x>>2; x |= x>>4; x |= x>>8; x |= x>>16;
+       *   [x |= x>>32;]   // 64-bit only
+       *   clz = popcount(~x) at the operand width.
+       */
+      u32 rd = reg_num(dsts[0]);
+      u32 rs = reg_num(args[0]);
+      int is64 = type_is_64(args[0].type);
+      rv64_emit32(mc, rv_addi(RV_T1, rs, 0));
+      if (!is64) {
+        /* zero-extend to 32 bits so the srli folds shift in zeros */
+        rv64_emit32(mc, rv_slli(RV_T1, RV_T1, 32));
+        rv64_emit32(mc, rv_srli(RV_T1, RV_T1, 32));
+      }
+      u32 shifts[6] = {1, 2, 4, 8, 16, 32};
+      u32 ns = is64 ? 6u : 5u;
+      for (u32 i = 0; i < ns; ++i) {
+        rv64_emit32(mc, rv_srli(RV_T2, RV_T1, shifts[i]));
+        rv64_emit32(mc, rv_or(RV_T1, RV_T1, RV_T2));
+      }
+      /* Invert: the zeros above the highest set bit of the folded value become
+       * ones, so clz(x) = popcount(~folded) at the operand width.
+       * Example (8-bit): x = 0b00011010 folds to 0b00011111; inverted it is
+       * 0b11100000, whose popcount 3 equals clz(x). */
+      rv64_emit32(mc, rv_xori(RV_T1, RV_T1, -1));
+      if (!is64) {
+        rv64_emit32(mc, rv_slli(RV_T1, RV_T1, 32));
+        rv64_emit32(mc, rv_srli(RV_T1, RV_T1, 32));
+      }
+      /* popcount(t1) into rd; same sequence as INTRIN_POPCOUNT */
+      rv64_emit32(mc, rv_srli(RV_T2, RV_T1, 1));
+      rv64_emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x5555555555555555ll
+                                            : (i64)0x55555555);
+      rv64_emit32(mc, rv_and(RV_T2, RV_T2, RV_T3));
+      rv64_emit32(mc, rv_sub(RV_T1, RV_T1, RV_T2));
+      rv64_emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x3333333333333333ll
+                                            : (i64)0x33333333);
+      rv64_emit32(mc, rv_and(RV_T2, RV_T1, RV_T3));
+      rv64_emit32(mc, rv_srli(RV_T1, RV_T1, 2));
+      rv64_emit32(mc, rv_and(RV_T1, RV_T1, RV_T3));
+      rv64_emit32(mc, rv_add(RV_T1, RV_T1, RV_T2));
+      rv64_emit32(mc, rv_srli(RV_T2, RV_T1, 4));
+      rv64_emit32(mc, rv_add(RV_T1, RV_T1, RV_T2));
+      rv64_emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x0f0f0f0f0f0f0f0fll
+                                            : (i64)0x0f0f0f0f);
+      rv64_emit32(mc, rv_and(RV_T1, RV_T1, RV_T3));
+      rv64_emit_load_imm(mc, 1, RV_T3, is64 ? (i64)0x0101010101010101ll
+                                            : (i64)0x01010101);
+      rv64_emit32(mc, rv_mul(RV_T1, RV_T1, RV_T3));
+      rv64_emit32(mc, is64 ? rv_srli(rd, RV_T1, 56u) : rv_srliw(rd, RV_T1, 24u)); /* srliw: ignore product bits 63:32 */
+      return;
+    }
+    case INTRIN_ADD_OVERFLOW:
+    case INTRIN_SUB_OVERFLOW: {
+      /* dsts: [val, ovf]. Signed overflow check.
+       * For ADD: ovf = ((a XOR result) & (b XOR result)) >> (width-1)
+       * For SUB: ovf = ((a XOR b) & (a XOR result)) >> (width-1) */
+      Operand a_op = args[0], b_op = args[1];
+      Operand dval = dsts[0], dovf = dsts[1];
+      int is64 = type_is_64(dval.type);
+      u32 ra = rv64_force_reg_int(t, a_op, RV_T0);
+      u32 rb = rv64_force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0);
+      u32 rd = reg_num(dval);
+      u32 rovf = reg_num(dovf);
+      /* Compute result into t2 (avoid clobbering rd if rd == ra/rb). */
+      if (kind == INTRIN_ADD_OVERFLOW) {
+        rv64_emit32(mc, is64 ? rv_add(RV_T2, ra, rb) : rv_addw(RV_T2, ra, rb));
+      } else {
+        rv64_emit32(mc, is64 ? rv_sub(RV_T2, ra, rb) : rv_subw(RV_T2, ra, rb));
+      }
+      /* t3 = a XOR t2 */
+      rv64_emit32(mc, rv_xor(RV_T3, ra, RV_T2));
+      if (kind == INTRIN_ADD_OVERFLOW) {
+        /* rovf = (b XOR t2) & t3 */
+        rv64_emit32(mc, rv_xor(rovf, rb, RV_T2));
+        rv64_emit32(mc, rv_and(rovf, rovf, RV_T3));
+      } else {
+        /* rovf = (a XOR b) & t3 */
+        rv64_emit32(mc, rv_xor(rovf, ra, rb));
+        rv64_emit32(mc, rv_and(rovf, rovf, RV_T3));
+      }
+      /* shift right to extract sign bit */
+      u32 sh = is64 ? 63u : 31u;
+      rv64_emit32(mc, is64 ? rv_srli(rovf, rovf, sh) : rv_srliw(rovf, rovf, sh));
+      rv64_emit32(mc, rv_andi(rovf, rovf, 1));
+      /* Write the value last, after ra/rb have been read for the check. */
+      rv64_emit32(mc, rv_addi(rd, RV_T2, 0));
+      return;
+    }
+    case INTRIN_MUL_OVERFLOW: {
+      /* i32 only: form the full 64-bit signed product of the two operands and
+       * compare it with the sign-extension of its low 32 bits. i64 panics. */
+      Operand a_op = args[0], b_op = args[1];
+      Operand dval = dsts[0], dovf = dsts[1];
+      int is64 = type_is_64(dval.type);
+      if (is64) {
+        compiler_panic(t->c, a->loc, "rv64 intrinsic: mul_overflow i64 NYI");
+      }
+      u32 ra = rv64_force_reg_int(t, a_op, RV_T0);
+      u32 rb = rv64_force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0);
+      u32 rd = reg_num(dval);
+      u32 rovf = reg_num(dovf);
+      /* Sign-extend inputs from 32 to 64. */
+      rv64_emit32(mc, rv_addiw(RV_T2, ra, 0));
+      rv64_emit32(mc, rv_addiw(RV_T3, rb, 0));
+      /* Full 64-bit product */
+      rv64_emit32(mc, rv_mul(RV_T2, RV_T2, RV_T3));
+      /* sign-ext of low 32 of product */
+      rv64_emit32(mc, rv_addiw(RV_T3, RV_T2, 0));
+      /* ovf = (T2 != T3) */
+      rv64_emit32(mc, rv_xor(rovf, RV_T2, RV_T3));
+      rv64_emit32(mc, rv_sltu(rovf, RV_ZERO, rovf));
+      /* dval = low 32, sign-extended */
+      rv64_emit32(mc, rv_addiw(rd, RV_T2, 0));
+      return;
+    }
+    case INTRIN_MEMCPY:
+    case INTRIN_MEMMOVE: {
+      Operand da = args[0], sa = args[1], nb = args[2];
+      if (da.kind != OPK_REG || sa.kind != OPK_REG || nb.kind != OPK_IMM) {
+        compiler_panic(t->c, a->loc,
+                       "rv64 intrinsic: memcpy/memmove non-const NYI");
+      }
+      u32 dr = reg_num(da), sr = reg_num(sa), n = (u32)nb.v.imm; /* NOTE(review): offsets below go straight into load/store immediates; no range check on n is visible here - confirm */
+      if (kind == INTRIN_MEMCPY) {
+        u32 i = 0;
+        while (i + 8 <= n) { rv64_emit32(mc, rv_ld(RV_T3, sr, (i32)i)); rv64_emit32(mc, rv_sd(RV_T3, dr, (i32)i)); i += 8; }
+        while (i + 4 <= n) { rv64_emit32(mc, rv_lwu(RV_T3, sr, (i32)i)); rv64_emit32(mc, rv_sw(RV_T3, dr, (i32)i)); i += 4; }
+        while (i + 2 <= n) { rv64_emit32(mc, rv_lhu(RV_T3, sr, (i32)i)); rv64_emit32(mc, rv_sh(RV_T3, dr, (i32)i)); i += 2; }
+        while (i < n) { rv64_emit32(mc, rv_lbu(RV_T3, sr, (i32)i)); rv64_emit32(mc, rv_sb(RV_T3, dr, (i32)i)); i += 1; }
+      } else { /* memmove: highest offset first. NOTE(review): each chunk is stored before the next is loaded, which looks unsafe when the ranges overlap with dst below src - confirm */
+        u32 i = n;
+        while (i >= 8) { i -= 8; rv64_emit32(mc, rv_ld(RV_T3, sr, (i32)i)); rv64_emit32(mc, rv_sd(RV_T3, dr, (i32)i)); }
+        while (i >= 4) { i -= 4; rv64_emit32(mc, rv_lwu(RV_T3, sr, (i32)i)); rv64_emit32(mc, rv_sw(RV_T3, dr, (i32)i)); }
+        while (i >= 2) { i -= 2; rv64_emit32(mc, rv_lhu(RV_T3, sr, (i32)i)); rv64_emit32(mc, rv_sh(RV_T3, dr, (i32)i)); }
+        while (i >= 1) { i -= 1; rv64_emit32(mc, rv_lbu(RV_T3, sr, (i32)i)); rv64_emit32(mc, rv_sb(RV_T3, dr, (i32)i)); }
+      }
+      return;
+    }
+    case INTRIN_MEMSET: {
+      Operand da = args[0], bv = args[1], nb = args[2];
+      if (da.kind != OPK_REG || nb.kind != OPK_IMM) {
+        compiler_panic(t->c, a->loc, "rv64 intrinsic: memset non-const NYI");
+      }
+      u32 dr = reg_num(da), n = (u32)nb.v.imm; /* NOTE(review): same immediate-offset assumption as memcpy - confirm */
+      u32 src;
+      if (bv.kind == OPK_IMM) {
+        u32 byte = (u32)(bv.v.imm & 0xffu);
+        if (byte == 0) src = RV_ZERO; /* zero fill stores straight from the zero register */
+        else {
+          u64 b = byte; b |= b << 8; b |= b << 16; b |= b << 32; /* replicate the byte into all 8 lanes */
+          rv64_emit_load_imm(mc, 1, RV_T3, (i64)b);
+          src = RV_T3;
+        }
+      } else {
+        compiler_panic(t->c, a->loc, "rv64 intrinsic: memset REG byte NYI");
+      }
+      u32 i = 0;
+      while (i + 8 <= n) { rv64_emit32(mc, rv_sd(src, dr, (i32)i)); i += 8; }
+      while (i + 4 <= n) { rv64_emit32(mc, rv_sw(src, dr, (i32)i)); i += 4; }
+      while (i + 2 <= n) { rv64_emit32(mc, rv_sh(src, dr, (i32)i)); i += 2; }
+      while (i < n) { rv64_emit32(mc, rv_sb(src, dr, (i32)i)); i += 1; }
+      return;
+    }
+    default:
+      compiler_panic(t->c, a->loc, "rv64 intrinsic kind %d NYI", (int)kind);
+  }
+}
+
+static void rv_asm_block(CGTarget* t, const char* tmpl,
+                         const AsmConstraint* outs, u32 no, Operand* oo,
+                         const AsmConstraint* ins, u32 ni, const Operand* io,
+                         const Sym* clobs, u32 nc) {
+  /* Nothing is lowered here: every operand is ignored and rv_panic is called. */
+  (void)nc; (void)clobs; (void)io; (void)ni; (void)ins; (void)oo; (void)no; (void)outs; (void)tmpl;
+  rv_panic(t, "asm_block");
+}
+
+static void rv_set_loc(CGTarget* t, SrcLoc l) {
+  ((RImpl*)t)->loc = l; /* local copy; the panic calls in this file read it back as ->loc */
+  if (t->mc != NULL) t->mc->set_loc(t->mc, l); /* forward to the emitter when one is attached */
+}
+
+static void rv_finalize(CGTarget* t) { (void)t; } /* finalize hook: intentionally does nothing */
+static void rv_destroy(CGTarget* t) { (void)t; } /* destroy hook: intentionally does nothing */
+
+static void cgt_cleanup(void* arg) { cgtarget_free((CGTarget*)arg); } /* callback handed to compiler_defer: arg is the CGTarget to free */
+
+/* Build the rv64 CGTarget: allocate a zeroed RImpl from c->tu, install the rv_* hooks, and register cgt_cleanup via compiler_defer. */
+CGTarget* rv64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) {
+  RImpl* impl = arena_new(c->tu, RImpl);
+  memset(impl, 0, sizeof(*impl));
+  CGTarget* cg = &impl->base;
+  cg->c = c;
+  cg->obj = o;
+  cg->mc = m;
+  /* function begin/end */
+  cg->func_begin = rv_func_begin;
+  cg->func_end = rv_func_end;
+  /* registers, frame slots, parameters, spills */
+  cg->alloc_reg = rv_alloc_reg;
+  cg->free_reg = rv_free_reg;
+  cg->frame_slot = rv_frame_slot;
+  cg->param = rv_param;
+  cg->clobbers = rv_clobbers;
+  cg->spill_reg = rv_spill_reg;
+  cg->reload_reg = rv_reload_reg;
+  /* labels and branches */
+  cg->label_new = rv_label_new;
+  cg->label_place = rv_label_place;
+  cg->jump = rv_jump;
+  cg->cmp_branch = rv_cmp_branch;
+  /* scopes, break/continue */
+  cg->scope_begin = rv_scope_begin;
+  cg->scope_else = rv_scope_else;
+  cg->scope_end = rv_scope_end;
+  cg->break_to = rv_break_to;
+  cg->continue_to = rv_continue_to;
+  /* constants, moves, memory access, bitfields */
+  cg->load_imm = rv_load_imm;
+  cg->load_const = rv_load_const;
+  cg->copy = rv_copy;
+  cg->load = rv_load;
+  cg->store = rv_store;
+  cg->addr_of = rv_addr_of;
+  cg->tls_addr_of = rv_tls_addr_of;
+  cg->copy_bytes = rv_copy_bytes;
+  cg->set_bytes = rv_set_bytes;
+  cg->bitfield_load = rv_bitfield_load;
+  cg->bitfield_store = rv_bitfield_store;
+  /* arithmetic, comparison, conversion */
+  cg->binop = rv_binop;
+  cg->unop = rv_unop;
+  cg->cmp = rv_cmp;
+  cg->convert = rv_convert;
+  /* calls and returns */
+  cg->call = rv_call;
+  cg->ret = rv_ret;
+  /* alloca and varargs */
+  cg->alloca_ = rv_alloca_;
+  cg->va_start_ = rv_va_start_;
+  cg->va_arg_ = rv_va_arg_;
+  cg->va_end_ = rv_va_end_;
+  cg->va_copy_ = rv_va_copy_;
+  /* setjmp/longjmp hooks are explicitly left NULL */
+  cg->setjmp_ = NULL;
+  cg->longjmp_ = NULL;
+  /* atomics and fences */
+  cg->atomic_load = rv_atomic_load;
+  cg->atomic_store = rv_atomic_store;
+  cg->atomic_rmw = rv_atomic_rmw;
+  cg->atomic_cas = rv_atomic_cas;
+  cg->fence = rv_fence;
+  /* intrinsics and inline asm */
+  cg->intrinsic = rv_intrinsic;
+  cg->asm_block = rv_asm_block;
+  /* source locations and teardown */
+  cg->set_loc = rv_set_loc;
+  cg->finalize = rv_finalize;
+  cg->destroy = rv_destroy;
+
+  (void)type_is_signed; /* bare reference; presumably silences an unused-function warning */
+  compiler_defer(c, cgt_cleanup, cg);
+  return cg;
+}
diff --git a/src/arch/x64.c b/src/arch/x64.c
@@ -1,3075 +0,0 @@
-/* Minimal x86_64 (SysV AMD64, Linux ELF) CGTarget.
- *
- * Single-pass codegen mirroring the structure of src/arch/aarch64.c
- * and src/arch/rv64.c. The frame uses rbp as a frame pointer; locals
- * live at negative offsets from rbp, callee-save spills live below
- * the local area at known offsets, and outgoing args sit at sp+0.
- * The prologue is reserved as a NOP-filled placeholder at func_begin
- * and patched at func_end once frame_size and the callee-save high-
- * water mark are known.
- *
- * Reg allocator: lowest-bit-first over a fixed preference list. INT
- * pool has callee-saves (rbx, r12..r15) at the low bits, then a single
- * caller-saved tail (r10) — so the first reg handed out is callee-saved,
- * which is what tests like g11_caller_saved_live_across_call rely on.
- * FP pool is xmm6..xmm15 (10 regs, all caller-saved on SysV).
- *
- * The six SysV arg-passing GPRs (rdi, rsi, rdx, rcx, r8, r9) are
- * deliberately kept OUT of the pool. If they were in the pool, the
- * arg-emit loop in x_call could clobber an arg's source register
- * before reading it: e.g. `mov rdi, [arg1_local]; mov r8d, edi` for
- * arg5 reads the wrong edi. Mirrors aarch64, which keeps x0..x7 out
- * of its allocator pool for the same reason.
- *
- * Scratches kept outside the pools: rax (primary), rcx, rdx, r11
- * (secondary). rax is also the int return reg; xmm0 is the FP return
- * reg.
- *
- * Scope: the test/cg spine (Groups A–H) plus alloca/VLA (Group I) and
- * SysV varargs (Group J). Remaining unimplemented methods past that
- * panic with a clear message — see doc/X64.md. */
-
-#include <string.h>
-
-#include "arch/arch.h"
-#include "arch/x64.h"
-#include "arch/x64_isa.h"
-#include "core/arena.h"
-#include "core/pool.h"
-#include "obj/obj.h"
-#include "type/type.h"
-
-#define X64_PROLOGUE_BYTES 96u
-
-/* ============================================================
- * Custom register pool.
- *
- * Unlike aa64/rv64 the x64 pool is non-contiguous (skipping rax,
- * rcx, rdx, rsp, rbp, r11). So we keep a bitmap over a static
- * preference order rather than a (base, nregs) range. */
-typedef struct XRegPool {
- u32 free; /* bit i set ⇔ alloc_order[i] is free */
- u32 hwm; /* highest index+1 ever allocated */
- const u8* order; /* alloc_order; first n_cs are callee-saved */
- u8 nregs;
- u8 n_cs;
- u8 pad[2];
-} XRegPool;
-
-static void xpool_init(XRegPool* p, const u8* order, u8 nregs, u8 n_cs) {
- p->order = order;
- p->nregs = nregs;
- p->n_cs = n_cs;
- p->hwm = 0;
- p->free = (nregs >= 32u) ? 0xFFFFFFFFu : ((1u << nregs) - 1u);
-}
-
-static Reg xpool_alloc(XRegPool* p) {
- if (p->free == 0) return (Reg)REG_NONE;
- u32 idx = (u32)__builtin_ctz(p->free);
- p->free &= ~(1u << idx);
- if (idx + 1u > p->hwm) p->hwm = idx + 1u;
- return (Reg)p->order[idx];
-}
-
-static int xpool_free(XRegPool* p, Reg r) {
- for (u8 i = 0; i < p->nregs; ++i) {
- if (p->order[i] == (u8)r) {
- u32 bit = 1u << i;
- if (p->free & bit) return -1;
- p->free |= bit;
- return 1;
- }
- }
- return 0;
-}
-
-static const u8 g_int_order[6] = {
- X64_RBX, X64_R12, X64_R13, X64_R14, X64_R15, /* callee-saved (n_cs=5) */
- X64_R10, /* caller-saved tail */
-};
-
-static const u8 g_fp_order[10] = {
- /* All xmm regs are caller-saved on SysV; preference order is xmm6
- * upward to keep the low arg/return regs (xmm0..5) clear for calls. */
- X64_XMM6, X64_XMM7, X64_XMM8, X64_XMM0 + 9, X64_XMM0 + 10,
- X64_XMM0 + 11, X64_XMM0 + 12, X64_XMM0 + 13, X64_XMM0 + 14, X64_XMM15,
-};
-
-static const u32 g_int_arg_regs[6] = {X64_RDI, X64_RSI, X64_RDX,
- X64_RCX, X64_R8, X64_R9};
-
-/* ============================================================
- * XImpl */
-
-typedef struct XSlot {
- u32 off; /* bytes below rbp (positive); address = rbp - off */
- u32 size;
- u32 align;
- u8 kind;
- u8 pad[3];
-} XSlot;
-
-typedef struct XScope {
- u8 kind;
- u8 has_else;
- u8 pad[2];
- MCLabel else_label;
- MCLabel end_label;
- Label break_label;
- Label continue_label;
-} XScope;
-
-/* alloca emits a placeholder `lea dst, [rsp + 0]` whose disp32 is patched
- * at func_end with the final max_outgoing value. disp_pos records the
- * byte offset of that disp32 in the active text section. */
-typedef struct XAllocaPatch {
- u32 disp_pos;
-} XAllocaPatch;
-
-typedef struct XImpl {
- CGTarget base;
- SrcLoc loc;
- const CGFuncDesc* fd;
-
- u32 func_start;
- u32 prologue_pos;
- MCLabel epilogue_label;
-
- XSlot* slots;
- u32 nslots;
- u32 slots_cap;
- u32 cum_off;
- u32 max_outgoing;
-
- u32 next_param_int;
- u32 next_param_fp;
- u32 next_param_stack;
- u8 has_sret;
- u8 has_alloca;
- u8 is_variadic;
- u8 pad0;
- FrameSlot sret_ptr_slot;
- FrameSlot reg_save_slot; /* variadic: 176-byte __va_list_tag reg save area */
-
- XRegPool int_pool;
- XRegPool fp_pool;
-
- XScope* scopes;
- u32 nscopes;
- u32 scopes_cap;
-
- XAllocaPatch* alloca_patches;
- u32 nalloca_patches;
- u32 alloca_patches_cap;
-} XImpl;
-
-static XImpl* impl_of(CGTarget* t) { return (XImpl*)t; }
-
-/* Forward declarations. */
-static FrameSlot x_frame_slot(CGTarget* t, const FrameSlotDesc* d);
-static XSlot* slot_get(XImpl* a, FrameSlot fs);
-static void x_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma);
-static void x_store(CGTarget* t, Operand addr, Operand src, MemAccess ma);
-static void x_free_reg(CGTarget* t, Reg r, RegClass cls);
-
-extern void debug_emit_row(Debug*, ObjSecId text_section, u32 offset, SrcLoc);
-
-/* ---- type helpers ---- */
-static int type_is_64(const Type* t) {
- if (!t) return 0;
- switch (t->kind) {
- case TY_LONG:
- case TY_ULONG:
- case TY_LLONG:
- case TY_ULLONG:
- case TY_PTR:
- case TY_DOUBLE:
- return 1;
- default:
- return 0;
- }
-}
-static int type_is_fp_double(const Type* t) {
- return t && (t->kind == TY_DOUBLE || t->kind == TY_LDOUBLE);
-}
-static u32 type_byte_size(const Type* t) {
- if (!t) return 4;
- switch (t->kind) {
- case TY_CHAR:
- case TY_SCHAR:
- case TY_UCHAR:
- case TY_BOOL:
- return 1;
- case TY_SHORT:
- case TY_USHORT:
- return 2;
- case TY_INT:
- case TY_UINT:
- case TY_FLOAT:
- return 4;
- case TY_LONG:
- case TY_ULONG:
- case TY_LLONG:
- case TY_ULLONG:
- case TY_PTR:
- case TY_DOUBLE:
- return 8;
- default:
- return 8;
- }
-}
-static int type_is_signed(const Type* t) {
- if (!t) return 0;
- switch (t->kind) {
- case TY_CHAR:
- case TY_SCHAR:
- case TY_SHORT:
- case TY_INT:
- case TY_LONG:
- case TY_LLONG:
- return 1;
- default:
- return 0;
- }
-}
-
-static _Noreturn void x_panic(CGTarget* t, const char* what) {
- SrcLoc loc = impl_of(t)->loc;
- compiler_panic(t->c, loc, "x64: %s not implemented", what);
-}
-
-/* ============================================================
- * Byte-level emit helpers.
- *
- * x64 instructions are variable length: optional legacy prefix(es),
- * optional REX, 1-3 byte opcode, ModR/M, optional SIB, optional
- * displacement, optional immediate. Helpers below build sequences
- * into the active MCEmitter section, recording one Debug row per
- * instruction-start. */
-static void emit1(MCEmitter* mc, u8 b) {
- u32 ofs = obj_pos(mc->obj, mc->section_id);
- mc->emit_bytes(mc, &b, 1);
- if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
-}
-static void emit_u32le(MCEmitter* mc, u32 v) {
- u8 b[4];
- b[0] = (u8)v;
- b[1] = (u8)(v >> 8);
- b[2] = (u8)(v >> 16);
- b[3] = (u8)(v >> 24);
- mc->emit_bytes(mc, b, 4);
-}
-static void emit_u64le(MCEmitter* mc, u64 v) {
- u8 b[8];
- for (int i = 0; i < 8; ++i) b[i] = (u8)(v >> (i * 8));
- mc->emit_bytes(mc, b, 8);
-}
-
-static u8 make_rex(int w, u32 reg, u32 index, u32 rm) {
- u8 r = 0;
- if (w) r |= X64_REX_W;
- if (reg & 8) r |= X64_REX_R;
- if (index & 8) r |= X64_REX_X;
- if (rm & 8) r |= X64_REX_B;
- return r ? (u8)(X64_REX_BASE | r) : 0;
-}
-static void emit_rex(MCEmitter* mc, int w, u32 reg, u32 index, u32 rm) {
- u8 r = make_rex(w, reg, index, rm);
- if (r) mc->emit_bytes(mc, &r, 1);
-}
-/* Force REX (even REX=0x40) — required for byte-reg encodings that
- * promote SIL/DIL/etc. */
-static void emit_rex_force(MCEmitter* mc, int w, u32 reg, u32 index, u32 rm) {
- u8 r = (u8)(X64_REX_BASE | (w ? X64_REX_W : 0) | ((reg & 8) ? X64_REX_R : 0) |
- ((index & 8) ? X64_REX_X : 0) | ((rm & 8) ? X64_REX_B : 0));
- mc->emit_bytes(mc, &r, 1);
-}
-
-static u8 modrm(u32 mod, u32 reg, u32 rm) {
- return (u8)(((mod & 3u) << 6) | ((reg & 7u) << 3) | (rm & 7u));
-}
-static u8 sib(u32 scale, u32 index, u32 base) {
- return (u8)(((scale & 3u) << 6) | ((index & 7u) << 3) | (base & 7u));
-}
-
-static u32 disp_mod(u32 base, i32 disp) {
- if (disp == 0 && (base & 7u) != 5u) return 0u; /* [base] */
- if (disp >= -128 && disp <= 127) return 1u; /* [base + disp8] */
- return 2u; /* [base + disp32] */
-}
-
-static void emit_mem_operand(MCEmitter* mc, u32 reg, u32 base, i32 disp) {
- u32 m = disp_mod(base, disp);
- if ((base & 7u) == 4u) {
- /* SIB byte required: index=4 (none), base=base. */
- u8 mr = modrm(m, reg, 4u);
- mc->emit_bytes(mc, &mr, 1);
- u8 s = sib(0, 4u, base);
- mc->emit_bytes(mc, &s, 1);
- } else {
- u8 mr = modrm(m, reg, base);
- mc->emit_bytes(mc, &mr, 1);
- }
- if (m == 1u) {
- u8 d = (u8)(i8)disp;
- mc->emit_bytes(mc, &d, 1);
- } else if (m == 2u) {
- emit_u32le(mc, (u32)disp);
- }
-}
-static void emit_rm_reg(MCEmitter* mc, u32 reg, u32 rm) {
- u8 mr = modrm(3u, reg, rm);
- mc->emit_bytes(mc, &mr, 1);
-}
-
-/* ---- specific instruction emitters ---- */
-
-/* mov rd, rs (64-bit if w, else 32-bit). */
-static void emit_mov_rr(MCEmitter* mc, int w, u32 dst, u32 src) {
- u32 ofs = obj_pos(mc->obj, mc->section_id);
- emit_rex(mc, w, src, 0, dst);
- u8 op = 0x89; /* MOV r/m, r */
- mc->emit_bytes(mc, &op, 1);
- emit_rm_reg(mc, src, dst);
- if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
-}
-
-/* mov reg, [base + disp]; size 1/2/4/8. */
-static void emit_mov_load(MCEmitter* mc, u32 size, int signed_ext, u32 dst,
- u32 base, i32 disp) {
- u32 ofs = obj_pos(mc->obj, mc->section_id);
- if (size == 8) {
- emit_rex(mc, 1, dst, 0, base);
- u8 op = 0x8B;
- mc->emit_bytes(mc, &op, 1);
- emit_mem_operand(mc, dst, base, disp);
- } else if (size == 4) {
- emit_rex(mc, 0, dst, 0, base);
- u8 op = 0x8B;
- mc->emit_bytes(mc, &op, 1);
- emit_mem_operand(mc, dst, base, disp);
- } else if (size == 2) {
- emit_rex(mc, 0, dst, 0, base);
- u8 op[2] = {0x0F, signed_ext ? 0xBF : 0xB7};
- mc->emit_bytes(mc, op, 2);
- emit_mem_operand(mc, dst, base, disp);
- } else if (size == 1) {
- emit_rex(mc, 0, dst, 0, base);
- u8 op[2] = {0x0F, signed_ext ? 0xBE : 0xB6};
- mc->emit_bytes(mc, op, 2);
- emit_mem_operand(mc, dst, base, disp);
- }
- if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
-}
-
-/* mov [base + disp], src; size 1/2/4/8. */
-static void emit_mov_store(MCEmitter* mc, u32 size, u32 src, u32 base,
- i32 disp) {
- u32 ofs = obj_pos(mc->obj, mc->section_id);
- if (size == 8) {
- emit_rex(mc, 1, src, 0, base);
- u8 op = 0x89;
- mc->emit_bytes(mc, &op, 1);
- emit_mem_operand(mc, src, base, disp);
- } else if (size == 4) {
- emit_rex(mc, 0, src, 0, base);
- u8 op = 0x89;
- mc->emit_bytes(mc, &op, 1);
- emit_mem_operand(mc, src, base, disp);
- } else if (size == 2) {
- u8 p = 0x66;
- mc->emit_bytes(mc, &p, 1);
- emit_rex(mc, 0, src, 0, base);
- u8 op = 0x89;
- mc->emit_bytes(mc, &op, 1);
- emit_mem_operand(mc, src, base, disp);
- } else if (size == 1) {
- /* Force REX so SIL/DIL/etc are addressable as byte regs. */
- emit_rex_force(mc, 0, src, 0, base);
- u8 op = 0x88;
- mc->emit_bytes(mc, &op, 1);
- emit_mem_operand(mc, src, base, disp);
- }
- if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
-}
-
-static void emit_lea(MCEmitter* mc, u32 dst, u32 base, i32 disp) {
- u32 ofs = obj_pos(mc->obj, mc->section_id);
- emit_rex(mc, 1, dst, 0, base);
- u8 op = 0x8D;
- mc->emit_bytes(mc, &op, 1);
- emit_mem_operand(mc, dst, base, disp);
- if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
-}
-
-/* movabs reg, imm64 (REX.W + B8+r imm64) for is64; mov r32, imm32 (B8+r
- * imm32) for !is64. Both 10/5 bytes. */
-static void emit_load_imm(MCEmitter* mc, int is64, u32 dst, i64 imm) {
- u32 ofs = obj_pos(mc->obj, mc->section_id);
- if (is64) {
- emit_rex(mc, 1, 0, 0, dst);
- u8 op = (u8)(0xB8 | (dst & 7));
- mc->emit_bytes(mc, &op, 1);
- emit_u64le(mc, (u64)imm);
- } else {
- emit_rex(mc, 0, 0, 0, dst);
- u8 op = (u8)(0xB8 | (dst & 7));
- mc->emit_bytes(mc, &op, 1);
- emit_u32le(mc, (u32)imm);
- }
- if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
-}
-
-/* Two-operand ALU r/m, r. op picks ADD(01)/SUB(29)/AND(21)/OR(09)/XOR(31)/
- * CMP(39)/MOV(89)/TEST(85). */
-static void emit_alu_rr(MCEmitter* mc, int w, u8 op, u32 dst, u32 src) {
- u32 ofs = obj_pos(mc->obj, mc->section_id);
- emit_rex(mc, w, src, 0, dst);
- mc->emit_bytes(mc, &op, 1);
- emit_rm_reg(mc, src, dst);
- if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
-}
-
-static void emit_imul_rr(MCEmitter* mc, int w, u32 dst, u32 src) {
- u32 ofs = obj_pos(mc->obj, mc->section_id);
- emit_rex(mc, w, dst, 0, src);
- u8 op[2] = {0x0F, 0xAF};
- mc->emit_bytes(mc, op, 2);
- emit_rm_reg(mc, dst, src);
- if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
-}
-
-static void emit_f7_rm(MCEmitter* mc, int w, u32 sub, u32 reg) {
- u32 ofs = obj_pos(mc->obj, mc->section_id);
- emit_rex(mc, w, 0, 0, reg);
- u8 op = 0xF7;
- mc->emit_bytes(mc, &op, 1);
- emit_rm_reg(mc, sub, reg);
- if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
-}
-
-static void emit_shift_cl(MCEmitter* mc, int w, u32 sub, u32 reg) {
- u32 ofs = obj_pos(mc->obj, mc->section_id);
- emit_rex(mc, w, 0, 0, reg);
- u8 op = 0xD3;
- mc->emit_bytes(mc, &op, 1);
- emit_rm_reg(mc, sub, reg);
- if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
-}
-
-/* Shift r/m by imm8: opcode C1 /sub ib. sub: SHL=4, SHR=5, SAR=7. */
-static void emit_shift_imm(MCEmitter* mc, int w, u32 sub, u32 reg, u8 imm) {
- u32 ofs = obj_pos(mc->obj, mc->section_id);
- emit_rex(mc, w, 0, 0, reg);
- u8 buf[3];
- buf[0] = 0xC1;
- buf[1] = modrm(3u, sub, reg);
- buf[2] = imm;
- mc->emit_bytes(mc, buf, 3);
- if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
-}
-
-static void emit_cqo_or_cdq(MCEmitter* mc, int w) {
- if (w) {
- u8 buf[2] = {X64_REX_BASE | X64_REX_W, 0x99};
- mc->emit_bytes(mc, buf, 2);
- } else {
- u8 op = 0x99;
- mc->emit_bytes(mc, &op, 1);
- }
-}
-
-static void emit_xor_self(MCEmitter* mc, int w, u32 r) {
- emit_alu_rr(mc, w, 0x31, r, r);
-}
-
-/* `cmp reg, imm8`: opcode 83 /7 ib on a register-direct operand. */
-static void emit_cmp_imm8(MCEmitter* mc, int w, u32 reg, i8 imm) {
-  const u32 start = obj_pos(mc->obj, mc->section_id);
-
-  emit_rex(mc, w, 0, 0, reg);
-  u8 insn[3] = {0x83, modrm(3u, 7u, reg), (u8)imm};
-  mc->emit_bytes(mc, insn, 3);
-
-  if (!mc->debug) return;
-  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
-}
-
-/* ALU op on a register with a sign-extended 8-bit immediate: opcode
- * 83 /sub ib.  `sub` selects the operation: ADD=0, OR=1, ADC=2, SBB=3,
- * AND=4, SUB=5, XOR=6, CMP=7. */
-static void emit_alu_imm8(MCEmitter* mc, int w, u32 sub, u32 reg, i8 imm) {
-  const u32 start = obj_pos(mc->obj, mc->section_id);
-
-  emit_rex(mc, w, 0, 0, reg);
-  u8 insn[3] = {0x83, modrm(3u, sub, reg), (u8)imm};
-  mc->emit_bytes(mc, insn, 3);
-
-  if (!mc->debug) return;
-  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
-}
-
-/* ALU op on a register with a 32-bit immediate: opcode 81 /sub id.
- * With w=1 the CPU sign-extends the immediate to 64 bits.  The
- * immediate is written little-endian. */
-static void emit_alu_imm32(MCEmitter* mc, int w, u32 sub, u32 reg, i32 imm) {
-  const u32 start = obj_pos(mc->obj, mc->section_id);
-  const u32 bits = (u32)imm;
-  u8 insn[6];
-
-  emit_rex(mc, w, 0, 0, reg);
-  insn[0] = 0x81;
-  insn[1] = modrm(3u, sub, reg);
-  for (u32 k = 0; k < 4u; ++k) insn[2u + k] = (u8)(bits >> (8u * k));
-  mc->emit_bytes(mc, insn, 6);
-
-  if (!mc->debug) return;
-  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
-}
-
-/* Three-operand IMUL with a sign-extended 8-bit immediate:
- * `imul dst, src, imm8` (opcode 6B /r ib).  Unlike the read-modify-write
- * ALU forms, the product lands in `dst` without the caller having to
- * copy `src` there first. */
-static void emit_imul_imm8(MCEmitter* mc, int w, u32 dst, u32 src, i8 imm) {
-  const u32 start = obj_pos(mc->obj, mc->section_id);
-
-  emit_rex(mc, w, dst, 0, src);
-  u8 insn[3] = {0x6B, modrm(3u, dst, src), (u8)imm};
-  mc->emit_bytes(mc, insn, 3);
-
-  if (!mc->debug) return;
-  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
-}
-/* Three-operand IMUL with a 32-bit immediate: `imul dst, src, imm32`
- * (opcode 69 /r id).  The immediate is written little-endian. */
-static void emit_imul_imm32(MCEmitter* mc, int w, u32 dst, u32 src, i32 imm) {
-  const u32 start = obj_pos(mc->obj, mc->section_id);
-  const u32 bits = (u32)imm;
-  u8 insn[6];
-
-  emit_rex(mc, w, dst, 0, src);
-  insn[0] = 0x69;
-  insn[1] = modrm(3u, dst, src);
-  for (u32 k = 0; k < 4u; ++k) insn[2u + k] = (u8)(bits >> (8u * k));
-  mc->emit_bytes(mc, insn, 6);
-
-  if (!mc->debug) return;
-  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
-}
-
-/* Returns 1 when `imm` lies in [-128, 127], i.e. it can be encoded by
- * the sign-extended imm8 forms (opcodes 83 and 6B); 0 otherwise. */
-static int imm_fits_i8(i64 imm) {
-  return !(imm < -128 || imm > 127);
-}
-/* Returns 1 when `imm` lies in [INT32_MIN, INT32_MAX], i.e. it can be
- * encoded by the sign-extended imm32 forms (opcodes 81 and 69; with w=1
- * the immediate is sign-extended to 64 bits).  Values outside that
- * range return 0 and must be materialized through emit_load_imm. */
-static int imm_fits_i32(i64 imm) {
-  const i64 lo = -2147483648LL;
-  const i64 hi = 2147483647LL;
-  return !(imm < lo || imm > hi);
-}
-
-/* `test reg, reg`: TEST (opcode 85) of a register against itself. */
-static void emit_test_self(MCEmitter* mc, int w, u32 reg) {
-  const u8 test_rm_r = 0x85;
-  emit_alu_rr(mc, w, test_rm_r, reg, reg);
-}
-
-/* SETcc on the low byte of `reg`: opcode 0F 90+cc.  Only the low four
- * bits of `cc` are used.  The REX prefix goes through emit_rex_force
- * (presumably so the low byte of every register is addressable —
- * confirm against emit_rex_force). */
-static void emit_setcc(MCEmitter* mc, u32 cc, u32 reg) {
-  const u32 start = obj_pos(mc->obj, mc->section_id);
-  u8 opcode[2] = {0x0F, (u8)(0x90 | (cc & 0xF))};
-
-  emit_rex_force(mc, 0, 0, 0, reg);
-  mc->emit_bytes(mc, opcode, 2);
-  emit_rm_reg(mc, 0u, reg);
-
-  if (!mc->debug) return;
-  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
-}
-
-/* `movzx dst32, src8`: opcode 0F B6 with a register-direct source.
- * The REX prefix goes through emit_rex_force. */
-static void emit_movzx_r32_r8(MCEmitter* mc, u32 dst, u32 src) {
-  const u32 start = obj_pos(mc->obj, mc->section_id);
-  u8 opcode[2] = {0x0F, 0xB6};
-
-  emit_rex_force(mc, 0, dst, 0, src);
-  mc->emit_bytes(mc, opcode, 2);
-  emit_rm_reg(mc, dst, src);
-
-  if (!mc->debug) return;
-  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
-}
-
-/* Register-to-register zero/sign extension.  `src_size` is the source
- * width in bytes and `signed_ext` picks sign- vs zero-extension:
- *   4, signed   : movsxd r64, r32      (REX.W 63 /r)
- *   4, unsigned : mov r32, r32         (89 /r; a 32-bit write clears the
- *                                       upper 32 bits)
- *   1           : movsx/movzx r, r8    (0F BE / 0F B6, REX via
- *                                       emit_rex_force)
- *   2           : movsx/movzx r, r16   (0F BF / 0F B7)
- * Any other `src_size` emits no instruction; the debug row is still
- * recorded. */
-static void emit_extend_rr(MCEmitter* mc, int w, int signed_ext, u32 src_size,
-                           u32 dst, u32 src) {
-  const u32 start = obj_pos(mc->obj, mc->section_id);
-
-  switch (src_size) {
-    case 4:
-      if (signed_ext) {
-        u8 movsxd = 0x63;
-        emit_rex(mc, 1, dst, 0, src);
-        mc->emit_bytes(mc, &movsxd, 1);
-        emit_rm_reg(mc, dst, src);
-      } else {
-        /* MOV is `r/m <- reg`, so the operand roles swap here. */
-        u8 mov = 0x89;
-        emit_rex(mc, 0, src, 0, dst);
-        mc->emit_bytes(mc, &mov, 1);
-        emit_rm_reg(mc, src, dst);
-      }
-      break;
-    case 1: {
-      u8 opcode[2] = {0x0F, signed_ext ? 0xBE : 0xB6};
-      emit_rex_force(mc, w, dst, 0, src);
-      mc->emit_bytes(mc, opcode, 2);
-      emit_rm_reg(mc, dst, src);
-      break;
-    }
-    case 2: {
-      u8 opcode[2] = {0x0F, signed_ext ? 0xBF : 0xB7};
-      emit_rex(mc, w, dst, 0, src);
-      mc->emit_bytes(mc, opcode, 2);
-      emit_rm_reg(mc, dst, src);
-      break;
-    }
-    default:
-      break;
-  }
-
-  if (!mc->debug) return;
-  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
-}
-
-/* Near return: single byte C3. */
-static void emit_ret(MCEmitter* mc) {
-  u8 ret_byte = 0xC3;
-  mc->emit_bytes(mc, &ret_byte, 1);
-}
-/* LEAVE: single byte C9. */
-static void emit_leave(MCEmitter* mc) {
-  u8 leave_byte = 0xC9;
-  mc->emit_bytes(mc, &leave_byte, 1);
-}
-
-/* ---- SSE scalar FP encoders ---- */
-/* Register-to-register SSE instruction: optional one-byte `prefix`
- * (skipped when 0), REX with W=0, then 0F `opcode` and a
- * register-direct ModRM with `dst` in reg and `src` in r/m. */
-static void emit_sse_rr(MCEmitter* mc, u8 prefix, u8 opcode, u32 dst, u32 src) {
-  const u32 start = obj_pos(mc->obj, mc->section_id);
-  u8 esc_op[2] = {0x0F, opcode};
-
-  if (prefix) mc->emit_bytes(mc, &prefix, 1);
-  emit_rex(mc, 0, dst, 0, src);
-  mc->emit_bytes(mc, esc_op, 2);
-  emit_rm_reg(mc, dst, src);
-
-  if (!mc->debug) return;
-  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
-}
-/* SSE load `dst <- [base + disp]`: optional `prefix` (skipped when 0),
- * REX with W=0, 0F `opcode`, then the memory operand produced by
- * emit_mem_operand. */
-static void emit_sse_load(MCEmitter* mc, u8 prefix, u8 opcode, u32 dst,
-                          u32 base, i32 disp) {
-  const u32 start = obj_pos(mc->obj, mc->section_id);
-  u8 esc_op[2] = {0x0F, opcode};
-
-  if (prefix) mc->emit_bytes(mc, &prefix, 1);
-  emit_rex(mc, 0, dst, 0, base);
-  mc->emit_bytes(mc, esc_op, 2);
-  emit_mem_operand(mc, dst, base, disp);
-
-  if (!mc->debug) return;
-  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
-}
-/* SSE store `[base + disp] <- src`: optional `prefix` (skipped when 0),
- * REX with W=0, 0F `opcode`, then the memory operand produced by
- * emit_mem_operand. */
-static void emit_sse_store(MCEmitter* mc, u8 prefix, u8 opcode, u32 src,
-                           u32 base, i32 disp) {
-  const u32 start = obj_pos(mc->obj, mc->section_id);
-  u8 esc_op[2] = {0x0F, opcode};
-
-  if (prefix) mc->emit_bytes(mc, &prefix, 1);
-  emit_rex(mc, 0, src, 0, base);
-  mc->emit_bytes(mc, esc_op, 2);
-  emit_mem_operand(mc, src, base, disp);
-
-  if (!mc->debug) return;
-  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
-}
-/* Same encoding as emit_sse_rr, except the caller supplies the REX.W
- * setting through `w`. */
-static void emit_sse_rr_w(MCEmitter* mc, u8 prefix, u8 opcode, int w, u32 dst,
-                          u32 src) {
-  const u32 start = obj_pos(mc->obj, mc->section_id);
-  u8 esc_op[2] = {0x0F, opcode};
-
-  if (prefix) mc->emit_bytes(mc, &prefix, 1);
-  emit_rex(mc, w, dst, 0, src);
-  mc->emit_bytes(mc, esc_op, 2);
-  emit_rm_reg(mc, dst, src);
-
-  if (!mc->debug) return;
-  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
-}
-
-/* ============================================================
- * Function lifecycle */
-
-/* Open a function.  Switches to the function's text section, aligns to
- * 16 bytes with NOP (0x90) fill, resets the per-function state held in
- * XImpl, starts the CFI region, and reserves X64_PROLOGUE_BYTES of NOPs
- * that x_func_end later overwrites with the real prologue.
- *
- * When the ABI reports an sret return, a hidden 8-byte slot is created
- * for the destination pointer (which arrives in rdi) and named integer
- * arguments start at index 1.  When the function is variadic, a
- * 176-byte register-save area is created and every integer and xmm
- * argument register is stored into it right after the placeholder. */
-static void x_func_begin(CGTarget* t, const CGFuncDesc* fd) {
-  XImpl* impl = impl_of(t);
-  MCEmitter* mc = t->mc;
-
-  mc->set_section(mc, fd->text_section_id);
-  mc->emit_align(mc, 16, 0x90);
-
-  /* Fresh per-function state. */
-  impl->fd = fd;
-  impl->func_start = mc->pos(mc);
-  impl->next_param_int = 0;
-  impl->next_param_fp = 0;
-  impl->next_param_stack = 0;
-  impl->has_sret = (fd->abi && fd->abi->has_sret) ? 1 : 0;
-  impl->has_alloca = 0;
-  impl->is_variadic = (fd->abi && fd->abi->variadic) ? 1 : 0;
-  impl->cum_off = 0;
-  impl->max_outgoing = 0;
-  xpool_init(&impl->int_pool, g_int_order, 6u, 5u);
-  xpool_init(&impl->fp_pool, g_fp_order, 10u, 0u);
-  impl->nslots = 0;
-  impl->nscopes = 0;
-  impl->nalloca_patches = 0;
-  impl->sret_ptr_slot = FRAME_SLOT_NONE;
-  impl->reg_save_slot = FRAME_SLOT_NONE;
-  impl->epilogue_label = mc->label_new(mc);
-
-  mc->cfi_startproc(mc);
-
-  /* Fixed-size NOP placeholder; patched once the frame size is known. */
-  impl->prologue_pos = mc->pos(mc);
-  {
-    u32 remaining = X64_PROLOGUE_BYTES;
-    while (remaining--) emit1(mc, 0x90);
-  }
-
-  if (impl->has_sret) {
-    /* rdi carries the sret destination at entry; give it a hidden home
-     * so the body is free to reuse rdi.  Named integer args therefore
-     * begin at rsi (index 1). */
-    FrameSlotDesc sret_desc = {
-        .type = NULL, .name = 0, .loc = {0, 0, 0},
-        .size = 8, .align = 8, .kind = FS_SPILL, .flags = 0,
-    };
-    impl->sret_ptr_slot = x_frame_slot(t, &sret_desc);
-    impl->next_param_int = 1;
-  }
-
-  if (!impl->is_variadic) return;
-
-  /* SysV register-save area: rdi..r9 at +0..+40, then xmm0..xmm7 at
-   * +48..+160 with a 16-byte stride.  The saves are emitted right after
-   * the placeholder so the incoming values are captured before x_param()
-   * starts spilling the named arguments. */
-  FrameSlotDesc save_desc = {
-      .type = NULL, .name = 0, .loc = {0, 0, 0},
-      .size = 176, .align = 8, .kind = FS_SPILL, .flags = 0,
-  };
-  impl->reg_save_slot = x_frame_slot(t, &save_desc);
-  XSlot* area = slot_get(impl, impl->reg_save_slot);
-
-  static const u32 gprs[6] = {X64_RDI, X64_RSI, X64_RDX,
-                              X64_RCX, X64_R8,  X64_R9};
-  for (u32 g = 0; g < 6; ++g) {
-    emit_mov_store(mc, 8, gprs[g], X64_RBP,
-                   -(i32)area->off + (i32)(g * 8u));
-  }
-  /* movsd stores only the low 8 bytes of each xmm; va_arg reads 8 bytes
-   * per FP slot, so the upper half of each 16-byte cell is unused. */
-  for (u32 x = 0; x < 8; ++x) {
-    emit_sse_store(mc, 0xF2, 0x11, (u32)(X64_XMM0 + x), X64_RBP,
-                   -(i32)area->off + (i32)(48u + x * 16u));
-  }
-}
-
-/* Round `v` up to the next multiple of `a`.  The mask arithmetic is
- * only meaningful when `a` is a power of two. */
-static u32 align_up_u32(u32 v, u32 a) {
-  const u32 mask = a - 1u;
-  return (v + mask) & ~mask;
-}
-
-/* Close a function: place the epilogue label, restore the saved
- * registers, emit `leave; ret`, then go back and patch
- *   - the prologue placeholder reserved by x_func_begin, and
- *   - the disp32 of every recorded alloca `lea` site,
- * and finally define the function symbol and end the CFI region. */
-static void x_func_end(CGTarget* t) {
-  XImpl* impl = impl_of(t);
-  MCEmitter* mc = t->mc;
-
-  /* Registers to save: the first `n_saved` entries of int_pool.order,
-   * where n_saved is the pool high-water mark capped at n_cs. */
-  u32 n_saved = impl->int_pool.hwm;
-  if (n_saved > impl->int_pool.n_cs) n_saved = impl->int_pool.n_cs;
-  const u32 saved_bytes = n_saved * 8u;
-
-  /* SysV wants rsp to be 16-byte aligned at every call.  At entry rsp is
-   * 8 mod 16 (return address just pushed); `push rbp` brings it to
-   * 0 mod 16, so `sub rsp, frame_size` keeps that alignment only when
-   * frame_size is a multiple of 16. */
-  u32 frame_size = align_up_u32(
-      impl->max_outgoing + saved_bytes + impl->cum_off, 16u);
-  if (frame_size == 0) frame_size = 16;
-
-  mc->label_place(mc, impl->epilogue_label);
-
-  /* Restore in reverse order; register k-1 lives at
-   * rbp - (cum_off + k*8). */
-  for (u32 k = n_saved; k > 0; --k) {
-    const u32 reg = impl->int_pool.order[k - 1u];
-    const i32 off = -(i32)impl->cum_off - (i32)k * 8;
-    emit_mov_load(mc, /*size=*/8, /*signed=*/0, reg, X64_RBP, off);
-  }
-
-  emit_leave(mc);
-  emit_ret(mc);
-
-  /* ---- Build the real prologue in a NOP-filled scratch buffer. ---- */
-  u8 buf[X64_PROLOGUE_BYTES];
-  u32 wi = 0;
-  int overflowed = 0;
-  for (u32 k = 0; k < X64_PROLOGUE_BYTES; ++k) buf[k] = 0x90;
-
-  /* push rbp */
-  buf[wi++] = 0x55;
-  /* mov rbp, rsp  (REX.W 89 E5) */
-  buf[wi++] = X64_REX_BASE | X64_REX_W;
-  buf[wi++] = 0x89;
-  buf[wi++] = modrm(3u, X64_RSP, X64_RBP);
-  /* sub rsp, frame_size  (REX.W 81 /5 imm32) */
-  buf[wi++] = X64_REX_BASE | X64_REX_W;
-  buf[wi++] = 0x81;
-  buf[wi++] = modrm(3u, 5u, X64_RSP);
-  for (u32 k = 0; k < 4u; ++k) buf[wi++] = (u8)(frame_size >> (8u * k));
-
-  /* sret: mov [rbp + disp32], rdi */
-  XSlot* sret = NULL;
-  if (impl->has_sret && impl->sret_ptr_slot != FRAME_SLOT_NONE)
-    sret = slot_get(impl, impl->sret_ptr_slot);
-  if (sret) {
-    if (wi + 7 > X64_PROLOGUE_BYTES) {
-      overflowed = 1;
-    } else {
-      const u32 disp = (u32)(-(i32)sret->off);
-      buf[wi++] = X64_REX_BASE | X64_REX_W;
-      buf[wi++] = 0x89;
-      buf[wi++] = modrm(2u, X64_RDI, X64_RBP);
-      for (u32 k = 0; k < 4u; ++k) buf[wi++] = (u8)(disp >> (8u * k));
-    }
-  }
-
-  /* Save registers: mov [rbp + disp32], reg — same slots the epilogue
-   * reads back. */
-  for (u32 i = 0; !overflowed && i < n_saved; ++i) {
-    if (wi + 7 > X64_PROLOGUE_BYTES) {
-      overflowed = 1;
-      break;
-    }
-    const u32 reg = impl->int_pool.order[i];
-    const u32 disp = (u32)(-(i32)impl->cum_off - (i32)(i + 1) * 8);
-    buf[wi++] = (u8)(X64_REX_BASE | X64_REX_W | ((reg & 8) ? X64_REX_R : 0));
-    buf[wi++] = 0x89;
-    buf[wi++] = modrm(2u, (reg & 7u), X64_RBP);
-    for (u32 k = 0; k < 4u; ++k) buf[wi++] = (u8)(disp >> (8u * k));
-  }
-
-  if (overflowed) {
-    compiler_panic(t->c, impl->loc,
-                   "x64: prologue placeholder overflow (%u of %u bytes)", wi,
-                   X64_PROLOGUE_BYTES);
-  }
-  obj_patch(t->obj, impl->fd->text_section_id, impl->prologue_pos, buf,
-            X64_PROLOGUE_BYTES);
-
-  /* Every alloca site emitted `lea dst, [rsp + 0]`; its disp32 becomes
-   * the final max_outgoing (already 16-aligned via the
-   * `(stack_off+15)&~15` round at every call site). */
-  u8 disp_bytes[4];
-  for (u32 k = 0; k < 4u; ++k)
-    disp_bytes[k] = (u8)(impl->max_outgoing >> (8u * k));
-  for (u32 i = 0; i < impl->nalloca_patches; ++i) {
-    obj_patch(t->obj, impl->fd->text_section_id,
-              impl->alloca_patches[i].disp_pos, disp_bytes, 4);
-  }
-
-  /* Function symbol covers [func_start, current position). */
-  const u32 end = mc->pos(mc);
-  obj_symbol_define(t->obj, impl->fd->sym, impl->fd->text_section_id,
-                    (u64)impl->func_start, (u64)(end - impl->func_start));
-
-  mc->cfi_endproc(mc);
-  impl->fd = NULL;
-}
-
-/* ============================================================
- * Registers / frame */
-
-/* Allocate a register of class `cls` from the matching pool.  `ty` is
- * unused.  Any class other than RC_INT / RC_FP is a compiler panic. */
-static Reg x_alloc_reg(CGTarget* t, RegClass cls, const Type* ty) {
-  XImpl* impl = impl_of(t);
-  (void)ty;
-  switch (cls) {
-    case RC_INT:
-      return xpool_alloc(&impl->int_pool);
-    case RC_FP:
-      return xpool_alloc(&impl->fp_pool);
-    default:
-      break;
-  }
-  compiler_panic(t->c, impl->loc, "x64 alloc_reg: class %d unimpl", (int)cls);
-}
-
-/* Return register `r` to its pool (the FP pool for RC_FP, the integer
- * pool for every other class).  xpool_free result: 1 means released,
- * -1 means the register was already free, anything else means it does
- * not belong to that pool; the last two are compiler panics. */
-static void x_free_reg(CGTarget* t, Reg r, RegClass cls) {
-  XImpl* impl = impl_of(t);
-  const int is_fp = (cls == RC_FP);
-  XRegPool* pool = is_fp ? &impl->fp_pool : &impl->int_pool;
-
-  const int status = xpool_free(pool, r);
-  if (status == 1) return;
-  if (status == -1) {
-    compiler_panic(t->c, impl->loc, "x64 free_reg: reg %u already free",
-                   (unsigned)r);
-  }
-  compiler_panic(t->c, impl->loc, "x64 free_reg: reg %u not in %s pool",
-                 (unsigned)r, is_fp ? "fp" : "int");
-}
-
-/* Create a new frame slot described by `d` and return its 1-based
- * handle.  Slots grow downward from rbp: the slot's `off` is the running
- * total `cum_off` plus the slot size, rounded up to the slot alignment,
- * and the slot is later addressed as [rbp - off].  A zero size defaults
- * to 8 and a zero alignment to 1.  The slot table doubles (starting at
- * 8 entries) in the translation-unit arena when full. */
-static FrameSlot x_frame_slot(CGTarget* t, const FrameSlotDesc* d) {
-  XImpl* impl = impl_of(t);
-
-  if (impl->nslots == impl->slots_cap) {
-    const u32 grown = impl->slots_cap ? impl->slots_cap * 2 : 8;
-    XSlot* fresh = arena_array(t->c->tu, XSlot, grown);
-    if (impl->slots) memcpy(fresh, impl->slots, sizeof(XSlot) * impl->nslots);
-    impl->slots = fresh;
-    impl->slots_cap = grown;
-  }
-
-  const u32 size = d->size ? d->size : 8;
-  const u32 align = d->align ? d->align : 1;
-  const u32 mask = align - 1u;
-  const u32 off = (impl->cum_off + size + mask) & ~mask;
-
-  XSlot* slot = &impl->slots[impl->nslots];
-  slot->off = off;
-  slot->size = size;
-  slot->align = align;
-  slot->kind = d->kind;
-
-  impl->cum_off = off;
-  impl->nslots += 1;
-  return (FrameSlot)(impl->nslots);
-}
-
-/* Map a 1-based FrameSlot handle to its XSlot, or NULL when the handle
- * is FRAME_SLOT_NONE or beyond the number of slots created so far. */
-static XSlot* slot_get(XImpl* a, FrameSlot fs) {
-  const int valid = (fs != FRAME_SLOT_NONE) && !(fs > a->nslots);
-  return valid ? &a->slots[fs - 1] : NULL;
-}
-
-/* ---- param: move one incoming argument into its home slot ----
- *
- * ABI_ARG_IGNORE   : nothing is emitted.
- * ABI_ARG_INDIRECT : the argument arrives as a pointer (next integer
- *                    argument register, or the next 8-byte caller stack
- *                    cell at [rbp + 16 + n] loaded into r11).  The
- *                    pointee is copied into the slot through rax in 8-,
- *                    4-, 2- then 1-byte pieces.
- * otherwise        : each ABI part is stored at slot + src_offset, taken
- *                    from the next integer register / xmm register, or
- *                    from the caller stack via rax / xmm0 once the
- *                    registers (6 integer, 8 FP) are used up.
- * A bad slot handle or an unsupported part class is a compiler panic. */
-static void x_param(CGTarget* t, const CGParamDesc* p) {
-  XImpl* impl = impl_of(t);
-  MCEmitter* mc = t->mc;
-  XSlot* home = slot_get(impl, p->slot);
-  if (!home) compiler_panic(t->c, impl->loc, "x64 param: bad slot");
-  const ABIArgInfo* info = p->abi;
-
-  if (info->kind == ABI_ARG_IGNORE) return;
-
-  if (info->kind == ABI_ARG_INDIRECT) {
-    u32 src_ptr;
-    if (impl->next_param_int < 6) {
-      src_ptr = g_int_arg_regs[impl->next_param_int++];
-    } else {
-      const u32 stack_off = impl->next_param_stack;
-      impl->next_param_stack += 8;
-      emit_mov_load(mc, 8, 0, X64_R11, X64_RBP, (i32)(16 + stack_off));
-      src_ptr = X64_R11;
-    }
-
-    /* Widest pieces first, then progressively narrower tails. */
-    static const u32 widths[4] = {8u, 4u, 2u, 1u};
-    const u32 total = home->size;
-    u32 done = 0;
-    for (u32 wi = 0; wi < 4u; ++wi) {
-      const u32 width = widths[wi];
-      while (done + width <= total) {
-        emit_mov_load(mc, width, 0, X64_RAX, src_ptr, (i32)done);
-        emit_mov_store(mc, width, X64_RAX, X64_RBP,
-                       -(i32)home->off + (i32)done);
-        done += width;
-      }
-    }
-    return;
-  }
-
-  /* DIRECT */
-  for (u16 i = 0; i < info->nparts; ++i) {
-    const ABIArgPart* part = &info->parts[i];
-    const u32 part_off = part->src_offset;
-    const u32 sz = part->size;
-    const i32 dst_disp = -(i32)home->off + (i32)part_off;
-
-    switch (part->cls) {
-      case ABI_CLASS_INT:
-        if (impl->next_param_int < 6) {
-          const u32 reg = g_int_arg_regs[impl->next_param_int++];
-          emit_mov_store(mc, sz, reg, X64_RBP, dst_disp);
-        } else {
-          const u32 stack_off = impl->next_param_stack;
-          impl->next_param_stack += 8;
-          emit_mov_load(mc, sz, 0, X64_RAX, X64_RBP, (i32)(16 + stack_off));
-          emit_mov_store(mc, sz, X64_RAX, X64_RBP, dst_disp);
-        }
-        break;
-      case ABI_CLASS_FP: {
-        /* F2 selects the 8-byte (movsd) form, F3 the movss form. */
-        const u8 sse_prefix = (sz == 8) ? 0xF2 : 0xF3;
-        if (impl->next_param_fp < 8) {
-          const u32 xmm = impl->next_param_fp++;
-          emit_sse_store(mc, sse_prefix, 0x11, xmm, X64_RBP, dst_disp);
-        } else {
-          const u32 stack_off = impl->next_param_stack;
-          impl->next_param_stack += 8;
-          emit_sse_load(mc, sse_prefix, 0x10, X64_XMM0, X64_RBP,
-                        (i32)(16 + stack_off));
-          emit_sse_store(mc, sse_prefix, 0x11, X64_XMM0, X64_RBP, dst_disp);
-        }
-        break;
-      }
-      default:
-        compiler_panic(t->c, impl->loc, "x64 param: ABI class %d unimpl",
-                       (int)part->cls);
-    }
-  }
-}
-
-/* Clobber-set query: not implemented for x64.  Both arguments are
- * ignored and the call ends in x_panic. */
-static const Reg* x_clobbers(CGTarget* t, RegClass c, u32* n) {
-  (void)n;
-  (void)c;
-  x_panic(t, "clobbers");
-}
-/* Spill register operand `src` into frame slot `slot` using x_store,
- * then release the register.  `src` must be OPK_REG, otherwise this is
- * a compiler panic. */
-static void x_spill_reg(CGTarget* t, Operand src, FrameSlot slot,
-                        MemAccess ma) {
-  XImpl* impl = impl_of(t);
-  if (src.kind != OPK_REG)
-    compiler_panic(t->c, impl->loc, "x64 spill_reg: src is not OPK_REG");
-
-  /* Synthesize an OPK_LOCAL operand naming the slot. */
-  Operand home;
-  memset(&home, 0, sizeof home);
-  home.v.frame_slot = slot;
-  home.type = ma.type;
-  home.cls = RC_INT;
-  home.kind = OPK_LOCAL;
-
-  x_store(t, home, src, ma);
-  x_free_reg(t, src.v.reg, src.cls);
-}
-
-/* Reload frame slot `slot` into register operand `dst` using x_load.
- * `dst` must be OPK_REG, otherwise this is a compiler panic. */
-static void x_reload_reg(CGTarget* t, Operand dst, FrameSlot slot,
-                         MemAccess ma) {
-  XImpl* impl = impl_of(t);
-  if (dst.kind != OPK_REG)
-    compiler_panic(t->c, impl->loc, "x64 reload_reg: dst is not OPK_REG");
-
-  /* Synthesize an OPK_LOCAL operand naming the slot. */
-  Operand home;
-  memset(&home, 0, sizeof home);
-  home.v.frame_slot = slot;
-  home.type = ma.type;
-  home.cls = RC_INT;
-  home.kind = OPK_LOCAL;
-
-  x_load(t, dst, home, ma);
-}
-
-/* ============================================================
- * Labels / control flow */
-
-/* Create a fresh label by delegating to the machine-code emitter. */
-static Label x_label_new(CGTarget* t) {
-  MCEmitter* mc = t->mc;
-  return (Label)mc->label_new(mc);
-}
-/* Bind label `l` at the emitter's current position. */
-static void x_label_place(CGTarget* t, Label l) {
-  MCEmitter* mc = t->mc;
-  mc->label_place(mc, (MCLabel)l);
-}
-
-/* Emit `jmp rel32`: opcode E9 followed by a zeroed 4-byte displacement,
- * then register a label fixup for it.  The fixup is R_PC32 with addend
- * -4, which yields target - end_of_insn. */
-static void emit_jmp_label(MCEmitter* mc, MCLabel l) {
-  u8 jmp_rel32 = 0xE9;
-  mc->emit_bytes(mc, &jmp_rel32, 1);
-  emit_u32le(mc, 0);
-  mc->emit_label_ref(mc, l, R_PC32, 4, -4);
-}
-
-/* Emit `Jcc rel32`: opcode 0F 80+cc followed by a zeroed 4-byte
- * displacement, then register an R_PC32 label fixup (addend -4).  Only
- * the low four bits of `cc` are used. */
-static void emit_jcc_label(MCEmitter* mc, u32 cc, MCLabel l) {
-  u8 jcc_rel32[2] = {0x0F, (u8)(0x80 | (cc & 0xF))};
-  mc->emit_bytes(mc, jcc_rel32, 2);
-  emit_u32le(mc, 0);
-  mc->emit_label_ref(mc, l, R_PC32, 4, -4);
-}
-
-/* Unconditional jump to label `l`. */
-static void x_jump(CGTarget* t, Label l) {
-  MCLabel target = (MCLabel)l;
-  emit_jmp_label(t->mc, target);
-}
-
-/* Translate a CmpOp into the x64 condition code that is true after
- * `cmp a, b`: unsigned orderings map to B/BE/A/AE, signed ones to
- * L/LE/G/GE.  CMP_EQ and any unrecognized value map to X64_CC_E. */
-static u32 cmp_to_cc(CmpOp op) {
-  u32 cc = X64_CC_E;
-  switch (op) {
-    case CMP_NE:   cc = X64_CC_NE; break;
-    case CMP_LT_U: cc = X64_CC_B;  break;
-    case CMP_LE_U: cc = X64_CC_BE; break;
-    case CMP_GT_U: cc = X64_CC_A;  break;
-    case CMP_GE_U: cc = X64_CC_AE; break;
-    case CMP_LT_S: cc = X64_CC_L;  break;
-    case CMP_LE_S: cc = X64_CC_LE; break;
-    case CMP_GT_S: cc = X64_CC_G;  break;
-    case CMP_GE_S: cc = X64_CC_GE; break;
-    default:       break; /* CMP_EQ and unknown values keep X64_CC_E */
-  }
-  return cc;
-}
-
-/* Return the hardware register number holding integer operand `op`.
- * A register operand is returned as-is (low four bits); an immediate is
- * first loaded into `scratch`, which is then returned.  Any other
- * operand kind is a compiler panic. */
-static u32 force_reg_int(CGTarget* t, Operand op, int w, u32 scratch) {
-  switch (op.kind) {
-    case OPK_REG:
-      return op.v.reg & 0xFu;
-    case OPK_IMM:
-      emit_load_imm(t->mc, w, scratch, op.v.imm);
-      return scratch;
-    default:
-      break;
-  }
-  compiler_panic(t->c, impl_of(t)->loc, "x64: operand kind %d not REG/IMM",
-                 (int)op.kind);
-}
-
-/* Emit a compare that sets flags as for `a_op - b_op`.  Operand width
- * follows the type of `a_op`.
- *
- * Register-vs-immediate compares use the short `cmp r, imm8` / `cmp r,
- * imm32` encodings when the immediate fits.  An immediate on the left
- * cannot use them (swapping the operands would change the meaning of
- * the condition codes), so everything else is forced into registers:
- * `a_op` uses rax as scratch and `b_op` uses r11, or rax when `a_op`
- * already sits in r11. */
-static void emit_cmp_ab(CGTarget* t, Operand a_op, Operand b_op) {
-  MCEmitter* mc = t->mc;
-  const int w = type_is_64(a_op.type) ? 1 : 0;
-
-  if (a_op.kind == OPK_REG && b_op.kind == OPK_IMM) {
-    const u32 lhs = a_op.v.reg & 0xFu;
-    if (imm_fits_i8(b_op.v.imm)) {
-      emit_cmp_imm8(mc, w, lhs, (i8)b_op.v.imm);
-      return;
-    }
-    if (imm_fits_i32(b_op.v.imm)) {
-      emit_alu_imm32(mc, w, /*sub=CMP*/ 7u, lhs, (i32)b_op.v.imm);
-      return;
-    }
-  }
-
-  const u32 ra = force_reg_int(t, a_op, w, X64_RAX);
-  const u32 b_scratch = (ra == X64_R11) ? X64_RAX : X64_R11;
-  const u32 rb = force_reg_int(t, b_op, w, b_scratch);
-  /* cmp r/m, r (opcode 39): flags = ra - rb. */
-  emit_alu_rr(mc, w, 0x39, ra, rb);
-}
-
-/* Compare `a` with `b` and jump to `l` when relation `op` holds. */
-static void x_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b,
-                         Label l) {
-  emit_cmp_ab(t, a, b);
-  const u32 cc = cmp_to_cc(op);
-  emit_jcc_label(t->mc, cc, (MCLabel)l);
-}
-
-/* Materialize the result of `a op b` in register `dst`: compare, SETcc
- * into the low byte of `dst`, then zero-extend that byte to 32 bits. */
-static void x_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a, Operand b) {
-  const u32 rd = dst.v.reg & 0xFu;
-  emit_cmp_ab(t, a, b);
-  emit_setcc(t->mc, cmp_to_cc(op), rd);
-  emit_movzx_r32_r8(t->mc, rd, rd);
-}
-
-/* ---- structured scopes ---- */
-/* Open a structured scope and return its 1-based handle.
- *
- * SCOPE_IF allocates an else label and an end label, tests the
- * condition against itself and jumps to the else label when it is zero.
- * SCOPE_LOOP and SCOPE_BLOCK only record the break/continue labels from
- * the descriptor.  Any other kind is a compiler panic.  The scope table
- * doubles (starting at 4 entries) in the translation-unit arena when
- * full. */
-static CGScope x_scope_begin(CGTarget* t, const CGScopeDesc* d) {
-  XImpl* impl = impl_of(t);
-  MCEmitter* mc = t->mc;
-
-  if (impl->nscopes == impl->scopes_cap) {
-    const u32 grown = impl->scopes_cap ? impl->scopes_cap * 2u : 4u;
-    XScope* fresh = arena_array(t->c->tu, XScope, grown);
-    if (impl->scopes)
-      memcpy(fresh, impl->scopes, sizeof(XScope) * impl->nscopes);
-    impl->scopes = fresh;
-    impl->scopes_cap = grown;
-  }
-
-  XScope* scope = &impl->scopes[impl->nscopes];
-  scope->kind = (u8)d->kind;
-  scope->has_else = 0;
-  scope->else_label = 0;
-  scope->end_label = 0;
-  scope->break_label = d->break_label;
-  scope->continue_label = d->continue_label;
-
-  switch (d->kind) {
-    case SCOPE_IF: {
-      scope->else_label = mc->label_new(mc);
-      scope->end_label = mc->label_new(mc);
-      const int w = type_is_64(d->cond.type) ? 1 : 0;
-      const u32 cond_reg = force_reg_int(t, d->cond, w, X64_RAX);
-      emit_test_self(mc, w, cond_reg);
-      emit_jcc_label(mc, X64_CC_E, scope->else_label);
-      break;
-    }
-    case SCOPE_LOOP:
-    case SCOPE_BLOCK:
-      /* Bookkeeping only. */
-      break;
-    default:
-      compiler_panic(t->c, impl->loc,
-                     "x64 scope_begin: kind %d not yet implemented",
-                     (int)d->kind);
-  }
-
-  impl->nscopes += 1;
-  return (CGScope)impl->nscopes;
-}
-
-/* Switch from the then-arm to the else-arm of scope `s`: jump over the
- * else-arm to the end label, place the else label here, and remember
- * that an else exists.  A bad handle is a compiler panic. */
-static void x_scope_else(CGTarget* t, CGScope s) {
-  XImpl* impl = impl_of(t);
-  if (s == CG_SCOPE_NONE || s > impl->nscopes)
-    compiler_panic(t->c, impl->loc, "x64 scope_else: bad scope");
-
-  XScope* scope = &impl->scopes[s - 1];
-  MCEmitter* mc = t->mc;
-  emit_jmp_label(mc, scope->end_label);
-  mc->label_place(mc, scope->else_label);
-  scope->has_else = 1;
-}
-
-/* Close scope `s`.  For SCOPE_IF this places the else label (when no
- * else-arm was opened) followed by the end label; other kinds emit
- * nothing.  A bad handle is a compiler panic. */
-static void x_scope_end(CGTarget* t, CGScope s) {
-  XImpl* impl = impl_of(t);
-  if (s == CG_SCOPE_NONE || s > impl->nscopes)
-    compiler_panic(t->c, impl->loc, "x64 scope_end: bad scope");
-
-  XScope* scope = &impl->scopes[s - 1];
-  if (scope->kind != SCOPE_IF) return;
-
-  MCEmitter* mc = t->mc;
-  if (!scope->has_else) mc->label_place(mc, scope->else_label);
-  mc->label_place(mc, scope->end_label);
-}
-
-/* Jump to the break label recorded for scope `s`.  A bad handle is a
- * compiler panic. */
-static void x_break_to(CGTarget* t, CGScope s) {
-  XImpl* impl = impl_of(t);
-  if (s == CG_SCOPE_NONE || s > impl->nscopes)
-    compiler_panic(t->c, impl->loc, "x64 break_to: bad scope");
-  const XScope* scope = &impl->scopes[s - 1];
-  x_jump(t, scope->break_label);
-}
-/* Jump to the continue label recorded for scope `s`.  A bad handle is
- * a compiler panic. */
-static void x_continue_to(CGTarget* t, CGScope s) {
-  XImpl* impl = impl_of(t);
-  if (s == CG_SCOPE_NONE || s > impl->nscopes)
-    compiler_panic(t->c, impl->loc, "x64 continue_to: bad scope");
-  const XScope* scope = &impl->scopes[s - 1];
-  x_jump(t, scope->continue_label);
-}
-
-/* ============================================================
- * Data movement */
-
-/* Load integer constant `imm` into register `dst`; the operand width
- * (64-bit or not) follows the type of `dst`. */
-static void x_load_imm(CGTarget* t, Operand dst, i64 imm) {
-  const u32 rd = dst.v.reg & 0xFu;
-  const int w = type_is_64(dst.type) ? 1 : 0;
-  emit_load_imm(t->mc, w, rd, imm);
-}
-
-/* Materialize an FP literal into xmm register `dst`.  The bytes are
- * appended to .rodata (aligned to cb.align, or 4 when that is zero)
- * under a fresh local symbol named ".LCFP_x64_<n>", where <n> is a
- * process-wide decimal sequence number.  The value is then loaded with
- * a RIP-relative movsd (8-byte literal) or movss (any other size).
- * Only RC_FP destinations are supported; anything else is a compiler
- * panic. */
-static void x_load_const(CGTarget* t, Operand dst, ConstBytes cb) {
-  XImpl* impl = impl_of(t);
-  MCEmitter* mc = t->mc;
-  if (dst.cls != RC_FP)
-    compiler_panic(t->c, impl->loc, "x64 load_const: only FP supported in v1");
-
-  /* Drop the literal into .rodata, remembering where code was going. */
-  Sym ro_name = pool_intern_cstr(t->c->global, ".rodata");
-  ObjSecId rodata = obj_section(t->obj, ro_name, SEC_RODATA, SF_ALLOC, 1u);
-
-  const u32 code_section = mc->section_id;
-  mc->set_section(mc, rodata);
-  const u32 lit_off = obj_align_to(t->obj, rodata, cb.align ? cb.align : 4);
-  mc->emit_bytes(mc, cb.bytes, cb.size);
-
-  /* Build the symbol name: fixed prefix + decimal sequence number. */
-  static u32 lit_seq = 0;
-  const char* stem = ".LCFP_x64_";
-  char name[64];
-  int len = 0;
-  while (stem[len]) {
-    name[len] = stem[len];
-    ++len;
-  }
-  char digits[16];
-  int ndigits = 0;
-  u32 seq = lit_seq++;
-  do { /* least-significant digit first; always at least one digit */
-    digits[ndigits++] = '0' + (char)(seq % 10);
-    seq /= 10;
-  } while (seq);
-  while (ndigits > 0) name[len++] = digits[--ndigits];
-  name[len] = 0;
-
-  Sym sym_name = pool_intern_cstr(t->c->global, name);
-  ObjSymId sym = obj_symbol(t->obj, sym_name, SB_LOCAL, SK_OBJ, rodata,
-                            (u64)lit_off, (u64)cb.size);
-  mc->set_section(mc, code_section);
-
-  /* movs{s,d} xmm, [rip + disp32].  The R_PC32 reloc sits on the disp32
-   * with addend -4 so it resolves relative to the end of the
-   * instruction. */
-  u8 sse_prefix = (cb.size == 8) ? 0xF2 : 0xF3;
-  const u32 xmm = dst.v.reg & 0xFu;
-  mc->emit_bytes(mc, &sse_prefix, 1);
-  emit_rex(mc, 0, xmm, 0, 0);
-  u8 opcode[2] = {0x0F, 0x10};
-  mc->emit_bytes(mc, opcode, 2);
-  u8 rip_modrm = modrm(0u, (xmm & 7u), 5u); /* [RIP + disp32] */
-  mc->emit_bytes(mc, &rip_modrm, 1);
-  const u32 disp_pos = mc->pos(mc);
-  emit_u32le(mc, 0);
-  mc->emit_reloc_at(mc, code_section, disp_pos, R_PC32, sym, -4, 1, 0);
-}
-
-/* Register-to-register copy.  When either operand is RC_FP, emit
- * movsd (double destination type) or movss; otherwise emit an integer
- * mov whose width follows the type of `dst`. */
-static void x_copy(CGTarget* t, Operand dst, Operand src) {
-  const u32 rd = dst.v.reg & 0xFu;
-  const u32 rs = src.v.reg & 0xFu;
-  const int is_fp = (dst.cls == RC_FP) || (src.cls == RC_FP);
-
-  if (!is_fp) {
-    const int w = type_is_64(dst.type) ? 1 : 0;
-    emit_mov_rr(t->mc, w, rd, rs);
-    return;
-  }
-  const u8 sse_prefix = type_is_fp_double(dst.type) ? 0xF2 : 0xF3;
-  emit_sse_rr(t->mc, sse_prefix, 0x10, rd, rs);
-}
-
-/* Resolve a memory operand to a (base register, displacement) pair.
- * The base register is returned and the displacement is written to
- * *out_off:
- *   OPK_LOCAL    -> rbp, -slot offset
- *   OPK_INDIRECT -> the operand's base register and offset
- * A bad slot or any other operand kind is a compiler panic. */
-static u32 addr_base(CGTarget* t, Operand addr, i32* out_off) {
-  XImpl* impl = impl_of(t);
-  switch (addr.kind) {
-    case OPK_LOCAL: {
-      XSlot* slot = slot_get(impl, addr.v.frame_slot);
-      if (!slot) compiler_panic(t->c, impl->loc, "x64 addr_base: bad slot");
-      *out_off = -(i32)slot->off;
-      return X64_RBP;
-    }
-    case OPK_INDIRECT:
-      *out_off = addr.v.ind.ofs;
-      return addr.v.ind.base & 0xFu;
-    default:
-      break;
-  }
-  compiler_panic(t->c, impl->loc, "x64 addr_base: kind %d unsupported",
-                 (int)addr.kind);
-}
-
-/* Nonzero when references to `sym` must be routed through the GOT, as
- * decided by obj_symbol_extern_via_got. */
-static int x64_use_got_for_sym(CGTarget* t, ObjSymId sym) {
-  const int via_got = obj_symbol_extern_via_got(t->c, t->obj, sym);
-  return via_got;
-}
-
-/* Materialize `&sym + addend` into `dst_reg`.
- *
- * Direct path (locally-defined or static-link extern symbols):
- *     lea dst, [rip + disp32]     ; R_X64_PLT32, addend = addend - 4
- * PLT32 collapses to a plain PC-relative LEA at link time — the PLT
- * routing only fires when the linker actually needs the trampoline,
- * i.e. function calls into a DSO.
- *
- * GOT path (x64_use_got_for_sym() is true, i.e. undef externs in
- * PIC/PIE):
- *     mov dst, [rip + disp32]     ; R_X64_REX_GOTPCRELX, addend = -4
- *     add dst, imm8 / imm32       ; only when addend != 0
- * so the loader can resolve the symbol by patching a single GOT slot
- * rather than touching .text.  The addend is kept off the relocation
- * (most loaders disallow nonzero addends on GOT-load fixups) and is
- * applied to the loaded pointer instead.  The `add` takes a
- * sign-extended imm32, so an addend outside the i32 range cannot be
- * encoded; that case is a compiler panic rather than a silent
- * truncation.
- *
- * The -4 in both relocations is there because RIP-relative
- * displacements are measured from the end of the instruction, 4 bytes
- * past the start of the disp32 field. */
-static void emit_global_lea(CGTarget* t, u32 dst_reg, ObjSymId sym,
-                            i64 addend) {
-  MCEmitter* mc = t->mc;
-  const int via_got = x64_use_got_for_sym(t, sym);
-
-  if (via_got && !imm_fits_i32(addend)) {
-    compiler_panic(t->c, impl_of(t)->loc,
-                   "x64 global_lea: GOT addend does not fit in imm32");
-  }
-
-  /* Both paths share one shape: REX.W, opcode, ModRM [RIP + disp32]. */
-  emit_rex(mc, 1, dst_reg, 0, 0);
-  u8 op = via_got ? 0x8B /* mov r64, r/m64 */ : 0x8D /* lea r64, m */;
-  mc->emit_bytes(mc, &op, 1);
-  u8 mr = modrm(0u, (dst_reg & 7u), 5u); /* [RIP + disp32] */
-  mc->emit_bytes(mc, &mr, 1);
-  u32 disp_pos = mc->pos(mc);
-  emit_u32le(mc, 0);
-
-  if (!via_got) {
-    mc->emit_reloc_at(mc, mc->section_id, disp_pos, R_X64_PLT32, sym,
-                      addend - 4, 1, 0);
-    return;
-  }
-
-  mc->emit_reloc_at(mc, mc->section_id, disp_pos, R_X64_REX_GOTPCRELX, sym,
-                    -4, 1, 0);
-  if (!addend) return;
-
-  /* Adjust the pointer just loaded from the GOT. */
-  i32 delta = (i32)addend; /* range-checked above */
-  emit_rex(mc, 1, 0, 0, dst_reg);
-  if (delta >= -128 && delta <= 127) {
-    /* add r/m64, imm8 (REX.W + 0x83 /0 ib) */
-    u8 add_op[2] = {0x83, modrm(3u, 0u, (u8)(dst_reg & 7u))};
-    mc->emit_bytes(mc, add_op, 2);
-    u8 ib = (u8)delta;
-    mc->emit_bytes(mc, &ib, 1);
-  } else {
-    /* add r/m64, imm32 (REX.W + 0x81 /0 id) */
-    u8 add_op[2] = {0x81, modrm(3u, 0u, (u8)(dst_reg & 7u))};
-    mc->emit_bytes(mc, add_op, 2);
-    emit_u32le(mc, (u32)delta);
-  }
-}
-
-/* Load from memory operand `addr` into register `dst`.  The access size
- * is ma.size, or the byte size of addr.type when ma.size is zero.
- * Globals are first materialized into r11 and read from [r11]; locals
- * and indirect operands are resolved through addr_base.  FP
- * destinations use movsd (8 bytes) or movss; integer destinations use
- * emit_mov_load with the signedness of ma.type (or addr.type when
- * ma.type is null). */
-static void x_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) {
-  MCEmitter* mc = t->mc;
-  const u32 sz = ma.size ? ma.size : type_byte_size(addr.type);
-  const u32 rd = dst.v.reg & 0xFu;
-
-  u32 base;
-  i32 off;
-  if (addr.kind == OPK_GLOBAL) {
-    emit_global_lea(t, X64_R11, addr.v.global.sym, addr.v.global.addend);
-    base = X64_R11;
-    off = 0;
-  } else {
-    base = addr_base(t, addr, &off);
-  }
-
-  if (dst.cls == RC_FP) {
-    const u8 sse_prefix = (sz == 8) ? 0xF2 : 0xF3;
-    emit_sse_load(mc, sse_prefix, 0x10, rd, base, off);
-    return;
-  }
-  const int is_signed = type_is_signed(ma.type ? ma.type : addr.type);
-  emit_mov_load(mc, sz, is_signed, rd, base, off);
-}
-
-/* Store `src` to memory operand `addr`.  The access size is ma.size, or
- * the byte size of addr.type when ma.size is zero.  Globals are first
- * materialized into r11 and written through [r11]; locals and indirect
- * operands are resolved through addr_base.  An immediate source is
- * loaded into rax first (so r11 stays intact between the address
- * computation and the store); FP sources use movsd (8 bytes) or movss;
- * other sources are stored straight from their register. */
-static void x_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) {
-  MCEmitter* mc = t->mc;
-  const u32 sz = ma.size ? ma.size : type_byte_size(addr.type);
-
-  u32 base;
-  i32 off;
-  if (addr.kind == OPK_GLOBAL) {
-    emit_global_lea(t, X64_R11, addr.v.global.sym, addr.v.global.addend);
-    base = X64_R11;
-    off = 0;
-  } else {
-    base = addr_base(t, addr, &off);
-  }
-
-  if (src.kind == OPK_IMM) {
-    const int w = (sz == 8) ? 1 : 0;
-    emit_load_imm(mc, w, X64_RAX, src.v.imm);
-    emit_mov_store(mc, sz, X64_RAX, base, off);
-  } else if (src.cls == RC_FP) {
-    const u8 sse_prefix = (sz == 8) ? 0xF2 : 0xF3;
-    emit_sse_store(mc, sse_prefix, 0x11, src.v.reg & 0xFu, base, off);
-  } else {
-    emit_mov_store(mc, sz, src.v.reg & 0xFu, base, off);
-  }
-}
-
-/* Compute the address of lvalue `lv` into register `dst`:
- *   OPK_LOCAL    -> lea dst, [rbp - slot offset]
- *   OPK_INDIRECT -> lea dst, [base + ofs]
- *   OPK_GLOBAL   -> emit_global_lea
- * A bad slot or any other operand kind panics. */
-static void x_addr_of(CGTarget* t, Operand dst, Operand lv) {
-  XImpl* impl = impl_of(t);
-  const u32 rd = dst.v.reg & 0xFu;
-
-  switch (lv.kind) {
-    case OPK_LOCAL: {
-      XSlot* slot = slot_get(impl, lv.v.frame_slot);
-      if (!slot) compiler_panic(t->c, impl->loc, "x64 addr_of: bad slot");
-      emit_lea(t->mc, rd, X64_RBP, -(i32)slot->off);
-      return;
-    }
-    case OPK_INDIRECT:
-      emit_lea(t->mc, rd, lv.v.ind.base & 0xFu, lv.v.ind.ofs);
-      return;
-    case OPK_GLOBAL:
-      emit_global_lea(t, rd, lv.v.global.sym, lv.v.global.addend);
-      return;
-    default:
-      break;
-  }
-  x_panic(t, "addr_of: kind unsupported");
-}
-
-/* x86_64 TLS Local-Exec address materialization:
- *     mov rd, fs:0                ; thread pointer (FS base + 0)
- *     lea rd, [rd + sym@tpoff]    ; add the TP-relative offset
- * The LEA's disp32 carries an R_X64_TPOFF32 relocation (with `addend`);
- * the linker fills in the signed TP-relative offset, which is negative
- * under variant II because the TLS image sits below the TCB that FS
- * points at. */
-static void x_tls_addr_of(CGTarget* t, Operand dst, ObjSymId sym, i64 addend) {
-  MCEmitter* mc = t->mc;
-  const u32 section = mc->section_id;
-  const u32 rd = dst.v.reg & 0xFu;
-  const u32 rd_lo = rd & 7u;
-
-  /* mov rd, qword ptr fs:[0]
-   *   64 REX 8B ModRM(mod=00, reg=rd, rm=100) SIB(0,4,5) disp32=0
-   * The SIB byte with no index and base=101 selects absolute disp32. */
-  u8 fs_override = 0x64;
-  mc->emit_bytes(mc, &fs_override, 1);
-  emit_rex(mc, 1, rd, 0, 0);
-  u8 mov_load = 0x8B;
-  mc->emit_bytes(mc, &mov_load, 1);
-  u8 mov_modrm = modrm(0u, rd_lo, 4u);
-  mc->emit_bytes(mc, &mov_modrm, 1);
-  u8 mov_sib = sib(0u, 4u, 5u);
-  mc->emit_bytes(mc, &mov_sib, 1);
-  emit_u32le(mc, 0);
-
-  /* lea rd, [rd + disp32]
-   *   REX 8D ModRM(mod=10, reg=rd, rm=rd) [SIB when rd&7 == 4] disp32
-   * rm=100 means "SIB follows", so a base whose low bits are 100 has to
-   * be spelled through a SIB byte; the ModRM value is the same either
-   * way because rm then equals rd&7 == 4. */
-  emit_rex(mc, 1, rd, 0, rd);
-  u8 lea = 0x8D;
-  mc->emit_bytes(mc, &lea, 1);
-  u8 lea_modrm = modrm(2u, rd_lo, rd_lo);
-  mc->emit_bytes(mc, &lea_modrm, 1);
-  if (rd_lo == 4u) {
-    u8 lea_sib = sib(0u, 4u, rd_lo);
-    mc->emit_bytes(mc, &lea_sib, 1);
-  }
-  const u32 disp_pos = mc->pos(mc);
-  emit_u32le(mc, 0);
-
-  mc->emit_reloc_at(mc, section, disp_pos, R_X64_TPOFF32, sym, addend, 0, 0);
-}
-
- /* Aggregate ops — small unrolled memcpy/memset.
-  *
-  * agg_addr_reg returns a register holding the address described by `op`:
-  * a REG operand is used as-is; a LOCAL operand has its frame address
-  * computed into `scratch` with LEA. Any other kind panics. */
- static u32 agg_addr_reg(CGTarget* t, Operand op, u32 scratch) {
-   switch (op.kind) {
-     case OPK_REG:
-       return op.v.reg & 0xFu;
-     case OPK_LOCAL: {
-       XImpl* impl = impl_of(t);
-       XSlot* slot = slot_get(impl, op.v.frame_slot);
-       if (!slot) compiler_panic(t->c, impl->loc, "x64 agg: bad slot");
-       emit_lea(t->mc, scratch, X64_RBP, -(i32)slot->off);
-       return scratch;
-     }
-     default:
-       break;
-   }
-   compiler_panic(t->c, impl_of(t)->loc,
-                  "x64 agg: address kind %d unsupported", (int)op.kind);
- }
-
- /* Copy g.size bytes from the address in `sa` to the address in `da` as an
-  * unrolled sequence of load/store pairs staged through RDX. Chunks are
-  * taken widest-first (8, 4, 2, then single bytes). The destination
-  * address may be computed into R11; the source address uses RAX, or RCX
-  * when the destination already lives in RAX. */
- static void x_copy_bytes(CGTarget* t, Operand da, Operand sa,
-                          AggregateAccess g) {
-   static const u32 wide_chunks[3] = {8u, 4u, 2u};
-   u32 dst_base = agg_addr_reg(t, da, X64_R11);
-   u32 src_scratch = (dst_base == X64_RAX) ? X64_RCX : X64_RAX;
-   u32 src_base = agg_addr_reg(t, sa, src_scratch);
-   u32 total = g.size;
-   u32 done = 0;
-
-   for (u32 k = 0; k < 3u; ++k) {
-     u32 width = wide_chunks[k];
-     for (; done + width <= total; done += width) {
-       emit_mov_load(t->mc, width, 0, X64_RDX, src_base, (i32)done);
-       emit_mov_store(t->mc, width, X64_RDX, dst_base, (i32)done);
-     }
-   }
-   /* Trailing odd byte(s). */
-   for (; done < total; done += 1) {
-     emit_mov_load(t->mc, 1, 0, X64_RDX, src_base, (i32)done);
-     emit_mov_store(t->mc, 1, X64_RDX, dst_base, (i32)done);
-   }
- }
-
- /* Fill g.size bytes at the address in `da` with the low byte of the
-  * immediate `bv`. The byte is replicated across all eight lanes of RAX
-  * once, then stored with the widest chunk that still fits (8, 4, 2, 1).
-  * A non-immediate fill value is not supported and panics. */
- static void x_set_bytes(CGTarget* t, Operand da, Operand bv,
-                         AggregateAccess g) {
-   static const u32 wide_chunks[3] = {8u, 4u, 2u};
-   u32 dst_base = agg_addr_reg(t, da, X64_R11);
-   if (bv.kind != OPK_IMM) {
-     compiler_panic(t->c, impl_of(t)->loc,
-                    "x64 set_bytes: non-IMM byte not yet supported");
-   }
-
-   /* 0xAB -> 0xABABABABABABABAB */
-   u8 fill = (u8)(bv.v.imm & 0xff);
-   u64 pattern = (u64)fill * 0x0101010101010101ull;
-   emit_load_imm(t->mc, 1, X64_RAX, (i64)pattern);
-
-   u32 total = g.size;
-   u32 done = 0;
-   for (u32 k = 0; k < 3u; ++k) {
-     u32 width = wide_chunks[k];
-     for (; done + width <= total; done += width) {
-       emit_mov_store(t->mc, width, X64_RAX, dst_base, (i32)done);
-     }
-   }
-   for (; done < total; done += 1) {
-     emit_mov_store(t->mc, 1, X64_RAX, dst_base, (i32)done);
-   }
- }
-
- /* Bit-field read. Load the storage unit into dst, shift the field up so
-  * its top bit lands on the register's top bit, then shift it back down to
-  * bit 0: SAR (/7) when the field is signed, SHR (/5) when unsigned. A
-  * storage size of 0 is treated as 4 bytes and a width of 0 as 1 bit.
-  * Only 8-byte storage uses 64-bit shifts; everything else works in the
-  * 32-bit register. */
- static void x_bitfield_load(CGTarget* t, Operand dst, Operand record_addr,
-                             BitFieldAccess bf) {
-   u32 base = agg_addr_reg(t, record_addr, X64_R11);
-   u32 unit_bytes = bf.storage.size ? bf.storage.size : 4u;
-   int wide = (unit_bytes == 8u) ? 1 : 0;
-   u32 reg_bits = wide ? 64u : 32u;
-   u32 field_bits = bf.bit_width ? bf.bit_width : 1u;
-   u32 rd = dst.v.reg & 0xFu;
-
-   emit_mov_load(t->mc, unit_bytes, 0, rd, base, (i32)bf.storage_offset);
-
-   u8 shl_by = (u8)(reg_bits - bf.bit_offset - field_bits);
-   u8 shr_by = (u8)(reg_bits - field_bits);
-   u32 shr_subop = bf.signed_ ? 7u : 5u;
-   if (shl_by) emit_shift_imm(t->mc, wide, 4u, rd, shl_by);
-   if (shr_by) emit_shift_imm(t->mc, wide, shr_subop, rd, shr_by);
- }
-
- /* Bit-field write as a read-modify-write of the storage unit:
-  *   rax = storage unit
-  *   rax &= ~mask                       (clear the field)
-  *   rcx = (src & ones) << bit_offset   (stage the new value)
-  *   rax |= rcx; store rax back
-  * RAX holds the storage word, RCX the staged value, and RDX the
-  * source-side mask for register sources. The base register is not
-  * written. Only IMM and REG sources are supported. */
- static void x_bitfield_store(CGTarget* t, Operand record_addr, Operand src,
-                              BitFieldAccess bf) {
-   u32 base = agg_addr_reg(t, record_addr, X64_R11);
-   u32 unit_bytes = bf.storage.size ? bf.storage.size : 4u;
-   int wide = (unit_bytes == 8u) ? 1 : 0;
-   u32 shift = bf.bit_offset;
-   u32 field_bits = bf.bit_width ? bf.bit_width : 1u;
-   u64 ones = ~(u64)0;
-   if (field_bits < 64u) ones = ((u64)1 << field_bits) - 1u;
-   u64 field_mask = ones << shift;
-   i32 unit_off = (i32)bf.storage_offset;
-
-   /* Fetch the unit and punch a hole where the field lives. */
-   emit_mov_load(t->mc, unit_bytes, 0, X64_RAX, base, unit_off);
-   emit_load_imm(t->mc, wide, X64_RCX, (i64)~field_mask);
-   emit_alu_rr(t->mc, wide, 0x21, X64_RAX, X64_RCX); /* AND rax, rcx */
-
-   switch (src.kind) {
-     case OPK_IMM: {
-       /* Constant source: mask and position it at compile time. */
-       u64 placed = ((u64)src.v.imm & ones) << shift;
-       emit_load_imm(t->mc, wide, X64_RCX, (i64)placed);
-       break;
-     }
-     case OPK_REG:
-       emit_mov_rr(t->mc, wide, X64_RCX, src.v.reg & 0xFu);
-       emit_load_imm(t->mc, wide, X64_RDX, (i64)ones);
-       emit_alu_rr(t->mc, wide, 0x21, X64_RCX, X64_RDX); /* AND rcx, rdx */
-       if (shift) emit_shift_imm(t->mc, wide, 4u, X64_RCX, (u8)shift);
-       break;
-     default:
-       compiler_panic(t->c, impl_of(t)->loc,
-                      "x64 bitfield_store: src kind %d unsupported",
-                      (int)src.kind);
-   }
-
-   emit_alu_rr(t->mc, wide, 0x09, X64_RAX, X64_RCX); /* OR rax, rcx */
-   emit_mov_store(t->mc, unit_bytes, X64_RAX, base, unit_off);
- }
-
-/* ============================================================
- * Arithmetic */
-
- /* Emit `dst = a_op <op> b_op`.
-  *
-  * Paths, in order:
-  *   - FP add/sub/mul/div: scalar SSE, double or single chosen by dst.type.
-  *   - Integer div/rem: dividend in RAX, RDX sign- or zero-extended,
-  *     divisor in a register other than RAX/RDX; quotient comes back in
-  *     RAX, remainder in RDX.
-  *   - Shifts: imm8 form for constant counts, otherwise count in CL.
-  *   - ADD/SUB/AND/OR/XOR/IMUL: imm8/imm32 forms when the constant fits,
-  *     else the generic two-operand reg/reg form.
-  * RAX and R11 are used as scratch; RCX is used for variable shift counts
-  * and RDX is written by the division path. */
- static void x_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op,
-                     Operand b_op) {
-   MCEmitter* mc = t->mc;
-
-   /* FP binops. */
-   if (op == BO_FADD || op == BO_FSUB || op == BO_FMUL || op == BO_FDIV) {
-     u32 rd = dst.v.reg & 0xFu;
-     u32 ra = a_op.v.reg & 0xFu;
-     u32 rb = b_op.v.reg & 0xFu;
-     u8 prefix2 = type_is_fp_double(dst.type) ? 0xF2 : 0xF3;
-     if (rd != ra) emit_sse_rr(mc, prefix2, 0x10, rd, ra);
-     u8 opcode;
-     switch (op) {
-       case BO_FADD: opcode = 0x58; break;
-       case BO_FSUB: opcode = 0x5C; break;
-       case BO_FMUL: opcode = 0x59; break;
-       case BO_FDIV: opcode = 0x5E; break;
-       default:      opcode = 0x58; break;
-     }
-     emit_sse_rr(mc, prefix2, opcode, rd, rb);
-     return;
-   }
-
-   int w = type_is_64(dst.type) ? 1 : 0;
-   u32 rd = dst.v.reg & 0xFu;
-
-   /* Division: idiv/div uses rax/rdx implicitly. Route the divisor through
-    * r11 if it would otherwise be rax/rdx. */
-   if (op == BO_SDIV || op == BO_UDIV || op == BO_SREM || op == BO_UREM) {
-     u32 rb = X64_R11;
-     int divisor_parked = 0;
-
-     /* A divisor living in RAX must be moved out BEFORE the dividend is
-      * written into RAX; doing it afterwards would divide by the dividend.
-      * This assumes the dividend is not itself held in the R11 scratch. */
-     if (b_op.kind == OPK_REG && (b_op.v.reg & 0xFu) == X64_RAX) {
-       emit_mov_rr(mc, w, X64_R11, X64_RAX);
-       divisor_parked = 1;
-     }
-
-     u32 ra = force_reg_int(t, a_op, w, X64_RAX);
-     if (ra != X64_RAX) emit_mov_rr(mc, w, X64_RAX, ra);
-
-     if (!divisor_parked) {
-       if (b_op.kind == OPK_REG) {
-         rb = b_op.v.reg & 0xFu;
-         if (rb == X64_RDX) {
-           /* RDX is about to be overwritten by cqo/cdq or the zeroing xor. */
-           emit_mov_rr(mc, w, X64_R11, rb);
-           rb = X64_R11;
-         }
-       } else if (b_op.kind == OPK_IMM) {
-         emit_load_imm(mc, w, X64_R11, b_op.v.imm);
-         rb = X64_R11;
-       } else {
-         compiler_panic(t->c, impl_of(t)->loc,
-                        "x64 div: divisor kind %d unsupported", (int)b_op.kind);
-       }
-     }
-
-     if (op == BO_SDIV || op == BO_SREM) {
-       emit_cqo_or_cdq(mc, w);
-       emit_f7_rm(mc, w, 7u, rb); /* idiv */
-     } else {
-       emit_xor_self(mc, w, X64_RDX);
-       emit_f7_rm(mc, w, 6u, rb); /* div */
-     }
-     u32 result_reg = (op == BO_SREM || op == BO_UREM) ? X64_RDX : X64_RAX;
-     if (rd != result_reg) emit_mov_rr(mc, w, rd, result_reg);
-     return;
-   }
-
-   /* Shifts: shift count must be in cl OR encoded as imm8 directly (C1
-    * /sub ib). Use the imm form when b is OPK_IMM and skip materializing
-    * into cl. */
-   if (op == BO_SHL || op == BO_SHR_U || op == BO_SHR_S) {
-     u32 ra = force_reg_int(t, a_op, w, X64_RAX);
-     if (rd != ra) emit_mov_rr(mc, w, rd, ra);
-     u32 sub = (op == BO_SHL) ? 4u : (op == BO_SHR_U ? 5u : 7u);
-     if (b_op.kind == OPK_IMM) {
-       /* Mask the count to the operand width. */
-       u32 width = w ? 64u : 32u;
-       emit_shift_imm(mc, w, sub, rd, (u8)((u64)b_op.v.imm & (width - 1u)));
-       return;
-     }
-     if (b_op.kind == OPK_REG) {
-       u32 rb = b_op.v.reg & 0xFu;
-       if (rb != X64_RCX) emit_mov_rr(mc, 0, X64_RCX, rb);
-     } else {
-       compiler_panic(t->c, impl_of(t)->loc,
-                      "x64 shift: count kind %d unsupported", (int)b_op.kind);
-     }
-     emit_shift_cl(mc, w, sub, rd);
-     return;
-   }
-
-   /* For commutative ops, canonicalize IMM to the RHS so the imm-form
-    * check below fires uniformly. ISUB is non-commutative — IMM-on-LHS
-    * still materializes. */
-   switch (op) {
-     case BO_IADD:
-     case BO_AND:
-     case BO_OR:
-     case BO_XOR:
-     case BO_IMUL: {
-       if (a_op.kind == OPK_IMM && b_op.kind != OPK_IMM) {
-         Operand t_op = a_op;
-         a_op = b_op;
-         b_op = t_op;
-       }
-       break;
-     }
-     default:
-       break;
-   }
-
-   /* IMM-form fast paths. For ADD/SUB/AND/OR/XOR the ALU imm encoding
-    * reads-and-writes a single reg — copy ra → dst first, then `dst OP=
-    * imm`. For IMUL the imm form is three-operand (`dst = src * imm`)
-    * and reads from `ra` directly without the prep copy. */
-   if (b_op.kind == OPK_IMM && a_op.kind == OPK_REG &&
-       (op == BO_IADD || op == BO_ISUB || op == BO_AND || op == BO_OR ||
-        op == BO_XOR || op == BO_IMUL)) {
-     i64 imm = b_op.v.imm;
-     u32 ra = a_op.v.reg & 0xFu;
-     if (op == BO_IMUL) {
-       if (imm_fits_i8(imm)) {
-         emit_imul_imm8(mc, w, rd, ra, (i8)imm);
-         return;
-       }
-       if (imm_fits_i32(imm)) {
-         emit_imul_imm32(mc, w, rd, ra, (i32)imm);
-         return;
-       }
-     } else {
-       u32 sub;
-       switch (op) {
-         case BO_IADD: sub = 0u; break;
-         case BO_OR:   sub = 1u; break;
-         case BO_AND:  sub = 4u; break;
-         case BO_ISUB: sub = 5u; break;
-         case BO_XOR:  sub = 6u; break;
-         default:      sub = 0u; break; /* unreachable */
-       }
-       if (imm_fits_i8(imm)) {
-         if (rd != ra) emit_mov_rr(mc, w, rd, ra);
-         emit_alu_imm8(mc, w, sub, rd, (i8)imm);
-         return;
-       }
-       if (imm_fits_i32(imm)) {
-         if (rd != ra) emit_mov_rr(mc, w, rd, ra);
-         emit_alu_imm32(mc, w, sub, rd, (i32)imm);
-         return;
-       }
-     }
-     /* Fall through to materialize for >32-bit literals. */
-   }
-
-   /* Generic 2-operand ALU: copy ra → dst, then dst op= rb. */
-   u32 ra = force_reg_int(t, a_op, w, X64_RAX);
-   if (rd != ra) emit_mov_rr(mc, w, rd, ra);
-   u32 rb = force_reg_int(t, b_op, w, X64_R11);
-   switch (op) {
-     case BO_IADD: emit_alu_rr(mc, w, 0x01, rd, rb); break;
-     case BO_ISUB: emit_alu_rr(mc, w, 0x29, rd, rb); break;
-     case BO_AND:  emit_alu_rr(mc, w, 0x21, rd, rb); break;
-     case BO_OR:   emit_alu_rr(mc, w, 0x09, rd, rb); break;
-     case BO_XOR:  emit_alu_rr(mc, w, 0x31, rd, rb); break;
-     case BO_IMUL: emit_imul_rr(mc, w, rd, rb); break;
-     default:
-       compiler_panic(t->c, impl_of(t)->loc, "x64 binop: op %d unimpl",
-                      (int)op);
-   }
- }
-
- /* Emit `dst = <op> a_op` for integer negate, bitwise not and logical not.
-  * An IMM operand is legal per the CGTarget contract (arch.h): it is
-  * materialized into the R11 scratch first. cg folds literal unops
-  * upstream (cg_fold_unop), so that path is reached only when opt's emit
-  * hands us an unfolded literal. */
- static void x_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) {
-   MCEmitter* mc = t->mc;
-   int wide = type_is_64(dst.type) ? 1 : 0;
-   u32 rd = dst.v.reg & 0xFu;
-   u32 rs = force_reg_int(t, a_op, wide, X64_R11);
-
-   switch (op) {
-     case UO_NEG:
-     case UO_BNOT: {
-       /* Group F7: /3 = NEG, /2 = NOT; both operate in place on dst. */
-       u32 subop = (op == UO_NEG) ? 3u : 2u;
-       if (rd != rs) emit_mov_rr(mc, wide, rd, rs);
-       emit_f7_rm(mc, wide, subop, rd);
-       return;
-     }
-     case UO_NOT:
-       /* !x → (x == 0), materialized as 0/1 in dst. */
-       emit_test_self(mc, wide, rs);
-       emit_setcc(mc, X64_CC_E, rd);
-       emit_movzx_r32_r8(mc, rd, rd);
-       return;
-     default:
-       compiler_panic(t->c, impl_of(t)->loc, "x64 unop: op %d unimpl",
-                      (int)op);
-   }
- }
-
- /* Emit a value conversion `dst = (k) src` between registers.
-  *
-  *   SEXT / ZEXT     movsx/movzx-style extension from src's byte size.
-  *   TRUNC           32-bit register move (clears the upper 32 bits).
-  *   ITOF_S / ITOF_U cvtsi2ss/sd; u32 is zero-extended to 64 bits first so
-  *                   the signed conversion sees a non-negative value.
-  *                   u64 -> fp is not implemented (panics).
-  *   FTOI_S / FTOI_U cvttss/sd2si; fp -> u32 converts at 64-bit width (see
-  *                   below). fp -> u64 is not implemented (panics).
-  *   FEXT / FTRUNC   cvtss2sd / cvtsd2ss.
-  *   BITCAST         movd/movq between a GPR and an XMM register.
-  */
- static void x_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) {
-   XImpl* a = impl_of(t);
-   MCEmitter* mc = t->mc;
-   u32 rd = dst.v.reg & 0xFu;
-   u32 rs = src.v.reg & 0xFu;
-   switch (k) {
-     case CV_SEXT: {
-       u32 src_bytes = type_byte_size(src.type);
-       int w = type_is_64(dst.type) ? 1 : 0;
-       emit_extend_rr(mc, w, /*signed=*/1, src_bytes, rd, rs);
-       return;
-     }
-     case CV_ZEXT: {
-       u32 src_bytes = type_byte_size(src.type);
-       int w = type_is_64(dst.type) ? 1 : 0;
-       emit_extend_rr(mc, w, /*signed=*/0, src_bytes, rd, rs);
-       return;
-     }
-     case CV_TRUNC: {
-       /* In-reg truncation: `mov r32, r32` clears high 32. Narrower stores
-        * select width themselves. */
-       emit_mov_rr(mc, 0, rd, rs);
-       return;
-     }
-     case CV_ITOF_S:
-     case CV_ITOF_U: {
-       int w_src = type_is_64(src.type) ? 1 : 0;
-       u8 prefix2 = type_is_fp_double(dst.type) ? 0xF2 : 0xF3;
-       if (k == CV_ITOF_U && w_src == 1) {
-         compiler_panic(t->c, a->loc,
-                        "x64 convert: u64→fp not yet implemented");
-       }
-       if (k == CV_ITOF_U) {
-         /* u32→fp: zero-extend to 64-bit, then signed cvtsi2sd works. */
-         emit_extend_rr(mc, 0, 0, 4, X64_R11, rs);
-         rs = X64_R11;
-         w_src = 1;
-       }
-       emit_sse_rr_w(mc, prefix2, 0x2A, w_src, rd, rs);
-       return;
-     }
-     case CV_FTOI_S:
-     case CV_FTOI_U: {
-       int w_dst = type_is_64(dst.type) ? 1 : 0;
-       u8 prefix2 = type_is_fp_double(src.type) ? 0xF2 : 0xF3;
-       if (k == CV_FTOI_U && w_dst == 1) {
-         compiler_panic(t->c, a->loc,
-                        "x64 convert: fp→u64 not yet implemented");
-       }
-       if (k == CV_FTOI_U) {
-         /* fp→u32: the 32-bit CVTTSx2SI is a *signed* conversion and yields
-          * the integer-indefinite value 0x80000000 for every input in
-          * [2^31, 2^32). Convert at 64-bit width instead, where the whole
-          * u32 range is representable, then keep only the low 32 bits
-          * (`mov r32, r32` clears the upper half, as in CV_TRUNC). */
-         emit_sse_rr_w(mc, prefix2, 0x2C, 1, rd, rs);
-         emit_mov_rr(mc, 0, rd, rd);
-         return;
-       }
-       emit_sse_rr_w(mc, prefix2, 0x2C, w_dst, rd, rs);
-       return;
-     }
-     case CV_FEXT:
-       emit_sse_rr(mc, 0xF3, 0x5A, rd, rs); /* cvtss2sd */
-       return;
-     case CV_FTRUNC:
-       emit_sse_rr(mc, 0xF2, 0x5A, rd, rs); /* cvtsd2ss */
-       return;
-     case CV_BITCAST: {
-       /* movd/movq between xmm and GPR. The XMM register is always the
-        * ModRM reg field, hence the swapped operand order for 0x7E. */
-       if (src.cls == RC_INT && dst.cls == RC_FP) {
-         int w = type_is_64(dst.type) ? 1 : 0;
-         emit_sse_rr_w(mc, 0x66, 0x6E, w, rd, rs);
-       } else if (src.cls == RC_FP && dst.cls == RC_INT) {
-         int w = type_is_64(src.type) ? 1 : 0;
-         emit_sse_rr_w(mc, 0x66, 0x7E, w, rs, rd);
-       } else {
-         compiler_panic(t->c, a->loc,
-                        "x64 convert BITCAST: same-class not supported");
-       }
-       return;
-     }
-     default:
-       compiler_panic(t->c, a->loc, "x64 convert kind %d unimpl", (int)k);
-   }
- }
-
-/* ============================================================
- * Calls / return */
-
- /* Place one call argument according to its ABI description.
-  *
-  *   next_int / next_fp  running count of integer (max 6) and XMM (max 8)
-  *                       argument registers already assigned; advanced
-  *                       for every register consumed here.
-  *   stack_off           running byte offset into the outgoing-argument
-  *                       area at [rsp]; advanced by 8 per stack word.
-  *
-  * A NULL av->abi (variadic tail argument) is treated as a single DIRECT
-  * part whose class follows the operand's register class. INDIRECT
-  * arguments pass the address of their storage. Stack-bound integer
-  * values are staged through RAX; stack-bound FP values loaded from memory
-  * are staged through XMM15.
-  *
-  * NOTE(review): parts of a multi-part argument are assigned
-  * independently, so one part can land in a register while the next goes
-  * to the stack. The SysV psABI passes such an argument entirely in
-  * memory — confirm the callee side agrees before relying on this for
-  * external calls. */
- static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
-                            u32* next_fp, u32* stack_off) {
-   XImpl* a = impl_of(t);
-   /* Synthesize one-part DIRECT for variadic args (av->abi NULL). */
-   ABIArgInfo va_ai;
-   ABIArgPart va_pt;
-   const ABIArgInfo* ai = av->abi;
-   if (!ai) {
-     u32 sz = type_byte_size(av->type);
-     memset(&va_ai, 0, sizeof va_ai);
-     memset(&va_pt, 0, sizeof va_pt);
-     va_ai.kind = ABI_ARG_DIRECT;
-     va_ai.parts = &va_pt;
-     va_ai.nparts = 1;
-     va_pt.cls = (av->storage.cls == RC_FP) ? ABI_CLASS_FP : ABI_CLASS_INT;
-     va_pt.size = sz;
-     va_pt.align = sz;
-     va_pt.src_offset = 0;
-     ai = &va_ai;
-   }
-   if (ai->kind == ABI_ARG_IGNORE) return;
-   if (ai->kind == ABI_ARG_INDIRECT) {
-     /* Pass the address of the argument's storage: in the next free integer
-      * argument register, or staged in RAX and spilled to the outgoing
-      * area once all six are taken. */
-     int to_stack = (*next_int >= 6);
-     u32 dst_reg = to_stack ? X64_RAX : g_int_arg_regs[(*next_int)++];
-     if (av->storage.kind == OPK_LOCAL) {
-       XSlot* s = slot_get(a, av->storage.v.frame_slot);
-       if (!s) compiler_panic(t->c, a->loc, "x64 call: bad byval slot");
-       emit_lea(t->mc, dst_reg, X64_RBP, -(i32)s->off);
-     } else if (av->storage.kind == OPK_INDIRECT) {
-       emit_lea(t->mc, dst_reg, av->storage.v.ind.base & 0xFu,
-                av->storage.v.ind.ofs);
-     } else {
-       compiler_panic(t->c, a->loc,
-                      "x64 call: INDIRECT arg storage kind %d unsupported",
-                      (int)av->storage.kind);
-     }
-     if (to_stack) {
-       emit_mov_store(t->mc, 8, dst_reg, X64_RSP, (i32)*stack_off);
-       *stack_off += 8;
-     }
-     return;
-   }
-
-   for (u16 i = 0; i < ai->nparts; ++i) {
-     const ABIArgPart* pt = &ai->parts[i];
-     u32 sz = pt->size;
-     if (pt->cls == ABI_CLASS_INT) {
-       int to_stack = (*next_int >= 6);
-       u32 dst_reg = to_stack ? X64_RAX : g_int_arg_regs[(*next_int)++];
-       switch (av->storage.kind) {
-         case OPK_IMM: {
-           int w = (sz == 8) ? 1 : 0;
-           emit_load_imm(t->mc, w, dst_reg, av->storage.v.imm);
-           break;
-         }
-         case OPK_REG: {
-           int w = (sz == 8) ? 1 : 0;
-           u32 sr = av->storage.v.reg & 0xFu;
-           if (sr != dst_reg) emit_mov_rr(t->mc, w, dst_reg, sr);
-           break;
-         }
-         case OPK_LOCAL: {
-           XSlot* s = slot_get(a, av->storage.v.frame_slot);
-           if (!s) compiler_panic(t->c, a->loc, "x64 call: bad arg slot");
-           emit_mov_load(t->mc, sz, 0, dst_reg, X64_RBP,
-                         -(i32)s->off + (i32)pt->src_offset);
-           break;
-         }
-         case OPK_INDIRECT: {
-           /* cg holds INDIRECT base regs in {RBX, R10, R12..R15}, disjoint
-            * from arg regs (RDI/RSI/RDX/RCX/R8/R9) and the RAX scratch, so
-            * the base survives across the part loop. */
-           emit_mov_load(t->mc, sz, 0, dst_reg, av->storage.v.ind.base & 0xFu,
-                         av->storage.v.ind.ofs + (i32)pt->src_offset);
-           break;
-         }
-         default:
-           compiler_panic(t->c, a->loc,
-                          "x64 call: arg storage kind %d unsupported",
-                          (int)av->storage.kind);
-       }
-       if (to_stack) {
-         /* Stack arguments always occupy a full 8-byte word. */
-         emit_mov_store(t->mc, 8, dst_reg, X64_RSP, (i32)*stack_off);
-         *stack_off += 8;
-       }
-     } else if (pt->cls == ABI_CLASS_FP) {
-       int to_stack = (*next_fp >= 8);
-       u8 prefix2 = (sz == 8) ? 0xF2 : 0xF3; /* movsd : movss */
-       if (!to_stack) {
-         u32 dst_x = (*next_fp)++;
-         if (av->storage.kind == OPK_REG) {
-           u32 sx = av->storage.v.reg & 0xFu;
-           if (sx != dst_x) emit_sse_rr(t->mc, prefix2, 0x10, dst_x, sx);
-         } else if (av->storage.kind == OPK_INDIRECT) {
-           emit_sse_load(t->mc, prefix2, 0x10, dst_x,
-                         av->storage.v.ind.base & 0xFu,
-                         av->storage.v.ind.ofs + (i32)pt->src_offset);
-         } else {
-           compiler_panic(t->c, a->loc,
-                          "x64 call: FP arg storage kind %d unsupported",
-                          (int)av->storage.kind);
-         }
-       } else {
-         if (av->storage.kind == OPK_REG) {
-           emit_sse_store(t->mc, prefix2, 0x11, av->storage.v.reg & 0xFu,
-                          X64_RSP, (i32)*stack_off);
-         } else if (av->storage.kind == OPK_INDIRECT) {
-           /* Load through xmm15 (scratch — last in g_fp_order so cg won't
-            * have it live mid-call) then store. */
-           emit_sse_load(t->mc, prefix2, 0x10, X64_XMM15,
-                         av->storage.v.ind.base & 0xFu,
-                         av->storage.v.ind.ofs + (i32)pt->src_offset);
-           emit_sse_store(t->mc, prefix2, 0x11, X64_XMM15, X64_RSP,
-                          (i32)*stack_off);
-         } else {
-           compiler_panic(t->c, a->loc,
-                          "x64 call: FP stack-arg storage kind %d unsupported",
-                          (int)av->storage.kind);
-         }
-         *stack_off += 8;
-       }
-     } else {
-       compiler_panic(t->c, a->loc, "x64 call: ABI class %d unimpl",
-                      (int)pt->cls);
-     }
-   }
- }
-
- /* Emit a call described by `d`.
-  *
-  *   1. sret: the address of the (LOCAL) return slot is loaded into RDI and
-  *      consumes the first integer argument register.
-  *   2. Arguments are placed left to right by emit_arg_value; the
-  *      16-byte-rounded stack usage is folded into a->max_outgoing.
-  *   3. Variadic callees get the number of XMM registers used in AL.
-  *   4. The call itself is `call rel32` with an R_X64_PLT32 relocation for
-  *      a GLOBAL callee, or `call r/m64` (FF /2) for a REG callee.
-  *   5. A DIRECT return value is copied out of RAX/RDX and XMM0/XMM1 into
-  *      d->ret.storage, part by part.
-  *
-  * d->abi may be NULL; in that case there is no return description and
-  * nothing is received after the call. */
- static void x_call(CGTarget* t, const CGCallDesc* d) {
-   XImpl* a = impl_of(t);
-   MCEmitter* mc = t->mc;
-
-   u32 next_int = 0, next_fp = 0, stack_off = 0;
-
-   /* sret: caller puts destination pointer in rdi. */
-   if (d->abi && d->abi->has_sret) {
-     if (d->ret.storage.kind != OPK_LOCAL) {
-       compiler_panic(t->c, a->loc, "x64 call: sret destination must be LOCAL");
-     }
-     XSlot* s = slot_get(a, d->ret.storage.v.frame_slot);
-     if (!s) compiler_panic(t->c, a->loc, "x64 call: bad sret slot");
-     emit_lea(mc, X64_RDI, X64_RBP, -(i32)s->off);
-     next_int = 1;
-   }
-   for (u32 i = 0; i < d->nargs; ++i) {
-     emit_arg_value(t, &d->args[i], &next_int, &next_fp, &stack_off);
-   }
-   u32 needed = (stack_off + 15u) & ~15u;
-   if (needed > a->max_outgoing) a->max_outgoing = needed;
-
-   /* Variadic calls: AL = number of XMM regs used. */
-   if (d->abi && d->abi->variadic) {
-     emit_load_imm(mc, 0, X64_RAX, (i64)next_fp);
-   }
-
-   if (d->callee.kind == OPK_GLOBAL) {
-     /* call rel32: E8 + disp32 + R_X64_PLT32. The -4 accounts for the
-      * displacement being relative to the end of the 4-byte field. */
-     u8 op = 0xE8;
-     mc->emit_bytes(mc, &op, 1);
-     u32 disp_pos = mc->pos(mc);
-     emit_u32le(mc, 0);
-     mc->emit_reloc_at(mc, mc->section_id, disp_pos, R_X64_PLT32,
-                       d->callee.v.global.sym,
-                       d->callee.v.global.addend - 4, 1, 0);
-   } else if (d->callee.kind == OPK_REG) {
-     u32 r = d->callee.v.reg & 0xFu;
-     emit_rex(mc, 0, 0, 0, r);
-     u8 buf[2] = {0xFF, modrm(3u, 2u, r)};
-     mc->emit_bytes(mc, buf, 2);
-   } else {
-     compiler_panic(t->c, a->loc, "x64 call: callee kind %d unsupported",
-                    (int)d->callee.kind);
-   }
-
-   /* Receive return value. The earlier uses of d->abi tolerate NULL, so do
-    * the same here instead of dereferencing it unconditionally. */
-   if (!d->abi) return;
-   const ABIArgInfo* ri = &d->abi->ret;
-   if (ri->kind == ABI_ARG_IGNORE || ri->kind == ABI_ARG_INDIRECT) return;
-   if (ri->nparts == 0) return;
-
-   Operand rs = d->ret.storage;
-   u32 next_int_ret = 0, next_fp_ret = 0;
-   static const u32 ret_int_regs[2] = {X64_RAX, X64_RDX};
-   for (u16 i = 0; i < ri->nparts; ++i) {
-     const ABIArgPart* p = &ri->parts[i];
-     u32 src_reg = 0;
-     if (p->cls == ABI_CLASS_INT) {
-       /* Only RAX and RDX carry integer results. */
-       if (next_int_ret >= 2u) {
-         compiler_panic(t->c, a->loc, "x64 call: too many INT ret parts");
-       }
-       src_reg = ret_int_regs[next_int_ret++];
-     } else if (p->cls == ABI_CLASS_FP) {
-       /* Only XMM0 and XMM1 carry FP results. */
-       if (next_fp_ret >= 2u) {
-         compiler_panic(t->c, a->loc, "x64 call: too many FP ret parts");
-       }
-       src_reg = (u32)(X64_XMM0 + next_fp_ret++);
-     } else {
-       compiler_panic(t->c, a->loc, "x64 call: ret cls %d unimpl",
-                      (int)p->cls);
-     }
-
-     if (rs.kind == OPK_REG) {
-       if (ri->nparts != 1) {
-         compiler_panic(t->c, a->loc,
-                        "x64 call: REG ret_storage with %u parts",
-                        (unsigned)ri->nparts);
-       }
-       if (p->cls == ABI_CLASS_INT) {
-         int w = (p->size == 8) ? 1 : 0;
-         u32 dr = rs.v.reg & 0xFu;
-         if (dr != src_reg) emit_mov_rr(mc, w, dr, src_reg);
-       } else {
-         u8 prefix2 = (p->size == 8) ? 0xF2 : 0xF3;
-         u32 dr = rs.v.reg & 0xFu;
-         if (dr != src_reg) emit_sse_rr(mc, prefix2, 0x10, dr, src_reg);
-       }
-     } else if (rs.kind == OPK_LOCAL || rs.kind == OPK_INDIRECT) {
-       u32 base_reg;
-       i32 base_off;
-       if (rs.kind == OPK_LOCAL) {
-         XSlot* s = slot_get(a, rs.v.frame_slot);
-         if (!s) compiler_panic(t->c, a->loc, "x64 call: bad ret slot");
-         base_reg = X64_RBP;
-         base_off = -(i32)s->off;
-       } else {
-         base_reg = rs.v.ind.base & 0xFu;
-         base_off = rs.v.ind.ofs;
-       }
-       i32 off = base_off + (i32)p->src_offset;
-       if (p->cls == ABI_CLASS_INT) {
-         emit_mov_store(mc, p->size, src_reg, base_reg, off);
-       } else {
-         u8 prefix2 = (p->size == 8) ? 0xF2 : 0xF3;
-         emit_sse_store(mc, prefix2, 0x11, src_reg, base_reg, off);
-       }
-     } else if (rs.kind == OPK_IMM && rs.type && rs.type->kind == TY_VOID) {
-       /* void ret placeholder — nothing to do. */
-     } else {
-       compiler_panic(t->c, a->loc,
-                      "x64 call: ret_storage kind %d unsupported",
-                      (int)rs.kind);
-     }
-   }
- }
-
- /* Emit a function return: move `val` (if any) into the ABI return
-  * location, then jump to the shared epilogue label.
-  *
-  *   INDIRECT abi     copy the aggregate into the sret buffer through RDI
-  *                    (reloaded from a->sret_ptr_slot when one is
-  *                    recorded), staged via RAX, and return the buffer
-  *                    pointer in RAX.
-  *   REG storage      move into XMM0 (FP class) or RAX.
-  *   IMM storage      load into RAX.
-  *   LOCAL/INDIRECT   DIRECT aggregate: load each ABI part into RAX/RDX or
-  *                    XMM0/XMM1 in part order.
-  */
- static void x_ret(CGTarget* t, const CGABIValue* val) {
-   static const u32 ret_gprs[2] = {X64_RAX, X64_RDX};
-   static const u32 wide_chunks[3] = {8u, 4u, 2u};
-   XImpl* a = impl_of(t);
-   MCEmitter* mc = t->mc;
-
-   if (val) {
-     const ABIArgInfo* info = val->abi;
-     if (info && info->kind == ABI_ARG_INDIRECT) {
-       /* sret: reload destination pointer into rdi, memcpy source into
-        * [rdi]. */
-       u32 from_reg;
-       i32 from_off;
-       u32 total;
-       switch (val->storage.kind) {
-         case OPK_LOCAL: {
-           XSlot* slot = slot_get(a, val->storage.v.frame_slot);
-           if (!slot) compiler_panic(t->c, a->loc, "x64 ret: bad sret slot");
-           from_reg = X64_RBP;
-           from_off = -(i32)slot->off;
-           total = slot->size;
-           break;
-         }
-         case OPK_INDIRECT:
-           from_reg = val->storage.v.ind.base & 0xFu;
-           from_off = val->storage.v.ind.ofs;
-           total = val->size;
-           if (!total) {
-             compiler_panic(t->c, a->loc,
-                            "x64 ret indirect: missing aggregate size");
-           }
-           break;
-         default:
-           compiler_panic(t->c, a->loc,
-                          "x64 ret indirect: storage kind %d unsupported",
-                          (int)val->storage.kind);
-       }
-
-       if (a->sret_ptr_slot != FRAME_SLOT_NONE) {
-         XSlot* ptr_slot = slot_get(a, a->sret_ptr_slot);
-         if (ptr_slot) {
-           emit_mov_load(mc, 8, 0, X64_RDI, X64_RBP, -(i32)ptr_slot->off);
-         }
-       }
-
-       /* Widest-first unrolled copy through RAX. */
-       u32 done = 0;
-       for (u32 k = 0; k < 3u; ++k) {
-         u32 width = wide_chunks[k];
-         for (; done + width <= total; done += width) {
-           emit_mov_load(mc, width, 0, X64_RAX, from_reg, from_off + (i32)done);
-           emit_mov_store(mc, width, X64_RAX, X64_RDI, (i32)done);
-         }
-       }
-       for (; done < total; done += 1) {
-         emit_mov_load(mc, 1, 0, X64_RAX, from_reg, from_off + (i32)done);
-         emit_mov_store(mc, 1, X64_RAX, X64_RDI, (i32)done);
-       }
-       /* Convention: return sret pointer in rax. */
-       emit_mov_rr(mc, 1, X64_RAX, X64_RDI);
-     } else if (val->storage.kind == OPK_REG) {
-       u32 from = val->storage.v.reg & 0xFu;
-       if (val->storage.cls == RC_FP) {
-         u8 mov_prefix = type_is_fp_double(val->storage.type) ? 0xF2 : 0xF3;
-         if (from != X64_XMM0) emit_sse_rr(mc, mov_prefix, 0x10, X64_XMM0, from);
-       } else {
-         int wide = type_is_64(val->storage.type) ? 1 : 0;
-         if (from != X64_RAX) emit_mov_rr(mc, wide, X64_RAX, from);
-       }
-     } else if (val->storage.kind == OPK_IMM) {
-       int wide = type_is_64(val->storage.type) ? 1 : 0;
-       emit_load_imm(mc, wide, X64_RAX, val->storage.v.imm);
-     } else if (val->storage.kind == OPK_LOCAL ||
-                val->storage.kind == OPK_INDIRECT) {
-       /* DIRECT struct return: load each part into rax/rdx or xmm0/xmm1. */
-       u32 from_reg;
-       i32 from_off;
-       if (val->storage.kind == OPK_LOCAL) {
-         XSlot* slot = slot_get(a, val->storage.v.frame_slot);
-         if (!slot) compiler_panic(t->c, a->loc, "x64 ret: bad local slot");
-         from_reg = X64_RBP;
-         from_off = -(i32)slot->off;
-       } else {
-         from_reg = val->storage.v.ind.base & 0xFu;
-         from_off = val->storage.v.ind.ofs;
-       }
-       u32 gpr_idx = 0;
-       u32 xmm_idx = 0;
-       u16 part_count = info ? info->nparts : 0;
-       for (u16 i = 0; i < part_count; ++i) {
-         const ABIArgPart* part = &info->parts[i];
-         i32 part_off = from_off + (i32)part->src_offset;
-         if (part->cls == ABI_CLASS_INT) {
-           emit_mov_load(mc, part->size, 0, ret_gprs[gpr_idx++], from_reg,
-                         part_off);
-         } else if (part->cls == ABI_CLASS_FP) {
-           u8 mov_prefix = (part->size == 8) ? 0xF2 : 0xF3;
-           emit_sse_load(mc, mov_prefix, 0x10, (u32)(X64_XMM0 + xmm_idx++),
-                         from_reg, part_off);
-         } else {
-           compiler_panic(t->c, a->loc, "x64 ret: ret part cls %d unimpl",
-                          (int)part->cls);
-         }
-       }
-     }
-   }
-   emit_jmp_label(mc, a->epilogue_label);
- }
-
-/* ============================================================
- * Alloca / VLA.
- *
- * Layout (low → high addresses, after a `sub rsp, aligned_size`):
- * [rsp + 0, +max_outgoing): outgoing-arg area
- * [rsp + max_outgoing, +max_outgoing +aligned_size): newly allocated block
- *
- * max_outgoing is only known at func_end (it is the max across all
- * x_call sites in the function), so each alloca emits a placeholder
- * `lea dst, [rsp + 0]` whose 4-byte disp is patched at func_end. The
- * epilogue restores rsp via `leave` (mov rsp, rbp; pop rbp), so no
- * extra dance is needed when alloca is present. */
-
- /* Emit `lea dst, [rsp + disp32]` with a zero displacement and report the
-  * section offset of the 4-byte displacement field in *out_disp_pos.
-  * The disp32 form (mod=10, rm=SIB, base=rsp, no index) is forced
-  * regardless of the eventual value so func_end has a fixed-width field
-  * to patch. 8 bytes: REX.W [+R] | 0x8D | ModRM | SIB | disp32. */
- static void emit_lea_rsp_disp32(MCEmitter* mc, u32 dst, u32* out_disp_pos) {
-   u32 insn_start = obj_pos(mc->obj, mc->section_id);
-
-   emit_rex(mc, 1, dst, 0, X64_RSP);
-   u8 lea_opc = 0x8D;
-   mc->emit_bytes(mc, &lea_opc, 1);
-   u8 modrm_byte = modrm(2u, dst & 7u, 4u);
-   mc->emit_bytes(mc, &modrm_byte, 1);
-   u8 sib_byte = sib(0, 4u, X64_RSP);
-   mc->emit_bytes(mc, &sib_byte, 1);
-
-   *out_disp_pos = mc->pos(mc);
-   emit_u32le(mc, 0);
-
-   if (mc->debug) {
-     debug_emit_row(mc->debug, mc->section_id, insn_start, mc->loc);
-   }
- }
-
- /* Dynamic stack allocation: lower RSP by the 16-byte-rounded size, then
-  * set `d` to the start of the new block.
-  *
-  *   d      destination; must be a REG operand.
-  *   sz     byte count, IMM or REG. An immediate 0 still reserves 16 bytes.
-  *   align  requested alignment; values above 16 are not supported.
-  *
-  * The block starts above the outgoing-argument area, whose size is only
-  * known at func_end, so the final `lea d, [rsp + disp32]` is emitted with
-  * a zero displacement and its position recorded in a->alloca_patches. */
- static void x_alloca_(CGTarget* t, Operand d, Operand sz, u32 align) {
-   XImpl* a = impl_of(t);
-   MCEmitter* mc = t->mc;
-   if (d.kind != OPK_REG)
-     compiler_panic(t->c, a->loc, "x64 alloca: dst must be REG");
-   if (align > 16) {
-     compiler_panic(t->c, a->loc,
-                    "x64 alloca: align %u > 16 not yet supported", align);
-   }
-
-   if (sz.kind == OPK_IMM) {
-     i64 v = sz.v.imm;
-     if (v < 0) compiler_panic(t->c, a->loc, "x64 alloca: negative size");
-     if ((u64)v <= (u64)0x7FFFFFF0) {
-       u64 aligned = ((u64)v + 15u) & ~(u64)15u;
-       if (aligned == 0) aligned = 16;
-       /* sub rsp, imm32 : REX.W 0x81 /5 imm32 (7 bytes). */
-       emit_rex(mc, 1, 0, 0, X64_RSP);
-       u8 buf[2] = {0x81, modrm(3u, 5u, X64_RSP)};
-       mc->emit_bytes(mc, buf, 2);
-       emit_u32le(mc, (u32)aligned);
-     } else {
-       /* imm32 is sign-extended to 64 bits, so a rounded size above
-        * INT32_MAX would be truncated or turn the SUB into an ADD. Stage
-        * such sizes in RAX and subtract the full 64-bit value; a size too
-        * close to UINT64_MAX to round up is rounded down instead. */
-       u64 aligned = ((u64)v > ~(u64)15u) ? ~(u64)15u
-                                          : (((u64)v + 15u) & ~(u64)15u);
-       emit_load_imm(mc, 1, X64_RAX, (i64)aligned);
-       emit_alu_rr(mc, 1, 0x29, X64_RSP, X64_RAX);
-     }
-   } else if (sz.kind == OPK_REG) {
-     u32 sz_reg = sz.v.reg & 0xFu;
-     /* rax = (sz_reg + 15) & ~15 */
-     emit_lea(mc, X64_RAX, sz_reg, 15);
-     /* and rax, -16 : REX.W 0x83 /4 imm8(0xF0). */
-     emit_rex(mc, 1, 0, 0, X64_RAX);
-     u8 abuf[3] = {0x83, modrm(3u, 4u, X64_RAX), 0xF0};
-     mc->emit_bytes(mc, abuf, 3);
-     /* sub rsp, rax */
-     emit_alu_rr(mc, 1, 0x29, X64_RSP, X64_RAX);
-   } else {
-     compiler_panic(t->c, a->loc, "x64 alloca: size kind %d unsupported",
-                    (int)sz.kind);
-   }
-
-   /* lea dst, [rsp + max_outgoing] — placeholder, patched at func_end.
-    * Grow the patch list first (doubling, starting at 4 entries). */
-   if (a->nalloca_patches == a->alloca_patches_cap) {
-     u32 ncap = a->alloca_patches_cap ? a->alloca_patches_cap * 2u : 4u;
-     XAllocaPatch* nb = arena_array(t->c->tu, XAllocaPatch, ncap);
-     if (a->alloca_patches)
-       memcpy(nb, a->alloca_patches, sizeof(XAllocaPatch) * a->nalloca_patches);
-     a->alloca_patches = nb;
-     a->alloca_patches_cap = ncap;
-   }
-   u32 disp_pos;
-   emit_lea_rsp_disp32(mc, d.v.reg & 0xFu, &disp_pos);
-   a->alloca_patches[a->nalloca_patches].disp_pos = disp_pos;
-   a->nalloca_patches++;
-   a->has_alloca = 1;
- }
-
-/* SysV AMD64 __va_list_tag (24 bytes, 8-aligned):
- * off 0 u32 gp_offset next free GP slot in reg_save_area (0..48)
- * off 4 u32 fp_offset next free FP slot (48..176)
- * off 8 ptr overflow_arg_area pointer to next stack-passed arg
- * off 16 ptr reg_save_area pointer to the 176-byte save area
- *
- * The reg_save_area layout (filled in func_begin):
- * +0..+40 : rdi, rsi, rdx, rcx, r8, r9 (8B each)
- * +48..+168 : xmm0..xmm7 at 16B stride (low 8B written via movsd)
- *
- * va_arg dispatches on dst class. When the relevant offset reaches its
- * max (48 for GP, 176 for FP), fall through to overflow_arg_area at
- * 8-byte stride. */
-
- /* va_start: fill in the four __va_list_tag fields at the address held in
-  * the register of `ap_op`, staging each value through RAX.
-  *   +0   gp_offset          = next_param_int * 8
-  *   +4   fp_offset          = 48 + next_param_fp * 16
-  *   +8   overflow_arg_area  = rbp + 16 + next_param_stack
-  *   +16  reg_save_area      = rbp - reg_save_slot.off
-  * Panics when the current function is not variadic or has no register
-  * save slot. */
- static void x_va_start_(CGTarget* t, Operand ap_op) {
-   XImpl* impl = impl_of(t);
-   MCEmitter* mc = t->mc;
-   if (!impl->is_variadic) {
-     compiler_panic(t->c, impl->loc, "x64 va_start: function not variadic");
-   }
-   u32 list_reg = ap_op.v.reg & 0xFu;
-   XSlot* save_area = slot_get(impl, impl->reg_save_slot);
-   if (!save_area) {
-     compiler_panic(t->c, impl->loc, "x64 va_start: no reg_save_slot");
-   }
-
-   i64 gp_start = (i64)(impl->next_param_int * 8u);
-   i64 fp_start = (i64)(48u + impl->next_param_fp * 16u);
-   i32 overflow_disp = (i32)(16u + impl->next_param_stack);
-   i32 save_disp = -(i32)save_area->off;
-
-   /* gp_offset */
-   emit_load_imm(mc, 0, X64_RAX, gp_start);
-   emit_mov_store(mc, 4, X64_RAX, list_reg, 0);
-   /* fp_offset */
-   emit_load_imm(mc, 0, X64_RAX, fp_start);
-   emit_mov_store(mc, 4, X64_RAX, list_reg, 4);
-   /* overflow_arg_area */
-   emit_lea(mc, X64_RAX, X64_RBP, overflow_disp);
-   emit_mov_store(mc, 8, X64_RAX, list_reg, 8);
-   /* reg_save_area */
-   emit_lea(mc, X64_RAX, X64_RBP, save_disp);
-   emit_mov_store(mc, 8, X64_RAX, list_reg, 16);
- }
-
- /* Load one va_arg value of type `ty` from the address in R11 into `dr`:
-  * movss/movsd for FP destinations, a sized (sign-aware) integer load
-  * otherwise. */
- static void x_va_arg_load_r11(MCEmitter* mc, int is_fp, u32 sz, const Type* ty,
-                               u32 dr) {
-   if (is_fp) {
-     u8 mov_prefix = (sz == 8) ? 0xF2 : 0xF3;
-     emit_sse_load(mc, mov_prefix, 0x10, dr, X64_R11, 0);
-   } else {
-     int is_signed = type_is_signed(ty);
-     emit_mov_load(mc, sz, is_signed, dr, X64_R11, 0);
-   }
- }
-
- /* va_arg: fetch the next variadic argument of type `ty` into `dst`.
-  *
-  *     eax = ap[gp_offset | fp_offset]
-  *     if (eax >= limit) goto L_stack        ; 48 for GP, 176 for FP
-  *     r11 = ap.reg_save_area + rax          ; register path
-  *     dst = [r11]
-  *     eax += stride; ap[offset] = eax       ; 8 for GP, 16 for FP
-  *     goto L_done
-  *   L_stack:
-  *     r11 = ap.overflow_arg_area            ; stack path
-  *     dst = [r11]
-  *     r11 += 8; ap.overflow_arg_area = r11
-  *   L_done:
-  *
-  * The class is chosen from dst.cls. RAX and R11 are clobbered. */
- static void x_va_arg_(CGTarget* t, Operand dst, Operand ap_op,
-                       const Type* ty) {
-   MCEmitter* mc = t->mc;
-   u32 list_reg = ap_op.v.reg & 0xFu;
-   u32 sz = type_byte_size(ty);
-   int is_fp = (dst.cls == RC_FP);
-   u32 dr = dst.v.reg & 0xFu;
-
-   u32 offs_field, limit, stride;
-   if (is_fp) {
-     offs_field = 4u;
-     limit = 176u;
-     stride = 16u;
-   } else {
-     offs_field = 0u;
-     limit = 48u;
-     stride = 8u;
-   }
-
-   MCLabel from_stack = mc->label_new(mc);
-   MCLabel finished = mc->label_new(mc);
-
-   /* eax = ap[offs_field]; cmp eax, limit; jae from_stack. */
-   emit_mov_load(mc, 4, 0, X64_RAX, list_reg, (i32)offs_field);
-   if (limit <= 127u) {
-     emit_cmp_imm8(mc, 0, X64_RAX, (i8)limit);
-   } else {
-     /* cmp eax, imm32 : 0x3D imm32 (5 bytes, EAX-specific form). */
-     u32 insn_start = obj_pos(mc->obj, mc->section_id);
-     u8 cmp_opc = 0x3D;
-     mc->emit_bytes(mc, &cmp_opc, 1);
-     emit_u32le(mc, limit);
-     if (mc->debug) {
-       debug_emit_row(mc->debug, mc->section_id, insn_start, mc->loc);
-     }
-   }
-   emit_jcc_label(mc, X64_CC_AE, from_stack);
-
-   /* Register path: r11 = reg_save_area + rax, load, bump the offset. */
-   emit_mov_load(mc, 8, 0, X64_R11, list_reg, 16);
-   emit_alu_rr(mc, 1, 0x01, X64_R11, X64_RAX);
-   x_va_arg_load_r11(mc, is_fp, sz, ty, dr);
-   {
-     /* add eax, imm8 : 0x83 /0 imm8 (no REX needed for eax). */
-     u32 insn_start = obj_pos(mc->obj, mc->section_id);
-     u8 add_eax[3] = {0x83, modrm(3u, 0u, X64_RAX), (u8)stride};
-     mc->emit_bytes(mc, add_eax, 3);
-     if (mc->debug) {
-       debug_emit_row(mc->debug, mc->section_id, insn_start, mc->loc);
-     }
-   }
-   emit_mov_store(mc, 4, X64_RAX, list_reg, (i32)offs_field);
-   emit_jmp_label(mc, finished);
-
-   /* Stack path: r11 = overflow_arg_area, load, advance by one 8-byte
-    * slot. */
-   mc->label_place(mc, from_stack);
-   emit_mov_load(mc, 8, 0, X64_R11, list_reg, 8);
-   x_va_arg_load_r11(mc, is_fp, sz, ty, dr);
-   {
-     /* add r11, 8 : REX.WB 0x83 /0 imm8. */
-     u32 insn_start = obj_pos(mc->obj, mc->section_id);
-     u8 rex_wb = (u8)(X64_REX_BASE | X64_REX_W | X64_REX_B);
-     mc->emit_bytes(mc, &rex_wb, 1);
-     u8 add_r11[3] = {0x83, modrm(3u, 0u, X64_R11 & 7u), 8};
-     mc->emit_bytes(mc, add_r11, 3);
-     if (mc->debug) {
-       debug_emit_row(mc->debug, mc->section_id, insn_start, mc->loc);
-     }
-   }
-   emit_mov_store(mc, 8, X64_R11, list_reg, 8);
-
-   mc->label_place(mc, finished);
- }
-
- /* va_end: emits nothing on this target; both parameters are unused. */
- static void x_va_end_(CGTarget* t, Operand a) {
-   (void)a;
-   (void)t;
- }
-
-static void x_va_copy_(CGTarget* t, Operand d, Operand s) {
- MCEmitter* mc = t->mc;
- u32 dr = d.v.reg & 0xFu;
- u32 sr = s.v.reg & 0xFu;
- /* va_list is 24 bytes; three 8B loads + stores via rax. */
- for (u32 i = 0; i < 24u; i += 8u) {
- emit_mov_load(mc, 8, 0, X64_RAX, sr, (i32)i);
- emit_mov_store(mc, 8, X64_RAX, dr, (i32)i);
- }
-}
-
-/* ============================================================
- * Atomics (Group K).
- *
- * x86 has a strong memory model: plain MOV is acquire on loads and
- * release on stores, so most MemOrders need no extra fence. The
- * exception is SEQ_CST stores, which need a full StoreLoad barrier —
- * realized either via XCHG (which has implicit LOCK) or MOV+MFENCE.
- * All LOCK-prefixed RMWs (XADD/XCHG/CMPXCHG) act as full barriers,
- * subsuming any MemOrder the front end requests. */
-
-static void emit_lock_prefix(MCEmitter* mc) {
- u8 b = 0xF0;
- mc->emit_bytes(mc, &b, 1);
-}
-
-static void emit_mfence(MCEmitter* mc) {
- u32 ofs = obj_pos(mc->obj, mc->section_id);
- u8 b[3] = {0x0F, 0xAE, 0xF0};
- mc->emit_bytes(mc, b, 3);
- if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
-}
-
-static void emit_ud2(MCEmitter* mc) {
- u32 ofs = obj_pos(mc->obj, mc->section_id);
- u8 b[2] = {0x0F, 0x0B};
- mc->emit_bytes(mc, b, 2);
- if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
-}
-
-/* LOCK XADD [base+disp], src. Opcode 0F C1 /r (32/64-bit; sets src=prior,
- * mem=mem+src). */
-static void emit_lock_xadd(MCEmitter* mc, int w, u32 src, u32 base, i32 disp) {
- u32 ofs = obj_pos(mc->obj, mc->section_id);
- emit_lock_prefix(mc);
- emit_rex(mc, w, src, 0, base);
- u8 op[2] = {0x0F, 0xC1};
- mc->emit_bytes(mc, op, 2);
- emit_mem_operand(mc, src, base, disp);
- if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
-}
-
-/* XCHG [base+disp], src. Opcode 87 /r. LOCK is implicit when the
- * destination is memory, but we emit it explicitly for clarity. */
-static void emit_lock_xchg_mem(MCEmitter* mc, int w, u32 src, u32 base,
- i32 disp) {
- u32 ofs = obj_pos(mc->obj, mc->section_id);
- emit_lock_prefix(mc);
- emit_rex(mc, w, src, 0, base);
- u8 op = 0x87;
- mc->emit_bytes(mc, &op, 1);
- emit_mem_operand(mc, src, base, disp);
- if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
-}
-
-/* LOCK CMPXCHG [base+disp], src. Opcode 0F B1 /r. Compares RAX with [mem];
- * if equal, [mem]=src and ZF=1; else RAX=[mem] and ZF=0. */
-static void emit_lock_cmpxchg(MCEmitter* mc, int w, u32 src, u32 base,
- i32 disp) {
- u32 ofs = obj_pos(mc->obj, mc->section_id);
- emit_lock_prefix(mc);
- emit_rex(mc, w, src, 0, base);
- u8 op[2] = {0x0F, 0xB1};
- mc->emit_bytes(mc, op, 2);
- emit_mem_operand(mc, src, base, disp);
- if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
-}
-
-/* POPCNT rd, rs. Encoding: F3 0F B8 /r. */
-static void emit_popcnt(MCEmitter* mc, int w, u32 dst, u32 src) {
- u32 ofs = obj_pos(mc->obj, mc->section_id);
- u8 p = 0xF3;
- mc->emit_bytes(mc, &p, 1);
- emit_rex(mc, w, dst, 0, src);
- u8 op[2] = {0x0F, 0xB8};
- mc->emit_bytes(mc, op, 2);
- emit_rm_reg(mc, dst, src);
- if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
-}
-
-/* BSF/BSR rd, rs. opcode2 = 0xBC (BSF) or 0xBD (BSR). */
-static void emit_bs(MCEmitter* mc, int w, u8 opcode2, u32 dst, u32 src) {
- u32 ofs = obj_pos(mc->obj, mc->section_id);
- emit_rex(mc, w, dst, 0, src);
- u8 op[2] = {0x0F, opcode2};
- mc->emit_bytes(mc, op, 2);
- emit_rm_reg(mc, dst, src);
- if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
-}
-
-/* BSWAP r32/r64. Opcode 0F C8+r; REX.W for r64; REX.B if reg>=8. */
-static void emit_bswap(MCEmitter* mc, int w, u32 reg) {
- u32 ofs = obj_pos(mc->obj, mc->section_id);
- emit_rex(mc, w, 0, 0, reg);
- u8 op[2] = {0x0F, (u8)(0xC8 + (reg & 7))};
- mc->emit_bytes(mc, op, 2);
- if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
-}
-
-/* ROL r/m16, imm8. Used to swap bytes in a 16-bit value (ROL by 8). */
-static void emit_rol16_imm8(MCEmitter* mc, u32 reg, u8 imm) {
- u32 ofs = obj_pos(mc->obj, mc->section_id);
- u8 p = 0x66;
- mc->emit_bytes(mc, &p, 1);
- emit_rex(mc, 0, 0, 0, reg);
- u8 buf[3];
- buf[0] = 0xC1;
- buf[1] = modrm(3u, 0u, reg & 7u);
- buf[2] = imm;
- mc->emit_bytes(mc, buf, 3);
- if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
-}
-
-/* XOR r/m, imm32 — opcode 81 /6. Used to compute (bits-1) - x via XOR. */
-static void emit_xor_imm32(MCEmitter* mc, int w, u32 reg, i32 imm) {
- u32 ofs = obj_pos(mc->obj, mc->section_id);
- emit_rex(mc, w, 0, 0, reg);
- u8 op = 0x81;
- mc->emit_bytes(mc, &op, 1);
- emit_rm_reg(mc, 6u, reg);
- emit_u32le(mc, (u32)imm);
- if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
-}
-
-/* Resolve an atomic addr operand to (base, disp) for a memory operand.
- * Accepts OPK_REG (pointer in reg, disp=0), OPK_LOCAL, or OPK_INDIRECT. */
-static u32 atomic_addr_base(CGTarget* t, Operand addr, i32* out_disp) {
- if (addr.kind == OPK_REG) {
- *out_disp = 0;
- return addr.v.reg & 0xFu;
- }
- return addr_base(t, addr, out_disp);
-}
-
-static void x_atomic_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma,
- MemOrder ord) {
- MCEmitter* mc = t->mc;
- (void)ord; /* x86: plain MOV satisfies all orders for loads. */
- u32 sz = ma.size ? ma.size : type_byte_size(dst.type);
- i32 disp;
- u32 base = atomic_addr_base(t, addr, &disp);
- int signed_ = type_is_signed(ma.type ? ma.type : dst.type);
- emit_mov_load(mc, sz, signed_, dst.v.reg & 0xFu, base, disp);
-}
-
-static void x_atomic_store(CGTarget* t, Operand addr, Operand src, MemAccess ma,
- MemOrder ord) {
- XImpl* a = impl_of(t);
- MCEmitter* mc = t->mc;
- u32 sz = ma.size ? ma.size : type_byte_size(src.type);
- int w = (sz == 8) ? 1 : 0;
- i32 disp;
- u32 base = atomic_addr_base(t, addr, &disp);
-
- /* Materialize src into a register. */
- u32 sr;
- if (src.kind == OPK_IMM) {
- emit_load_imm(mc, w, X64_R11, src.v.imm);
- sr = X64_R11;
- } else if (src.kind == OPK_REG) {
- sr = src.v.reg & 0xFu;
- } else {
- compiler_panic(t->c, a->loc, "x64 atomic_store: src kind %d unsupported",
- (int)src.kind);
- }
-
- if (ord == MO_SEQ_CST) {
- /* SEQ_CST store: XCHG implicitly fences. Move src into r11 so the
- * caller's reg is unmodified, then xchg [mem], r11. */
- if (sr != X64_R11) emit_mov_rr(mc, w, X64_R11, sr);
- emit_lock_xchg_mem(mc, w, X64_R11, base, disp);
- return;
- }
- /* Plain store covers RELAXED / RELEASE. */
- emit_mov_store(mc, sz, sr, base, disp);
-}
-
-static void x_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr,
- Operand val, MemAccess ma, MemOrder ord) {
- XImpl* a = impl_of(t);
- MCEmitter* mc = t->mc;
- (void)ord; /* LOCK-prefixed ops are unconditionally full barriers. */
- u32 sz = ma.size ? ma.size : type_byte_size(dst.type);
- int w = (sz == 8) ? 1 : 0;
- i32 disp;
- u32 base = atomic_addr_base(t, addr, &disp);
- u32 dr = dst.v.reg & 0xFu;
-
- /* Materialize val into r11 (it's our working temp). For SUB we negate
- * it so the XADD does the subtraction. */
- if (val.kind == OPK_IMM) {
- i64 v = val.v.imm;
- if (op == AO_SUB) v = -v;
- emit_load_imm(mc, w, X64_R11, v);
- } else if (val.kind == OPK_REG) {
- u32 vr = val.v.reg & 0xFu;
- if (vr != X64_R11) emit_mov_rr(mc, w, X64_R11, vr);
- if (op == AO_SUB) emit_f7_rm(mc, w, 3u, X64_R11); /* NEG */
- } else {
- compiler_panic(t->c, a->loc, "x64 atomic_rmw: val kind %d unsupported",
- (int)val.kind);
- }
-
- if (op == AO_ADD || op == AO_SUB) {
- /* LOCK XADD [base], r11 — afterwards r11 holds prior. */
- emit_lock_xadd(mc, w, X64_R11, base, disp);
- if (dr != X64_R11) emit_mov_rr(mc, w, dr, X64_R11);
- return;
- }
- if (op == AO_XCHG) {
- emit_lock_xchg_mem(mc, w, X64_R11, base, disp);
- if (dr != X64_R11) emit_mov_rr(mc, w, dr, X64_R11);
- return;
- }
-
- /* AND/OR/XOR/NAND: CMPXCHG retry loop.
- *
- * mov rax, [mem]
- * .retry:
- * mov rcx, rax ; new = prior
- * <op> rcx, r11 ; combine with val
- * [NAND: not rcx]
- * lock cmpxchg [mem], rcx
- * jne .retry
- * mov dr, rax
- *
- * rax = prior (cmpxchg implicit), rcx = new (scratch), r11 = val. */
- emit_mov_load(mc, sz, 0, X64_RAX, base, disp);
- MCLabel L_retry = mc->label_new(mc);
- mc->label_place(mc, L_retry);
- emit_mov_rr(mc, w, X64_RCX, X64_RAX);
- switch (op) {
- case AO_AND:
- emit_alu_rr(mc, w, 0x21, X64_RCX, X64_R11);
- break;
- case AO_OR:
- emit_alu_rr(mc, w, 0x09, X64_RCX, X64_R11);
- break;
- case AO_XOR:
- emit_alu_rr(mc, w, 0x31, X64_RCX, X64_R11);
- break;
- case AO_NAND:
- emit_alu_rr(mc, w, 0x21, X64_RCX, X64_R11);
- emit_f7_rm(mc, w, 2u, X64_RCX); /* NOT */
- break;
- default:
- compiler_panic(t->c, a->loc, "x64 atomic_rmw: op %d unimpl", (int)op);
- }
- emit_lock_cmpxchg(mc, w, X64_RCX, base, disp);
- emit_jcc_label(mc, X64_CC_NE, L_retry);
- if (dr != X64_RAX) emit_mov_rr(mc, w, dr, X64_RAX);
-}
-
-static void x_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr,
- Operand expected, Operand desired, MemAccess ma,
- MemOrder succ, MemOrder fail) {
- XImpl* a = impl_of(t);
- MCEmitter* mc = t->mc;
- (void)succ;
- (void)fail;
- u32 sz = ma.size ? ma.size : type_byte_size(prior.type);
- int w = (sz == 8) ? 1 : 0;
- i32 disp;
- u32 base = atomic_addr_base(t, addr, &disp);
-
- /* RAX = expected. */
- if (expected.kind == OPK_IMM) {
- emit_load_imm(mc, w, X64_RAX, expected.v.imm);
- } else if (expected.kind == OPK_REG) {
- u32 er = expected.v.reg & 0xFu;
- if (er != X64_RAX) emit_mov_rr(mc, w, X64_RAX, er);
- } else {
- compiler_panic(t->c, a->loc, "x64 atomic_cas: exp kind %d unsupported",
- (int)expected.kind);
- }
- /* R11 = desired. */
- if (desired.kind == OPK_IMM) {
- emit_load_imm(mc, w, X64_R11, desired.v.imm);
- } else if (desired.kind == OPK_REG) {
- u32 dr2 = desired.v.reg & 0xFu;
- if (dr2 != X64_R11) emit_mov_rr(mc, w, X64_R11, dr2);
- } else {
- compiler_panic(t->c, a->loc, "x64 atomic_cas: des kind %d unsupported",
- (int)desired.kind);
- }
-
- emit_lock_cmpxchg(mc, w, X64_R11, base, disp);
-
- /* ok = ZF (success). */
- u32 ok_r = ok.v.reg & 0xFu;
- emit_setcc(mc, X64_CC_E, ok_r);
- emit_movzx_r32_r8(mc, ok_r, ok_r);
-
- /* prior = rax. */
- u32 pr = prior.v.reg & 0xFu;
- if (pr != X64_RAX) emit_mov_rr(mc, w, pr, X64_RAX);
-}
-
-static void x_fence(CGTarget* t, MemOrder o) {
- /* x86: only SEQ_CST needs an explicit StoreLoad barrier. RELAXED is
- * a no-op; ACQUIRE/RELEASE/ACQ_REL are satisfied by plain MOV. */
- if (o == MO_SEQ_CST) emit_mfence(t->mc);
-}
-
-/* ============================================================
- * Intrinsics (Group L). */
-
-static void x_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd,
- const Operand* args, u32 na) {
- XImpl* a = impl_of(t);
- MCEmitter* mc = t->mc;
- (void)nd;
- (void)na;
-
- switch (kind) {
- case INTRIN_POPCOUNT: {
- Operand src = args[0];
- Operand dst = dsts[0];
- int w = type_is_64(src.type) ? 1 : 0;
- emit_popcnt(mc, w, dst.v.reg & 0xFu, src.v.reg & 0xFu);
- return;
- }
- case INTRIN_CTZ: {
- /* BSF gives the index of the lowest set bit (undefined for 0). */
- Operand src = args[0];
- Operand dst = dsts[0];
- int w = type_is_64(src.type) ? 1 : 0;
- emit_bs(mc, w, 0xBC, dst.v.reg & 0xFu, src.v.reg & 0xFu);
- return;
- }
- case INTRIN_CLZ: {
- /* BSR gives the index of the highest set bit; clz = (bits-1) - bsr.
- * XOR with (bits-1) computes the subtraction for in-range values. */
- Operand src = args[0];
- Operand dst = dsts[0];
- int w = type_is_64(src.type) ? 1 : 0;
- u32 dr = dst.v.reg & 0xFu;
- emit_bs(mc, w, 0xBD, dr, src.v.reg & 0xFu);
- emit_xor_imm32(mc, w, dr, w ? 63 : 31);
- return;
- }
- case INTRIN_BSWAP16: {
- Operand src = args[0];
- Operand dst = dsts[0];
- u32 dr = dst.v.reg & 0xFu;
- u32 sr = src.v.reg & 0xFu;
- if (dr != sr) emit_mov_rr(mc, 0, dr, sr);
- emit_rol16_imm8(mc, dr, 8);
- return;
- }
- case INTRIN_BSWAP32: {
- Operand src = args[0];
- Operand dst = dsts[0];
- u32 dr = dst.v.reg & 0xFu;
- u32 sr = src.v.reg & 0xFu;
- if (dr != sr) emit_mov_rr(mc, 0, dr, sr);
- emit_bswap(mc, 0, dr);
- return;
- }
- case INTRIN_BSWAP64: {
- Operand src = args[0];
- Operand dst = dsts[0];
- u32 dr = dst.v.reg & 0xFu;
- u32 sr = src.v.reg & 0xFu;
- if (dr != sr) emit_mov_rr(mc, 1, dr, sr);
- emit_bswap(mc, 1, dr);
- return;
- }
- case INTRIN_MEMCPY:
- case INTRIN_MEMMOVE: {
- /* args = (dst_addr, src_addr, n_bytes). v1: const n, REG ptrs. */
- Operand da = args[0], sa = args[1], nb = args[2];
- if (da.kind != OPK_REG || sa.kind != OPK_REG || nb.kind != OPK_IMM) {
- compiler_panic(t->c, a->loc,
- "x64 intrinsic: %s with non-const n or non-REG ptr",
- kind == INTRIN_MEMCPY ? "memcpy" : "memmove");
- }
- u32 dr = da.v.reg & 0xFu;
- u32 sr = sa.v.reg & 0xFu;
- u32 n = (u32)nb.v.imm;
- if (kind == INTRIN_MEMCPY) {
- u32 i = 0;
- while (i + 8 <= n) {
- emit_mov_load(mc, 8, 0, X64_RAX, sr, (i32)i);
- emit_mov_store(mc, 8, X64_RAX, dr, (i32)i);
- i += 8;
- }
- while (i + 4 <= n) {
- emit_mov_load(mc, 4, 0, X64_RAX, sr, (i32)i);
- emit_mov_store(mc, 4, X64_RAX, dr, (i32)i);
- i += 4;
- }
- while (i + 2 <= n) {
- emit_mov_load(mc, 2, 0, X64_RAX, sr, (i32)i);
- emit_mov_store(mc, 2, X64_RAX, dr, (i32)i);
- i += 2;
- }
- while (i < n) {
- emit_mov_load(mc, 1, 0, X64_RAX, sr, (i32)i);
- emit_mov_store(mc, 1, X64_RAX, dr, (i32)i);
- i += 1;
- }
- } else {
- /* memmove: copy backward so dst>src overlap is safe. */
- u32 i = n;
- while (i >= 8) {
- i -= 8;
- emit_mov_load(mc, 8, 0, X64_RAX, sr, (i32)i);
- emit_mov_store(mc, 8, X64_RAX, dr, (i32)i);
- }
- while (i >= 4) {
- i -= 4;
- emit_mov_load(mc, 4, 0, X64_RAX, sr, (i32)i);
- emit_mov_store(mc, 4, X64_RAX, dr, (i32)i);
- }
- while (i >= 2) {
- i -= 2;
- emit_mov_load(mc, 2, 0, X64_RAX, sr, (i32)i);
- emit_mov_store(mc, 2, X64_RAX, dr, (i32)i);
- }
- while (i >= 1) {
- i -= 1;
- emit_mov_load(mc, 1, 0, X64_RAX, sr, (i32)i);
- emit_mov_store(mc, 1, X64_RAX, dr, (i32)i);
- }
- }
- return;
- }
- case INTRIN_MEMSET: {
- /* args = (dst_addr, byte, n). */
- Operand da = args[0], bv = args[1], nb = args[2];
- if (da.kind != OPK_REG || nb.kind != OPK_IMM) {
- compiler_panic(t->c, a->loc,
- "x64 intrinsic: memset with non-const n / non-REG ptr");
- }
- u32 dr = da.v.reg & 0xFu;
- u32 n = (u32)nb.v.imm;
- /* Build a 64-bit value with the byte broadcast across all 8 bytes. */
- if (bv.kind == OPK_IMM) {
- u8 byte = (u8)(bv.v.imm & 0xffu);
- u64 b64 = byte;
- b64 |= b64 << 8;
- b64 |= b64 << 16;
- b64 |= b64 << 32;
- emit_load_imm(mc, 1, X64_RAX, (i64)b64);
- } else if (bv.kind == OPK_REG) {
- /* Broadcast low byte of bv across 8 bytes: rax = bv * 0x0101010101010101. */
- emit_load_imm(mc, 1, X64_R11, (i64)0x0101010101010101ll);
- emit_mov_rr(mc, 1, X64_RAX, bv.v.reg & 0xFu);
- emit_imul_rr(mc, 1, X64_RAX, X64_R11);
- } else {
- compiler_panic(t->c, a->loc,
- "x64 intrinsic: memset byte kind %d unsupported",
- (int)bv.kind);
- }
- u32 i = 0;
- while (i + 8 <= n) {
- emit_mov_store(mc, 8, X64_RAX, dr, (i32)i);
- i += 8;
- }
- while (i + 4 <= n) {
- emit_mov_store(mc, 4, X64_RAX, dr, (i32)i);
- i += 4;
- }
- while (i + 2 <= n) {
- emit_mov_store(mc, 2, X64_RAX, dr, (i32)i);
- i += 2;
- }
- while (i < n) {
- emit_mov_store(mc, 1, X64_RAX, dr, (i32)i);
- i += 1;
- }
- return;
- }
- case INTRIN_PREFETCH:
- /* Drop the hint. */
- return;
- case INTRIN_ASSUME_ALIGNED: {
- /* dst = src (alignment is a hint only). */
- Operand src = args[0];
- Operand dst = dsts[0];
- u32 dr = dst.v.reg & 0xFu;
- u32 sr = src.v.reg & 0xFu;
- if (dr != sr) emit_mov_rr(mc, 1, dr, sr);
- return;
- }
- case INTRIN_EXPECT: {
- /* dst = val; expected hint dropped. */
- Operand val = args[0];
- Operand dst = dsts[0];
- int w = type_is_64(dst.type) ? 1 : 0;
- u32 dr = dst.v.reg & 0xFu;
- if (val.kind == OPK_REG) {
- u32 sr = val.v.reg & 0xFu;
- if (sr != dr) emit_mov_rr(mc, w, dr, sr);
- } else if (val.kind == OPK_IMM) {
- emit_load_imm(mc, w, dr, val.v.imm);
- } else {
- compiler_panic(t->c, a->loc,
- "x64 intrinsic: expect val kind %d unsupported",
- (int)val.kind);
- }
- return;
- }
- case INTRIN_UNREACHABLE:
- case INTRIN_TRAP:
- emit_ud2(mc);
- return;
- case INTRIN_ADD_OVERFLOW:
- case INTRIN_SUB_OVERFLOW: {
- /* dsts: [val, ovf]. ADD/SUB sets OF on signed overflow; SETO captures. */
- Operand a_op = args[0], b_op = args[1];
- Operand dval = dsts[0], dovf = dsts[1];
- int w = type_is_64(dval.type) ? 1 : 0;
- u32 rd = dval.v.reg & 0xFu;
- u32 ra = force_reg_int(t, a_op, w, X64_RAX);
- if (rd != ra) emit_mov_rr(mc, w, rd, ra);
- u32 rb = force_reg_int(t, b_op, w, X64_R11);
- u8 op = (kind == INTRIN_ADD_OVERFLOW) ? 0x01 : 0x29;
- emit_alu_rr(mc, w, op, rd, rb);
- u32 dovf_r = dovf.v.reg & 0xFu;
- emit_setcc(mc, X64_CC_O, dovf_r);
- emit_movzx_r32_r8(mc, dovf_r, dovf_r);
- return;
- }
- case INTRIN_MUL_OVERFLOW: {
- /* dsts: [val, ovf]. IMUL r32, r/m32 (0F AF /r) is the signed
- * two-operand form: low 32 bits of product go to dst, OF set if
- * the result didn't fit. i64 not yet supported. */
- Operand a_op = args[0], b_op = args[1];
- Operand dval = dsts[0], dovf = dsts[1];
- int w = type_is_64(dval.type) ? 1 : 0;
- if (w) {
- compiler_panic(t->c, a->loc,
- "x64 intrinsic: mul_overflow on i64 not yet supported");
- }
- u32 rd = dval.v.reg & 0xFu;
- u32 ra = force_reg_int(t, a_op, w, X64_RAX);
- if (rd != ra) emit_mov_rr(mc, w, rd, ra);
- u32 rb = force_reg_int(t, b_op, w, X64_R11);
- emit_imul_rr(mc, w, rd, rb);
- u32 dovf_r = dovf.v.reg & 0xFu;
- emit_setcc(mc, X64_CC_O, dovf_r);
- emit_movzx_r32_r8(mc, dovf_r, dovf_r);
- return;
- }
- default:
- compiler_panic(t->c, a->loc, "x64 intrinsic: kind %d unsupported",
- (int)kind);
- }
-}
-static void x_asm_block(CGTarget* t, const char* tmpl,
- const AsmConstraint* outs, u32 no, Operand* oo,
- const AsmConstraint* ins, u32 ni, const Operand* io,
- const Sym* clobs, u32 nc) {
- (void)tmpl;
- (void)outs;
- (void)no;
- (void)oo;
- (void)ins;
- (void)ni;
- (void)io;
- (void)clobs;
- (void)nc;
- x_panic(t, "asm_block");
-}
-
-static void x_set_loc(CGTarget* t, SrcLoc l) {
- ((XImpl*)t)->loc = l;
- if (t->mc) t->mc->set_loc(t->mc, l);
-}
-
-static void x_finalize(CGTarget* t) { (void)t; }
-static void x_destroy(CGTarget* t) { (void)t; }
-
-static void cgt_cleanup(void* arg) { cgtarget_free((CGTarget*)arg); }
-
-CGTarget* x64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) {
- XImpl* x = arena_new(c->tu, XImpl);
- memset(x, 0, sizeof *x);
-
- CGTarget* t = &x->base;
- t->c = c;
- t->obj = o;
- t->mc = m;
-
- t->func_begin = x_func_begin;
- t->func_end = x_func_end;
-
- t->alloc_reg = x_alloc_reg;
- t->free_reg = x_free_reg;
- t->frame_slot = x_frame_slot;
- t->param = x_param;
- t->clobbers = x_clobbers;
- t->spill_reg = x_spill_reg;
- t->reload_reg = x_reload_reg;
-
- t->label_new = x_label_new;
- t->label_place = x_label_place;
- t->jump = x_jump;
- t->cmp_branch = x_cmp_branch;
-
- t->scope_begin = x_scope_begin;
- t->scope_else = x_scope_else;
- t->scope_end = x_scope_end;
- t->break_to = x_break_to;
- t->continue_to = x_continue_to;
-
- t->load_imm = x_load_imm;
- t->load_const = x_load_const;
- t->copy = x_copy;
- t->load = x_load;
- t->store = x_store;
- t->addr_of = x_addr_of;
- t->tls_addr_of = x_tls_addr_of;
- t->copy_bytes = x_copy_bytes;
- t->set_bytes = x_set_bytes;
- t->bitfield_load = x_bitfield_load;
- t->bitfield_store = x_bitfield_store;
-
- t->binop = x_binop;
- t->unop = x_unop;
- t->cmp = x_cmp;
- t->convert = x_convert;
-
- t->call = x_call;
- t->ret = x_ret;
-
- t->alloca_ = x_alloca_;
- t->va_start_ = x_va_start_;
- t->va_arg_ = x_va_arg_;
- t->va_end_ = x_va_end_;
- t->va_copy_ = x_va_copy_;
-
- t->setjmp_ = NULL;
- t->longjmp_ = NULL;
-
- t->atomic_load = x_atomic_load;
- t->atomic_store = x_atomic_store;
- t->atomic_rmw = x_atomic_rmw;
- t->atomic_cas = x_atomic_cas;
- t->fence = x_fence;
-
- t->intrinsic = x_intrinsic;
- t->asm_block = x_asm_block;
-
- t->set_loc = x_set_loc;
- t->finalize = x_finalize;
- t->destroy = x_destroy;
-
- compiler_defer(c, cgt_cleanup, t);
- return t;
-}
diff --git a/src/arch/x64/alloc.c b/src/arch/x64/alloc.c
@@ -0,0 +1,378 @@
+/* arch/x64/alloc.c — register pool, spill/reload, labels, control flow.
+ *
+ * Covers: xpool_init/alloc/free, x_alloc_reg, x_free_reg, x_frame_slot,
+ * x64_slot_get, x_param, x_clobbers, x_spill_reg, x_reload_reg, x_label_*,
+ * emit_jmp_label, emit_jcc_label, x_jump, x64_force_reg_int, emit_cmp_ab,
+ * x_cmp_branch, x_cmp, x_scope_*, x_break_to, x_continue_to. */
+
+#include <string.h>
+
+#include "arch/arch.h"
+#include "arch/x64.h"
+#include "arch/x64_isa.h"
+#include "core/arena.h"
+#include "core/pool.h"
+#include "obj/obj.h"
+#include "type/type.h"
+
+#include "arch/x64/internal.h"
+
+/* ============================================================
+ * XRegPool implementation. */
+
+void xpool_init(XRegPool* p, const u8* order, u8 nregs, u8 n_cs) {
+ p->order = order;
+ p->nregs = nregs;
+ p->n_cs = n_cs;
+ p->hwm = 0;
+ p->free = (nregs >= 32u) ? 0xFFFFFFFFu : ((1u << nregs) - 1u);
+}
+
+static Reg xpool_alloc(XRegPool* p) {
+ if (p->free == 0) return (Reg)REG_NONE;
+ u32 idx = (u32)__builtin_ctz(p->free);
+ p->free &= ~(1u << idx);
+ if (idx + 1u > p->hwm) p->hwm = idx + 1u;
+ return (Reg)p->order[idx];
+}
+
+static int xpool_free(XRegPool* p, Reg r) {
+ for (u8 i = 0; i < p->nregs; ++i) {
+ if (p->order[i] == (u8)r) {
+ u32 bit = 1u << i;
+ if (p->free & bit) return -1;
+ p->free |= bit;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* ============================================================
+ * Registers / frame */
+
+Reg x_alloc_reg(CGTarget* t, RegClass cls, const Type* ty) {
+ XImpl* a = impl_of(t);
+ (void)ty;
+ if (cls == RC_INT) return xpool_alloc(&a->int_pool);
+ if (cls == RC_FP) return xpool_alloc(&a->fp_pool);
+ compiler_panic(t->c, a->loc, "x64 alloc_reg: class %d unimpl", (int)cls);
+}
+
+void x_free_reg(CGTarget* t, Reg r, RegClass cls) {
+ XImpl* a = impl_of(t);
+ XRegPool* p = (cls == RC_FP) ? &a->fp_pool : &a->int_pool;
+ int rc = xpool_free(p, r);
+ if (rc == 1) return;
+ if (rc == -1) {
+ compiler_panic(t->c, a->loc, "x64 free_reg: reg %u already free",
+ (unsigned)r);
+ }
+ compiler_panic(t->c, a->loc, "x64 free_reg: reg %u not in %s pool",
+ (unsigned)r, cls == RC_FP ? "fp" : "int");
+}
+
+FrameSlot x_frame_slot(CGTarget* t, const FrameSlotDesc* d) {
+ XImpl* a = impl_of(t);
+ if (a->nslots == a->slots_cap) {
+ u32 ncap = a->slots_cap ? a->slots_cap * 2 : 8;
+ XSlot* nbuf = arena_array(t->c->tu, XSlot, ncap);
+ if (a->slots) memcpy(nbuf, a->slots, sizeof(XSlot) * a->nslots);
+ a->slots = nbuf;
+ a->slots_cap = ncap;
+ }
+ u32 size = d->size ? d->size : 8;
+ u32 align = d->align ? d->align : 1;
+ u32 next = a->cum_off + size;
+ u32 mask = align - 1u;
+ next = (next + mask) & ~mask;
+ XSlot* s = &a->slots[a->nslots];
+ s->off = next;
+ s->size = size;
+ s->align = align;
+ s->kind = d->kind;
+ a->cum_off = next;
+ a->nslots++;
+ return (FrameSlot)(a->nslots);
+}
+
+XSlot* x64_slot_get(XImpl* a, FrameSlot fs) {
+ if (fs == FRAME_SLOT_NONE || fs > a->nslots) return NULL;
+ return &a->slots[fs - 1];
+}
+
+/* ---- param: store incoming arg(s) into the home slot ---- */
+void x_param(CGTarget* t, const CGParamDesc* p) {
+ XImpl* a = impl_of(t);
+ XSlot* s = x64_slot_get(a, p->slot);
+ if (!s) compiler_panic(t->c, a->loc, "x64 param: bad slot");
+ const ABIArgInfo* ai = p->abi;
+
+ if (ai->kind == ABI_ARG_IGNORE) return;
+ if (ai->kind == ABI_ARG_INDIRECT) {
+ /* Incoming pointer to byval copy: load pointer, memcpy into slot. */
+ u32 ptr_reg;
+ if (a->next_param_int < 6) {
+ ptr_reg = g_int_arg_regs[a->next_param_int++];
+ } else {
+ u32 caller_off = a->next_param_stack;
+ a->next_param_stack += 8;
+ emit_mov_load(t->mc, 8, 0, X64_R11, X64_RBP, (i32)(16 + caller_off));
+ ptr_reg = X64_R11;
+ }
+ u32 nbytes = s->size;
+ u32 i = 0;
+ while (i + 8 <= nbytes) {
+ emit_mov_load(t->mc, 8, 0, X64_RAX, ptr_reg, (i32)i);
+ emit_mov_store(t->mc, 8, X64_RAX, X64_RBP, -(i32)s->off + (i32)i);
+ i += 8;
+ }
+ while (i + 4 <= nbytes) {
+ emit_mov_load(t->mc, 4, 0, X64_RAX, ptr_reg, (i32)i);
+ emit_mov_store(t->mc, 4, X64_RAX, X64_RBP, -(i32)s->off + (i32)i);
+ i += 4;
+ }
+ while (i + 2 <= nbytes) {
+ emit_mov_load(t->mc, 2, 0, X64_RAX, ptr_reg, (i32)i);
+ emit_mov_store(t->mc, 2, X64_RAX, X64_RBP, -(i32)s->off + (i32)i);
+ i += 2;
+ }
+ while (i < nbytes) {
+ emit_mov_load(t->mc, 1, 0, X64_RAX, ptr_reg, (i32)i);
+ emit_mov_store(t->mc, 1, X64_RAX, X64_RBP, -(i32)s->off + (i32)i);
+ i += 1;
+ }
+ return;
+ }
+ /* DIRECT */
+ for (u16 i = 0; i < ai->nparts; ++i) {
+ const ABIArgPart* pt = &ai->parts[i];
+ u32 part_off = pt->src_offset;
+ u32 sz = pt->size;
+ if (pt->cls == ABI_CLASS_INT) {
+ if (a->next_param_int < 6) {
+ u32 reg = g_int_arg_regs[a->next_param_int++];
+ emit_mov_store(t->mc, sz, reg, X64_RBP,
+ -(i32)s->off + (i32)part_off);
+ } else {
+ u32 caller_off = a->next_param_stack;
+ a->next_param_stack += 8;
+ emit_mov_load(t->mc, sz, 0, X64_RAX, X64_RBP,
+ (i32)(16 + caller_off));
+ emit_mov_store(t->mc, sz, X64_RAX, X64_RBP,
+ -(i32)s->off + (i32)part_off);
+ }
+ } else if (pt->cls == ABI_CLASS_FP) {
+ if (a->next_param_fp < 8) {
+ u32 xmm = a->next_param_fp++;
+ u8 prefix = (sz == 8) ? 0xF2 : 0xF3;
+ emit_sse_store(t->mc, prefix, 0x11, xmm, X64_RBP,
+ -(i32)s->off + (i32)part_off);
+ } else {
+ u32 caller_off = a->next_param_stack;
+ a->next_param_stack += 8;
+ u8 prefix = (sz == 8) ? 0xF2 : 0xF3;
+ emit_sse_load(t->mc, prefix, 0x10, X64_XMM0, X64_RBP,
+ (i32)(16 + caller_off));
+ emit_sse_store(t->mc, prefix, 0x11, X64_XMM0, X64_RBP,
+ -(i32)s->off + (i32)part_off);
+ }
+ } else {
+ compiler_panic(t->c, a->loc, "x64 param: ABI class %d unimpl",
+ (int)pt->cls);
+ }
+ }
+}
+
+const Reg* x_clobbers(CGTarget* t, RegClass c, u32* n) {
+ (void)c;
+ (void)n;
+ x_panic(t, "clobbers");
+}
+void x_spill_reg(CGTarget* t, Operand src, FrameSlot slot,
+ MemAccess ma) {
+ XImpl* a = impl_of(t);
+ if (src.kind != OPK_REG)
+ compiler_panic(t->c, a->loc, "x64 spill_reg: src is not OPK_REG");
+ Operand addr;
+ memset(&addr, 0, sizeof addr);
+ addr.kind = OPK_LOCAL;
+ addr.cls = RC_INT;
+ addr.type = ma.type;
+ addr.v.frame_slot = slot;
+ x_store(t, addr, src, ma);
+ x_free_reg(t, src.v.reg, src.cls);
+}
+
+void x_reload_reg(CGTarget* t, Operand dst, FrameSlot slot,
+ MemAccess ma) {
+ XImpl* a = impl_of(t);
+ if (dst.kind != OPK_REG)
+ compiler_panic(t->c, a->loc, "x64 reload_reg: dst is not OPK_REG");
+ Operand addr;
+ memset(&addr, 0, sizeof addr);
+ addr.kind = OPK_LOCAL;
+ addr.cls = RC_INT;
+ addr.type = ma.type;
+ addr.v.frame_slot = slot;
+ x_load(t, dst, addr, ma);
+}
+
+/* ============================================================
+ * Labels / control flow */
+
+Label x_label_new(CGTarget* t) {
+ return (Label)t->mc->label_new(t->mc);
+}
+void x_label_place(CGTarget* t, Label l) {
+ t->mc->label_place(t->mc, (MCLabel)l);
+}
+
+/* Emit `jmp rel32` (E9 + 4-byte disp) with a label fixup. R_PC32 applied
+ * at the disp32 site with addend=-4 yields target - end_of_insn. */
+void emit_jmp_label(MCEmitter* mc, MCLabel l) {
+ u8 op = 0xE9;
+ mc->emit_bytes(mc, &op, 1);
+ emit_u32le(mc, 0);
+ mc->emit_label_ref(mc, l, R_PC32, 4, -4);
+}
+
+/* Emit `Jcc rel32` (0F 8x + 4-byte disp) with a label fixup. */
+void emit_jcc_label(MCEmitter* mc, u32 cc, MCLabel l) {
+ u8 op[2] = {0x0F, (u8)(0x80 | (cc & 0xF))};
+ mc->emit_bytes(mc, op, 2);
+ emit_u32le(mc, 0);
+ mc->emit_label_ref(mc, l, R_PC32, 4, -4);
+}
+
+void x_jump(CGTarget* t, Label l) { emit_jmp_label(t->mc, (MCLabel)l); }
+
+static u32 cmp_to_cc(CmpOp op) {
+ switch (op) {
+ case CMP_EQ: return X64_CC_E;
+ case CMP_NE: return X64_CC_NE;
+ case CMP_LT_U: return X64_CC_B;
+ case CMP_LE_U: return X64_CC_BE;
+ case CMP_GT_U: return X64_CC_A;
+ case CMP_GE_U: return X64_CC_AE;
+ case CMP_LT_S: return X64_CC_L;
+ case CMP_LE_S: return X64_CC_LE;
+ case CMP_GT_S: return X64_CC_G;
+ case CMP_GE_S: return X64_CC_GE;
+ default: return X64_CC_E;
+ }
+}
+
+u32 x64_force_reg_int(CGTarget* t, Operand op, int w, u32 scratch) {
+ if (op.kind == OPK_REG) return op.v.reg & 0xFu;
+ if (op.kind == OPK_IMM) {
+ x64_emit_load_imm(t->mc, w, scratch, op.v.imm);
+ return scratch;
+ }
+ compiler_panic(t->c, impl_of(t)->loc, "x64: operand kind %d not REG/IMM",
+ (int)op.kind);
+}
+
+static void emit_cmp_ab(CGTarget* t, Operand a_op, Operand b_op) {
+ int w = type_is_64(a_op.type) ? 1 : 0;
+ /* IMM RHS imm8 / imm32 fast paths. CMP is not commutative across the
+ * cond codes, so IMM-on-LHS still has to materialize. */
+ if (b_op.kind == OPK_IMM && a_op.kind == OPK_REG) {
+ if (imm_fits_i8(b_op.v.imm)) {
+ emit_cmp_imm8(t->mc, w, a_op.v.reg & 0xFu, (i8)b_op.v.imm);
+ return;
+ }
+ if (imm_fits_i32(b_op.v.imm)) {
+ emit_alu_imm32(t->mc, w, /*sub=CMP*/ 7u, a_op.v.reg & 0xFu,
+ (i32)b_op.v.imm);
+ return;
+ }
+ }
+ u32 ra = x64_force_reg_int(t, a_op, w, X64_RAX);
+ u32 rb = x64_force_reg_int(t, b_op, w, (ra == X64_R11) ? X64_RAX : X64_R11);
+ /* cmp r/m, r — opcode 0x39 (encoded as `cmp ra, rb` ⇒ flags = ra - rb). */
+ emit_alu_rr(t->mc, w, 0x39, ra, rb);
+}
+
+void x_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b,
+ Label l) {
+ emit_cmp_ab(t, a, b);
+ emit_jcc_label(t->mc, cmp_to_cc(op), (MCLabel)l);
+}
+
+void x_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a, Operand b) {
+ emit_cmp_ab(t, a, b);
+ u32 d = dst.v.reg & 0xFu;
+ emit_setcc(t->mc, cmp_to_cc(op), d);
+ emit_movzx_r32_r8(t->mc, d, d);
+}
+
+/* ---- structured scopes ---- */
+CGScope x_scope_begin(CGTarget* t, const CGScopeDesc* d) {
+ XImpl* a = impl_of(t);
+ if (a->nscopes == a->scopes_cap) {
+ u32 ncap = a->scopes_cap ? a->scopes_cap * 2u : 4u;
+ XScope* nb = arena_array(t->c->tu, XScope, ncap);
+ if (a->scopes) memcpy(nb, a->scopes, sizeof(XScope) * a->nscopes);
+ a->scopes = nb;
+ a->scopes_cap = ncap;
+ }
+ XScope* sc = &a->scopes[a->nscopes];
+ sc->kind = (u8)d->kind;
+ sc->has_else = 0;
+ sc->else_label = 0;
+ sc->end_label = 0;
+ sc->break_label = d->break_label;
+ sc->continue_label = d->continue_label;
+
+ if (d->kind == SCOPE_IF) {
+ sc->else_label = t->mc->label_new(t->mc);
+ sc->end_label = t->mc->label_new(t->mc);
+ int w = type_is_64(d->cond.type) ? 1 : 0;
+ u32 rc = x64_force_reg_int(t, d->cond, w, X64_RAX);
+ emit_test_self(t->mc, w, rc);
+ emit_jcc_label(t->mc, X64_CC_E, sc->else_label);
+ } else if (d->kind == SCOPE_LOOP || d->kind == SCOPE_BLOCK) {
+ /* Bookkeeping only. */
+ } else {
+ compiler_panic(t->c, a->loc,
+ "x64 scope_begin: kind %d not yet implemented",
+ (int)d->kind);
+ }
+ a->nscopes++;
+ return (CGScope)a->nscopes;
+}
+
+void x_scope_else(CGTarget* t, CGScope s) {
+ XImpl* a = impl_of(t);
+ if (s == CG_SCOPE_NONE || s > a->nscopes)
+ compiler_panic(t->c, a->loc, "x64 scope_else: bad scope");
+ XScope* sc = &a->scopes[s - 1];
+ emit_jmp_label(t->mc, sc->end_label);
+ t->mc->label_place(t->mc, sc->else_label);
+ sc->has_else = 1;
+}
+
+void x_scope_end(CGTarget* t, CGScope s) {
+ XImpl* a = impl_of(t);
+ if (s == CG_SCOPE_NONE || s > a->nscopes)
+ compiler_panic(t->c, a->loc, "x64 scope_end: bad scope");
+ XScope* sc = &a->scopes[s - 1];
+ if (sc->kind == SCOPE_IF) {
+ if (!sc->has_else) t->mc->label_place(t->mc, sc->else_label);
+ t->mc->label_place(t->mc, sc->end_label);
+ }
+}
+
+void x_break_to(CGTarget* t, CGScope s) {
+ XImpl* a = impl_of(t);
+ if (s == CG_SCOPE_NONE || s > a->nscopes)
+ compiler_panic(t->c, a->loc, "x64 break_to: bad scope");
+ x_jump(t, a->scopes[s - 1].break_label);
+}
+void x_continue_to(CGTarget* t, CGScope s) {
+ XImpl* a = impl_of(t);
+ if (s == CG_SCOPE_NONE || s > a->nscopes)
+ compiler_panic(t->c, a->loc, "x64 continue_to: bad scope");
+ x_jump(t, a->scopes[s - 1].continue_label);
+}
diff --git a/src/arch/x64/emit.c b/src/arch/x64/emit.c
@@ -0,0 +1,647 @@
+/* arch/x64/emit.c — byte-level emit helpers, function prologue/epilogue.
+ *
+ * Covers: REX, ModR/M, SIB, all emit_* primitives, x_func_begin,
+ * x_func_end, and the shared constant tables (g_int_order, g_fp_order,
+ * g_int_arg_regs). */
+
+#include <string.h>
+
+#include "arch/arch.h"
+#include "arch/x64.h"
+#include "arch/x64_isa.h"
+#include "core/arena.h"
+#include "core/pool.h"
+#include "obj/obj.h"
+#include "type/type.h"
+
+#include "arch/x64/internal.h"
+
+/* ============================================================
+ * Shared constant tables. */
+
+/* Allocation order for the integer register pool. The first five entries
+ * are callee-saved and the last one is caller-saved; x_func_begin hands
+ * this table to xpool_init with nregs=6 and n_cs=5. */
+const u8 g_int_order[6] = {
+ X64_RBX, X64_R12, X64_R13, X64_R14, X64_R15, /* callee-saved (n_cs=5) */
+ X64_R10, /* caller-saved tail */
+};
+
+/* Allocation order for the FP register pool: ten xmm registers, none of
+ * them treated as callee-saved (x_func_begin passes n_cs=0 to xpool_init).
+ * NOTE(review): x_func_begin's variadic path saves xmm0..xmm7 as incoming
+ * FP argument registers, so xmm6/xmm7 (first in this order) are argument
+ * registers too — confirm the "xmm0..5" wording below. */
+const u8 g_fp_order[10] = {
+ /* All xmm regs are caller-saved on SysV; preference order is xmm6
+ * upward to keep the low arg/return regs (xmm0..5) clear for calls. */
+ X64_XMM6, X64_XMM7, X64_XMM8, X64_XMM0 + 9, X64_XMM0 + 10,
+ X64_XMM0 + 11, X64_XMM0 + 12, X64_XMM0 + 13, X64_XMM0 + 14, X64_XMM15,
+};
+
+/* Integer argument registers in argument order (rdi, rsi, rdx, rcx, r8,
+ * r9) — the same sequence x_func_begin stores into the variadic register
+ * save area. */
+const u32 g_int_arg_regs[6] = {X64_RDI, X64_RSI, X64_RDX,
+ X64_RCX, X64_R8, X64_R9};
+
+/* ============================================================
+ * Byte-level emit helpers.
+ *
+ * x64 instructions are variable length: optional legacy prefix(es),
+ * optional REX, 1-3 byte opcode, ModR/M, optional SIB, optional
+ * displacement, optional immediate. Helpers below build sequences
+ * into the active MCEmitter section, recording one Debug row per
+ * instruction-start. */
+/* Emit one byte into the active section; when debug info is enabled,
+ * also record a Debug row at that byte's offset. */
+static void emit1(MCEmitter* mc, u8 b) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  mc->emit_bytes(mc, &b, 1);
+  if (!mc->debug) return;
+  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+}
+/* Append `v` as four bytes, least-significant byte first. No Debug row
+ * is recorded here. */
+void emit_u32le(MCEmitter* mc, u32 v) {
+  u8 bytes[4];
+  for (int i = 0; i < 4; ++i) bytes[i] = (u8)(v >> (i * 8));
+  mc->emit_bytes(mc, bytes, 4);
+}
+/* Append `v` as eight bytes, least-significant byte first. */
+static void emit_u64le(MCEmitter* mc, u64 v) {
+  u8 bytes[8];
+  u64 rest = v;
+  for (u32 k = 0; k < 8u; ++k) {
+    bytes[k] = (u8)rest;
+    rest >>= 8;
+  }
+  mc->emit_bytes(mc, bytes, 8);
+}
+
+/* Build a REX prefix byte from the W flag and bit 3 of the reg, index and
+ * rm register numbers. Returns 0 when none of W/R/X/B is needed, i.e.
+ * when the prefix can be omitted. */
+static u8 make_rex(int w, u32 reg, u32 index, u32 rm) {
+  u8 bits = 0;
+  bits |= w ? X64_REX_W : 0;
+  bits |= (reg & 8) ? X64_REX_R : 0;
+  bits |= (index & 8) ? X64_REX_X : 0;
+  bits |= (rm & 8) ? X64_REX_B : 0;
+  if (bits == 0) return 0;
+  return (u8)(X64_REX_BASE | bits);
+}
+/* Emit a REX prefix only when one is needed (W set or any register
+ * number has bit 3 set); otherwise emit nothing. */
+void emit_rex(MCEmitter* mc, int w, u32 reg, u32 index, u32 rm) {
+  u8 prefix = make_rex(w, reg, index, rm);
+  if (prefix == 0) return;
+  mc->emit_bytes(mc, &prefix, 1);
+}
+/* Unconditionally emit a REX prefix, even a bare REX=0x40 with no W/R/X/B
+ * bits. Byte-register encodings need this so that SIL/DIL/etc. are
+ * addressable. */
+void emit_rex_force(MCEmitter* mc, int w, u32 reg, u32 index, u32 rm) {
+  u8 prefix = (u8)X64_REX_BASE;
+  if (w) prefix = (u8)(prefix | X64_REX_W);
+  if (reg & 8) prefix = (u8)(prefix | X64_REX_R);
+  if (index & 8) prefix = (u8)(prefix | X64_REX_X);
+  if (rm & 8) prefix = (u8)(prefix | X64_REX_B);
+  mc->emit_bytes(mc, &prefix, 1);
+}
+
+/* Pack a ModR/M byte: mod in bits 7..6, reg in bits 5..3, rm in bits 2..0.
+ * Inputs are masked, so 4-bit register numbers may be passed directly
+ * (bit 3 belongs in the REX prefix). */
+u8 modrm(u32 mod, u32 reg, u32 rm) {
+  u32 hi = (mod & 3u) << 6;
+  u32 mid = (reg & 7u) << 3;
+  u32 lo = rm & 7u;
+  return (u8)(hi | mid | lo);
+}
+/* Pack a SIB byte: scale in bits 7..6, index in bits 5..3, base in bits
+ * 2..0. Inputs are masked to their field widths. */
+u8 sib(u32 scale, u32 index, u32 base) {
+  u32 ss = (scale & 3u) << 6;
+  u32 idx = (index & 7u) << 3;
+  u32 bs = base & 7u;
+  return (u8)(ss | idx | bs);
+}
+
+/* Choose the ModR/M `mod` field for a [base + disp] operand:
+ *   0 — [base], only when disp is 0 and base's low three bits are not 101b;
+ *   1 — [base + disp8];
+ *   2 — [base + disp32]. */
+static u32 disp_mod(u32 base, i32 disp) {
+  int base_is_101 = (base & 7u) == 5u;
+  if (disp < -128 || disp > 127) return 2u;
+  if (disp != 0 || base_is_101) return 1u;
+  return 0u;
+}
+
+/* Emit the ModR/M byte — plus a SIB byte and displacement when needed —
+ * for a [base + disp] memory operand with `reg` in the reg field. The
+ * caller is responsible for any REX prefix and the opcode. */
+void emit_mem_operand(MCEmitter* mc, u32 reg, u32 base, i32 disp) {
+  u32 mod = disp_mod(base, disp);
+  int needs_sib = (base & 7u) == 4u;
+
+  if (!needs_sib) {
+    u8 mrm = modrm(mod, reg, base);
+    mc->emit_bytes(mc, &mrm, 1);
+  } else {
+    /* rm=100b means "SIB follows"; the SIB carries index=100b (none)
+     * and the real base. */
+    u8 mrm = modrm(mod, reg, 4u);
+    mc->emit_bytes(mc, &mrm, 1);
+    u8 sb = sib(0, 4u, base);
+    mc->emit_bytes(mc, &sb, 1);
+  }
+
+  switch (mod) {
+    case 1u: {
+      u8 d8 = (u8)(i8)disp;
+      mc->emit_bytes(mc, &d8, 1);
+      break;
+    }
+    case 2u:
+      emit_u32le(mc, (u32)disp);
+      break;
+    default:
+      break;
+  }
+}
+/* Emit a register-direct ModR/M byte (mod=11b) with the given reg and rm
+ * fields. */
+void emit_rm_reg(MCEmitter* mc, u32 reg, u32 rm) {
+  u8 direct = modrm(3u, reg, rm);
+  mc->emit_bytes(mc, &direct, 1);
+}
+
+/* ---- specific instruction emitters ---- */
+
+/* mov dst, src between registers, encoded as 89 /r (MOV r/m, r) with src
+ * in the reg field and dst in rm. 64-bit when `w` is set, else 32-bit.
+ * Records one Debug row when debug info is enabled. */
+void emit_mov_rr(MCEmitter* mc, int w, u32 dst, u32 src) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  u8 opcode = 0x89;
+  emit_rex(mc, w, src, 0, dst);
+  mc->emit_bytes(mc, &opcode, 1);
+  emit_rm_reg(mc, src, dst);
+  if (!mc->debug) return;
+  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+}
+
+/* Load dst from [base + disp]; size is 1, 2, 4 or 8 bytes.
+ *   8/4: 8B /r (MOV r, r/m), with REX.W for the 8-byte form.
+ *   2/1: 0F B7/B6 (movzx) or 0F BF/BE (movsx, when signed_ext), always
+ *        without REX.W.
+ * Any other size emits no instruction bytes. A Debug row is recorded in
+ * every case when debug info is enabled. */
+void emit_mov_load(MCEmitter* mc, u32 size, int signed_ext, u32 dst,
+                   u32 base, i32 disp) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  switch (size) {
+    case 8:
+    case 4: {
+      u8 opcode = 0x8B;
+      emit_rex(mc, size == 8 ? 1 : 0, dst, 0, base);
+      mc->emit_bytes(mc, &opcode, 1);
+      emit_mem_operand(mc, dst, base, disp);
+      break;
+    }
+    case 2:
+    case 1: {
+      u8 opcode[2];
+      opcode[0] = 0x0F;
+      if (size == 2)
+        opcode[1] = signed_ext ? 0xBF : 0xB7;
+      else
+        opcode[1] = signed_ext ? 0xBE : 0xB6;
+      emit_rex(mc, 0, dst, 0, base);
+      mc->emit_bytes(mc, opcode, 2);
+      emit_mem_operand(mc, dst, base, disp);
+      break;
+    }
+    default:
+      break;
+  }
+  if (mc->debug) {
+    debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+  }
+}
+
+/* Store src to [base + disp]; size is 1, 2, 4 or 8 bytes.
+ *   8: REX.W 89 /r      4: 89 /r
+ *   2: 66 prefix, then 89 /r
+ *   1: 88 /r with a forced REX so SIL/DIL/etc. are addressable as byte
+ *      registers.
+ * Any other size emits no instruction bytes. A Debug row is recorded in
+ * every case when debug info is enabled. */
+void emit_mov_store(MCEmitter* mc, u32 size, u32 src, u32 base, i32 disp) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  u8 opcode = 0x89;
+  int known = 1;
+
+  switch (size) {
+    case 8:
+      emit_rex(mc, 1, src, 0, base);
+      break;
+    case 4:
+      emit_rex(mc, 0, src, 0, base);
+      break;
+    case 2: {
+      u8 opsize = 0x66;
+      mc->emit_bytes(mc, &opsize, 1);
+      emit_rex(mc, 0, src, 0, base);
+      break;
+    }
+    case 1:
+      emit_rex_force(mc, 0, src, 0, base);
+      opcode = 0x88;
+      break;
+    default:
+      known = 0;
+      break;
+  }
+
+  if (known) {
+    mc->emit_bytes(mc, &opcode, 1);
+    emit_mem_operand(mc, src, base, disp);
+  }
+  if (mc->debug) {
+    debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+  }
+}
+
+/* lea dst, [base + disp] — always the 64-bit form (REX.W 8D /r). Records
+ * one Debug row when debug info is enabled. */
+void emit_lea(MCEmitter* mc, u32 dst, u32 base, i32 disp) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  u8 opcode = 0x8D;
+  emit_rex(mc, 1, dst, 0, base);
+  mc->emit_bytes(mc, &opcode, 1);
+  emit_mem_operand(mc, dst, base, disp);
+  if (!mc->debug) return;
+  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+}
+
+/* Load an immediate into `dst` using the B8+r opcode family.
+ *   is64:  REX.W B8+r imm64 (movabs) — 10 bytes.
+ *   !is64: B8+r imm32 (mov r32, imm32) — 5 bytes, or 6 when dst is
+ *          r8..r15 and a REX prefix is required. Only the low 32 bits
+ *          of `imm` are emitted in this form.
+ * Records one Debug row when debug info is enabled. */
+void x64_emit_load_imm(MCEmitter* mc, int is64, u32 dst, i64 imm) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  u8 opcode = (u8)(0xB8 | (dst & 7));
+
+  emit_rex(mc, is64 ? 1 : 0, 0, 0, dst);
+  mc->emit_bytes(mc, &opcode, 1);
+  if (is64)
+    emit_u64le(mc, (u64)imm);
+  else
+    emit_u32le(mc, (u32)imm);
+
+  if (!mc->debug) return;
+  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+}
+
+/* Two-operand "op r/m, r" with both operands in registers: dst goes in
+ * the rm field, src in the reg field. `op` is the one-byte opcode:
+ * ADD(01)/SUB(29)/AND(21)/OR(09)/XOR(31)/CMP(39)/MOV(89)/TEST(85).
+ * Records one Debug row when debug info is enabled. */
+void emit_alu_rr(MCEmitter* mc, int w, u8 op, u32 dst, u32 src) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  emit_rex(mc, w, src, 0, dst);
+  mc->emit_bytes(mc, &op, 1);
+  emit_rm_reg(mc, src, dst);
+  if (!mc->debug) return;
+  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+}
+
+/* imul dst, src (0F AF /r): dst is encoded in the reg field and src in
+ * rm — the reverse of emit_alu_rr. Records one Debug row when debug info
+ * is enabled. */
+void emit_imul_rr(MCEmitter* mc, int w, u32 dst, u32 src) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  u8 opcode[2];
+  opcode[0] = 0x0F;
+  opcode[1] = 0xAF;
+  emit_rex(mc, w, dst, 0, src);
+  mc->emit_bytes(mc, opcode, 2);
+  emit_rm_reg(mc, dst, src);
+  if (!mc->debug) return;
+  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+}
+
+/* Opcode F7 /sub on a register operand; `sub` is placed in the ModR/M
+ * reg field and selects the operation within the F7 group. Records one
+ * Debug row when debug info is enabled. */
+void emit_f7_rm(MCEmitter* mc, int w, u32 sub, u32 reg) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  u8 opcode = 0xF7;
+  emit_rex(mc, w, 0, 0, reg);
+  mc->emit_bytes(mc, &opcode, 1);
+  emit_rm_reg(mc, sub, reg);
+  if (!mc->debug) return;
+  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+}
+
+/* Shift a register by CL: opcode D3 /sub, with `sub` in the ModR/M reg
+ * field (same sub-opcodes as emit_shift_imm). Records one Debug row when
+ * debug info is enabled. */
+void emit_shift_cl(MCEmitter* mc, int w, u32 sub, u32 reg) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  u8 opcode = 0xD3;
+  emit_rex(mc, w, 0, 0, reg);
+  mc->emit_bytes(mc, &opcode, 1);
+  emit_rm_reg(mc, sub, reg);
+  if (!mc->debug) return;
+  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+}
+
+/* Shift a register by an 8-bit immediate: C1 /sub ib.
+ * sub: SHL=4, SHR=5, SAR=7. Records one Debug row when debug info is
+ * enabled. */
+void emit_shift_imm(MCEmitter* mc, int w, u32 sub, u32 reg, u8 imm) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  emit_rex(mc, w, 0, 0, reg);
+  u8 enc[3] = {0xC1, modrm(3u, sub, reg), imm};
+  mc->emit_bytes(mc, enc, 3);
+  if (!mc->debug) return;
+  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+}
+
+/* Emit opcode 99: with REX.W when `w` is set (cqo), bare otherwise (cdq).
+ * NOTE(review): unlike most emitters in this file, no Debug row is
+ * recorded here — confirm that is intentional. */
+void emit_cqo_or_cdq(MCEmitter* mc, int w) {
+  u8 enc[2];
+  u32 n = 0;
+  if (w) enc[n++] = X64_REX_BASE | X64_REX_W;
+  enc[n++] = 0x99;
+  mc->emit_bytes(mc, enc, n);
+}
+
+/* xor r, r (opcode 31) via emit_alu_rr — zeroes the register. */
+void emit_xor_self(MCEmitter* mc, int w, u32 r) {
+  u8 xor_rm_r = 0x31;
+  emit_alu_rr(mc, w, xor_rm_r, r, r);
+}
+
+/* cmp reg, imm8: opcode 83 /7 ib. Records one Debug row when debug info
+ * is enabled. */
+void emit_cmp_imm8(MCEmitter* mc, int w, u32 reg, i8 imm) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  emit_rex(mc, w, 0, 0, reg);
+  u8 enc[3] = {0x83, modrm(3u, 7u, reg), (u8)imm};
+  mc->emit_bytes(mc, enc, 3);
+  if (!mc->debug) return;
+  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+}
+
+/* ALU reg, imm8 with a sign-extended immediate: opcode 83 /sub ib.
+ * sub: ADD=0, OR=1, ADC=2, SBB=3, AND=4, SUB=5, XOR=6, CMP=7.
+ * Records one Debug row when debug info is enabled. */
+void emit_alu_imm8(MCEmitter* mc, int w, u32 sub, u32 reg, i8 imm) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  emit_rex(mc, w, 0, 0, reg);
+  u8 enc[3] = {0x83, modrm(3u, sub, reg), (u8)imm};
+  mc->emit_bytes(mc, enc, 3);
+  if (!mc->debug) return;
+  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+}
+
+/* ALU reg, imm32: opcode 81 /sub id (the immediate is sign-extended for
+ * w=1). Uses the same `sub` numbering as emit_alu_imm8. Records one Debug
+ * row when debug info is enabled. */
+void emit_alu_imm32(MCEmitter* mc, int w, u32 sub, u32 reg, i32 imm) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  emit_rex(mc, w, 0, 0, reg);
+
+  u8 enc[6];
+  enc[0] = 0x81;
+  enc[1] = modrm(3u, sub, reg);
+  /* Immediate, little-endian. */
+  for (int k = 0; k < 4; ++k) enc[2 + k] = (u8)((imm >> (8 * k)) & 0xFF);
+  mc->emit_bytes(mc, enc, 6);
+
+  if (!mc->debug) return;
+  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+}
+
+/* imul dst, src, imm8: opcode 6B /r ib (imm8 sign-extended), with dst in
+ * the reg field and src in rm. This three-operand form has its own
+ * destination, so the caller does not need to copy src into dst first —
+ * unlike the ALU forms, which read-modify-write a single operand.
+ * Records one Debug row when debug info is enabled. */
+void emit_imul_imm8(MCEmitter* mc, int w, u32 dst, u32 src, i8 imm) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  emit_rex(mc, w, dst, 0, src);
+  u8 enc[3] = {0x6B, modrm(3u, dst, src), (u8)imm};
+  mc->emit_bytes(mc, enc, 3);
+  if (!mc->debug) return;
+  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+}
+/* imul dst, src, imm32: opcode 69 /r id, with dst in the reg field and
+ * src in rm. Records one Debug row when debug info is enabled. */
+void emit_imul_imm32(MCEmitter* mc, int w, u32 dst, u32 src, i32 imm) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  emit_rex(mc, w, dst, 0, src);
+
+  u8 enc[6];
+  enc[0] = 0x69;
+  enc[1] = modrm(3u, dst, src);
+  /* Immediate, little-endian. */
+  for (int k = 0; k < 4; ++k) enc[2 + k] = (u8)((imm >> (8 * k)) & 0xFF);
+  mc->emit_bytes(mc, enc, 6);
+
+  if (!mc->debug) return;
+  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+}
+
+/* Returns 1 when `imm` lies in [-128, 127], i.e. is representable by the
+ * sign-extended imm8 of the 0x83/0x6B forms; 0 otherwise. */
+int imm_fits_i8(i64 imm) {
+  if (imm < -128) return 0;
+  return imm <= 127;
+}
+/* Returns 1 when `imm` lies in [INT32_MIN, INT32_MAX], i.e. is
+ * representable by the imm32 of the 0x81/0x69 forms (sign-extended to 64
+ * bits when w=1); 0 otherwise. Values outside that range need a full
+ * materialization through x64_emit_load_imm. */
+int imm_fits_i32(i64 imm) {
+  if (imm > 2147483647LL) return 0;
+  return imm >= -2147483648LL;
+}
+
+/* test reg, reg (opcode 85) via emit_alu_rr. */
+void emit_test_self(MCEmitter* mc, int w, u32 reg) {
+  u8 test_rm_r = 0x85;
+  emit_alu_rr(mc, w, test_rm_r, reg, reg);
+}
+
+/* setcc on a byte register: 0F 90+cc with a register-direct ModR/M whose
+ * reg field is 0. A REX prefix is always emitted (see emit_rex_force).
+ * Records one Debug row when debug info is enabled. */
+void emit_setcc(MCEmitter* mc, u32 cc, u32 reg) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  u8 opcode[2];
+  opcode[0] = 0x0F;
+  opcode[1] = (u8)(0x90 | (cc & 0xF));
+  emit_rex_force(mc, 0, 0, 0, reg);
+  mc->emit_bytes(mc, opcode, 2);
+  emit_rm_reg(mc, 0u, reg);
+  if (!mc->debug) return;
+  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+}
+
+/* movzx dst(32-bit), src(8-bit): 0F B6 /r with dst in reg and src in rm.
+ * A REX prefix is always emitted because the source is a byte register.
+ * Records one Debug row when debug info is enabled. */
+void emit_movzx_r32_r8(MCEmitter* mc, u32 dst, u32 src) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  u8 opcode[2];
+  opcode[0] = 0x0F;
+  opcode[1] = 0xB6;
+  emit_rex_force(mc, 0, dst, 0, src);
+  mc->emit_bytes(mc, opcode, 2);
+  emit_rm_reg(mc, dst, src);
+  if (!mc->debug) return;
+  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+}
+
+/* Register-to-register zero/sign extension; src_size is the source width
+ * in bytes.
+ *   4, signed:   movsxd r64, r32 (REX.W 63 /r); `w` is ignored.
+ *   4, unsigned: mov r32, r32 (89 /r), which clears the high 32 bits;
+ *                `w` is ignored.
+ *   1:           0F BE (movsx) / 0F B6 (movzx) with a forced REX.
+ *   2:           0F BF (movsx) / 0F B7 (movzx).
+ * Any other src_size emits no instruction bytes. A Debug row is recorded
+ * in every case when debug info is enabled. */
+void emit_extend_rr(MCEmitter* mc, int w, int signed_ext, u32 src_size,
+                    u32 dst, u32 src) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  switch (src_size) {
+    case 4:
+      if (signed_ext) {
+        u8 movsxd = 0x63;
+        emit_rex(mc, 1, dst, 0, src);
+        mc->emit_bytes(mc, &movsxd, 1);
+        emit_rm_reg(mc, dst, src);
+      } else {
+        u8 mov = 0x89;
+        emit_rex(mc, 0, src, 0, dst);
+        mc->emit_bytes(mc, &mov, 1);
+        emit_rm_reg(mc, src, dst);
+      }
+      break;
+    case 1:
+    case 2: {
+      u8 opcode[2];
+      opcode[0] = 0x0F;
+      if (src_size == 1) {
+        opcode[1] = signed_ext ? 0xBE : 0xB6;
+        emit_rex_force(mc, w, dst, 0, src);
+      } else {
+        opcode[1] = signed_ext ? 0xBF : 0xB7;
+        emit_rex(mc, w, dst, 0, src);
+      }
+      mc->emit_bytes(mc, opcode, 2);
+      emit_rm_reg(mc, dst, src);
+      break;
+    }
+    default:
+      break;
+  }
+  if (mc->debug) {
+    debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+  }
+}
+
+/* Emit `ret` (C3). No Debug row is recorded. */
+static void emit_ret(MCEmitter* mc) {
+  u8 ret_op = 0xC3;
+  mc->emit_bytes(mc, &ret_op, 1);
+}
+/* Emit `leave` (C9). No Debug row is recorded. */
+static void emit_leave(MCEmitter* mc) {
+  u8 leave_op = 0xC9;
+  mc->emit_bytes(mc, &leave_op, 1);
+}
+
+/* ---- SSE scalar FP encoders ---- */
+/* Register-to-register SSE op: [prefix] [REX] 0F opcode /r, with dst in
+ * the reg field and src in rm. A `prefix` of 0 means no prefix byte.
+ * Records one Debug row when debug info is enabled. */
+void emit_sse_rr(MCEmitter* mc, u8 prefix, u8 opcode, u32 dst, u32 src) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  if (prefix != 0) mc->emit_bytes(mc, &prefix, 1);
+  emit_rex(mc, 0, dst, 0, src);
+  u8 esc[2];
+  esc[0] = 0x0F;
+  esc[1] = opcode;
+  mc->emit_bytes(mc, esc, 2);
+  emit_rm_reg(mc, dst, src);
+  if (!mc->debug) return;
+  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+}
+/* SSE load from [base + disp] into dst: [prefix] [REX] 0F opcode /r with
+ * a memory operand. A `prefix` of 0 means no prefix byte. Records one
+ * Debug row when debug info is enabled. */
+void emit_sse_load(MCEmitter* mc, u8 prefix, u8 opcode, u32 dst,
+                   u32 base, i32 disp) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  if (prefix != 0) mc->emit_bytes(mc, &prefix, 1);
+  emit_rex(mc, 0, dst, 0, base);
+  u8 esc[2];
+  esc[0] = 0x0F;
+  esc[1] = opcode;
+  mc->emit_bytes(mc, esc, 2);
+  emit_mem_operand(mc, dst, base, disp);
+  if (!mc->debug) return;
+  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+}
+/* SSE store of src to [base + disp]: [prefix] [REX] 0F opcode /r with a
+ * memory operand. A `prefix` of 0 means no prefix byte. Records one Debug
+ * row when debug info is enabled. */
+void emit_sse_store(MCEmitter* mc, u8 prefix, u8 opcode, u32 src,
+                    u32 base, i32 disp) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  if (prefix != 0) mc->emit_bytes(mc, &prefix, 1);
+  emit_rex(mc, 0, src, 0, base);
+  u8 esc[2];
+  esc[0] = 0x0F;
+  esc[1] = opcode;
+  mc->emit_bytes(mc, esc, 2);
+  emit_mem_operand(mc, src, base, disp);
+  if (!mc->debug) return;
+  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+}
+/* Same encoding as emit_sse_rr, but the caller chooses REX.W through `w`.
+ * Records one Debug row when debug info is enabled. */
+void emit_sse_rr_w(MCEmitter* mc, u8 prefix, u8 opcode, int w, u32 dst,
+                   u32 src) {
+  u32 start = obj_pos(mc->obj, mc->section_id);
+  if (prefix != 0) mc->emit_bytes(mc, &prefix, 1);
+  emit_rex(mc, w, dst, 0, src);
+  u8 esc[2];
+  esc[0] = 0x0F;
+  esc[1] = opcode;
+  mc->emit_bytes(mc, esc, 2);
+  emit_rm_reg(mc, dst, src);
+  if (!mc->debug) return;
+  debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+}
+
+/* ============================================================
+ * Function lifecycle */
+
+/* Begin emitting a function: switch to its text section, align to 16
+ * bytes with 0x90 fill, reset the per-function state in XImpl, and
+ * reserve the fixed-size prologue placeholder that x_func_end patches
+ * once the frame layout is known.
+ *
+ * t:  code-generation target (an XImpl).
+ * fd: function descriptor; the pointer is kept in a->fd until x_func_end
+ *     clears it. */
+void x_func_begin(CGTarget* t, const CGFuncDesc* fd) {
+ XImpl* a = impl_of(t);
+ MCEmitter* mc = t->mc;
+
+ mc->set_section(mc, fd->text_section_id);
+ mc->emit_align(mc, 16, 0x90);
+
+ a->fd = fd;
+ a->func_start = mc->pos(mc); /* taken after alignment padding */
+ a->next_param_int = 0;
+ a->next_param_fp = 0;
+ a->next_param_stack = 0;
+ a->has_sret = (fd->abi && fd->abi->has_sret) ? 1 : 0;
+ a->has_alloca = 0;
+ a->is_variadic = (fd->abi && fd->abi->variadic) ? 1 : 0;
+ a->cum_off = 0;
+ a->max_outgoing = 0;
+ /* Int pool: 6 registers, the first 5 callee-saved. FP pool: 10
+ * registers, none callee-saved. */
+ xpool_init(&a->int_pool, g_int_order, 6u, 5u);
+ xpool_init(&a->fp_pool, g_fp_order, 10u, 0u);
+ a->nslots = 0;
+ a->nscopes = 0;
+ a->nalloca_patches = 0;
+ a->sret_ptr_slot = FRAME_SLOT_NONE;
+ a->reg_save_slot = FRAME_SLOT_NONE;
+ a->epilogue_label = mc->label_new(mc);
+
+ mc->cfi_startproc(mc);
+
+ /* Reserve a fixed-size prologue placeholder filled with NOPs. */
+ a->prologue_pos = mc->pos(mc);
+ /* emit1 records a Debug row per byte when debug info is enabled. */
+ for (u32 i = 0; i < X64_PROLOGUE_BYTES; ++i) emit1(mc, 0x90);
+
+ /* sret: rdi at entry holds the destination pointer. Spill it to a
+ * hidden slot so the body can use rdi freely. */
+ if (a->has_sret) {
+ FrameSlotDesc fsd = {
+ .type = NULL, .name = 0, .loc = {0, 0, 0},
+ .size = 8, .align = 8, .kind = FS_SPILL, .flags = 0,
+ };
+ /* Only the slot is allocated here; the store of rdi into it is written
+ * into the patched prologue by x_func_end. */
+ a->sret_ptr_slot = x_frame_slot(t, &fsd);
+ /* Subsequent int args start at rsi (next_param_int = 1). */
+ a->next_param_int = 1;
+ }
+
+ /* Variadic: reserve the SysV reg-save area (rdi..r9 at +0..+40, then
+ * xmm0..xmm7 at +48..+160 with 16-byte stride) and emit the saves
+ * directly after the prologue placeholder so the original register
+ * args are preserved before x_param() spills the named ones. */
+ if (a->is_variadic) {
+ FrameSlotDesc rsd = {
+ .type = NULL, .name = 0, .loc = {0, 0, 0},
+ .size = 176, .align = 8, .kind = FS_SPILL, .flags = 0,
+ };
+ a->reg_save_slot = x_frame_slot(t, &rsd);
+ XSlot* rs = x64_slot_get(a, a->reg_save_slot);
+ static const u32 gprs[6] = {X64_RDI, X64_RSI, X64_RDX,
+ X64_RCX, X64_R8, X64_R9};
+ /* The save area starts at rbp - rs->off; entry i is 8*i bytes above. */
+ for (u32 i = 0; i < 6; ++i) {
+ emit_mov_store(mc, 8, gprs[i], X64_RBP,
+ -(i32)rs->off + (i32)(i * 8u));
+ }
+ /* movsd writes the low 8 bytes of each xmm; va_arg reads 8 bytes per
+ * FP slot, so the upper half of the 16-byte stride stays unused. */
+ for (u32 i = 0; i < 8; ++i) {
+ emit_sse_store(mc, 0xF2, 0x11, (u32)(X64_XMM0 + i), X64_RBP,
+ -(i32)rs->off + (i32)(48u + i * 16u));
+ }
+ }
+}
+
+/* Round `v` up to the next multiple of `a`. The mask arithmetic is only
+ * correct when `a` is a power of two. */
+static u32 align_up_u32(u32 v, u32 a) {
+  u32 mask = a - 1u;
+  return (v + mask) & ~mask;
+}
+
+/* Finish the current function: emit the epilogue, back-patch the prologue
+ * placeholder reserved by x_func_begin (the frame size and callee-save
+ * usage are only known now), patch every alloca displacement, and define
+ * the function symbol. Panics if the real prologue does not fit in
+ * X64_PROLOGUE_BYTES. */
+void x_func_end(CGTarget* t) {
+ XImpl* a = impl_of(t);
+ MCEmitter* mc = t->mc;
+
+ /* Save only the callee-saved registers the pool ever handed out: hwm is
+ * the highest allocated index + 1, clamped to the callee-saved prefix
+ * of the allocation order. */
+ u32 cs_used = a->int_pool.hwm;
+ if (cs_used > a->int_pool.n_cs) cs_used = a->int_pool.n_cs;
+ u32 cs_size = cs_used * 8u;
+
+ /* Stack alignment: SysV requires rsp ≡ 0 mod 16 just before a call,
+ * which means rsp ≡ 8 mod 16 inside the function (after the return
+ * address is pushed). On entry, rsp ≡ 8 mod 16; after `push rbp` it
+ * is 0 mod 16; after `sub rsp, frame_size` we need it back to 0
+ * mod 16, so frame_size must be a multiple of 16. */
+ u32 raw = a->max_outgoing + cs_size + a->cum_off;
+ u32 frame_size = align_up_u32(raw, 16u);
+ if (frame_size == 0) frame_size = 16; /* always reserve at least 16 */
+
+ mc->label_place(mc, a->epilogue_label);
+
+ /* Restore callee-saves. Each at rbp - (cum_off + (i+1)*8). */
+ for (i32 i = (i32)cs_used - 1; i >= 0; --i) {
+ u32 reg = a->int_pool.order[i];
+ i32 off = -(i32)a->cum_off - (i32)(i + 1) * 8;
+ emit_mov_load(mc, /*size=*/8, /*signed=*/0, reg, X64_RBP, off);
+ }
+
+ /* leave; ret. */
+ emit_leave(mc);
+ emit_ret(mc);
+
+ /* Patch prologue placeholder. */
+ /* Unused tail bytes stay 0x90 (NOP). */
+ u8 buf[X64_PROLOGUE_BYTES];
+ for (u32 i = 0; i < X64_PROLOGUE_BYTES; ++i) buf[i] = 0x90;
+ u32 wi = 0;
+
+ /* The fixed part below (push/mov/sub) is 11 bytes and is written
+ * without a bounds check. */
+ /* push rbp (1 byte). */
+ buf[wi++] = 0x55;
+ /* mov rbp, rsp: REX.W 89 E5. */
+ buf[wi++] = X64_REX_BASE | X64_REX_W;
+ buf[wi++] = 0x89;
+ buf[wi++] = modrm(3u, X64_RSP, X64_RBP);
+ /* sub rsp, frame_size: REX.W 81 /5 imm32 = 7 bytes. */
+ buf[wi++] = X64_REX_BASE | X64_REX_W;
+ buf[wi++] = 0x81;
+ buf[wi++] = modrm(3u, 5u, X64_RSP);
+ buf[wi++] = (u8)frame_size;
+ buf[wi++] = (u8)(frame_size >> 8);
+ buf[wi++] = (u8)(frame_size >> 16);
+ buf[wi++] = (u8)(frame_size >> 24);
+
+ /* sret: mov [rbp + disp32], rdi. */
+ /* mod=2 (disp32) is always used, so the store is exactly 7 bytes,
+ * matching the `wi + 7` bound check. */
+ if (a->has_sret && a->sret_ptr_slot != FRAME_SLOT_NONE) {
+ XSlot* s = x64_slot_get(a, a->sret_ptr_slot);
+ if (s) {
+ i32 off = -(i32)s->off;
+ if (wi + 7 > X64_PROLOGUE_BYTES) goto overflow;
+ buf[wi++] = X64_REX_BASE | X64_REX_W;
+ buf[wi++] = 0x89;
+ buf[wi++] = modrm(2u, X64_RDI, X64_RBP);
+ buf[wi++] = (u8)off;
+ buf[wi++] = (u8)(off >> 8);
+ buf[wi++] = (u8)(off >> 16);
+ buf[wi++] = (u8)(off >> 24);
+ }
+ }
+
+ /* Spill callee-saves. */
+ /* Same offsets as the restore loop above; REX.R is set for r8..r15. */
+ for (u32 i = 0; i < cs_used; ++i) {
+ u32 reg = a->int_pool.order[i];
+ i32 off = -(i32)a->cum_off - (i32)(i + 1) * 8;
+ if (wi + 7 > X64_PROLOGUE_BYTES) goto overflow;
+ buf[wi++] = (u8)(X64_REX_BASE | X64_REX_W | ((reg & 8) ? X64_REX_R : 0));
+ buf[wi++] = 0x89;
+ buf[wi++] = modrm(2u, (reg & 7u), X64_RBP);
+ buf[wi++] = (u8)off;
+ buf[wi++] = (u8)(off >> 8);
+ buf[wi++] = (u8)(off >> 16);
+ buf[wi++] = (u8)(off >> 24);
+ }
+
+ /* The `if (0)` body is reachable only through `goto overflow`; normal
+ * flow skips straight to obj_patch. */
+ if (0) {
+ overflow:
+ compiler_panic(t->c, a->loc,
+ "x64: prologue placeholder overflow (%u of %u bytes)", wi,
+ X64_PROLOGUE_BYTES);
+ }
+ obj_patch(t->obj, a->fd->text_section_id, a->prologue_pos, buf,
+ X64_PROLOGUE_BYTES);
+
+ /* Patch each alloca's `lea dst, [rsp + 0]` disp32 with the final
+ * max_outgoing (already 16-aligned via the `(stack_off+15)&~15` round
+ * at every call site). */
+ for (u32 i = 0; i < a->nalloca_patches; ++i) {
+ u8 dbuf[4];
+ u32 m = a->max_outgoing;
+ dbuf[0] = (u8)m;
+ dbuf[1] = (u8)(m >> 8);
+ dbuf[2] = (u8)(m >> 16);
+ dbuf[3] = (u8)(m >> 24);
+ obj_patch(t->obj, a->fd->text_section_id,
+ a->alloca_patches[i].disp_pos, dbuf, 4);
+ }
+
+ /* Define the function symbol. */
+ /* The symbol spans [func_start, end): placeholder, body and epilogue. */
+ u32 end = mc->pos(mc);
+ obj_symbol_define(t->obj, a->fd->sym, a->fd->text_section_id,
+ (u64)a->func_start, (u64)(end - a->func_start));
+
+ mc->cfi_endproc(mc);
+ a->fd = NULL;
+}
diff --git a/src/arch/x64/internal.h b/src/arch/x64/internal.h
@@ -0,0 +1,257 @@
+/* arch/x64/internal.h — private header shared by emit.c, alloc.c, ops.c.
+ *
+ * Contains:
+ * - XRegPool, XSlot, XScope, XAllocaPatch, XImpl struct definitions
+ * - impl_of() accessor
+ * - Small type helpers (static inline)
+ * - Forward declarations of cross-file functions
+ *
+ * NOT included by external consumers; use arch/x64.h for the public API. */
+
+#pragma once
+
+#include <string.h>
+
+#include "arch/arch.h"
+#include "arch/x64.h"
+#include "arch/x64_isa.h"
+#include "core/arena.h"
+#include "core/pool.h"
+#include "obj/obj.h"
+#include "type/type.h"
+
+/* Size in bytes of the NOP-filled placeholder that x_func_begin reserves
+ * at function entry. x_func_end overwrites it with the real prologue and
+ * panics if the prologue would not fit. */
+#define X64_PROLOGUE_BYTES 96u
+
+/* ============================================================
+ * Custom register pool. */
+
+/* Register pool over a fixed allocation-order table. x_func_begin
+ * re-initializes both pools (xpool_init) for every function; x_func_end
+ * reads hwm, n_cs and order to decide which callee-saved registers to
+ * spill and restore. */
+typedef struct XRegPool {
+ u32 free; /* bit i set ⇔ alloc_order[i] is free */
+ u32 hwm; /* highest index+1 ever allocated */
+ const u8* order; /* alloc_order; first n_cs are callee-saved */
+ u8 nregs; /* number of entries in order */
+ u8 n_cs; /* length of the callee-saved prefix of order */
+ u8 pad[2];
+} XRegPool;
+
+/* ============================================================
+ * XImpl and friends. */
+
+/* One frame slot, looked up through x64_slot_get. */
+typedef struct XSlot {
+ u32 off; /* bytes below rbp (positive); address = rbp - off */
+ u32 size;
+ u32 align;
+ u8 kind; /* presumably the FrameSlotDesc kind (e.g. FS_SPILL) — confirm */
+ u8 pad[3];
+} XSlot;
+
+/* Per-scope bookkeeping created by x_scope_begin. CGScope handles are
+ * 1-based indices into XImpl.scopes. */
+typedef struct XScope {
+ u8 kind; /* CGScopeDesc kind (SCOPE_IF / SCOPE_LOOP / SCOPE_BLOCK) */
+ u8 has_else; /* set by x_scope_else once else_label has been placed */
+ u8 pad[2];
+ MCLabel else_label; /* IF only: target of the branch taken when cond == 0 */
+ MCLabel end_label; /* IF only: placed by x_scope_end */
+ Label break_label; /* copied from the scope descriptor */
+ Label continue_label; /* copied from the scope descriptor */
+} XScope;
+
+/* alloca emits a placeholder `lea dst, [rsp + 0]` whose disp32 is patched
+ * at func_end with the final max_outgoing value. disp_pos records the
+ * byte offset of that disp32 in the active text section. */
+typedef struct XAllocaPatch {
+ u32 disp_pos; /* section offset of the 4-byte displacement to overwrite */
+} XAllocaPatch;
+
+/* x64 backend state. `base` must remain the first member: impl_of() casts
+ * a CGTarget* straight to XImpl*. The per-function fields are reset by
+ * x_func_begin. */
+typedef struct XImpl {
+ CGTarget base;
+ SrcLoc loc; /* location reported by compiler_panic calls */
+ const CGFuncDesc* fd; /* current function; NULL after x_func_end */
+
+ u32 func_start; /* section offset of the function start (after alignment) */
+ u32 prologue_pos; /* section offset of the prologue placeholder */
+ MCLabel epilogue_label; /* placed by x_func_end just before the epilogue */
+
+ XSlot* slots;
+ u32 nslots;
+ u32 slots_cap;
+ u32 cum_off; /* frame-slot bytes below rbp; callee-saves sit below this */
+ u32 max_outgoing; /* counted into the frame size; also the alloca disp */
+
+ /* Parameter cursors; next_param_int starts at 1 when rdi carries the
+ * sret pointer. */
+ u32 next_param_int;
+ u32 next_param_fp;
+ u32 next_param_stack;
+ u8 has_sret; /* from fd->abi->has_sret */
+ u8 has_alloca;
+ u8 is_variadic; /* from fd->abi->variadic */
+ u8 pad0;
+ FrameSlot sret_ptr_slot; /* hidden slot holding the incoming sret pointer */
+ FrameSlot reg_save_slot; /* variadic: 176-byte __va_list_tag reg save area */
+
+ XRegPool int_pool; /* over g_int_order */
+ XRegPool fp_pool; /* over g_fp_order */
+
+ XScope* scopes; /* growable array; CGScope handle = index + 1 */
+ u32 nscopes;
+ u32 scopes_cap;
+
+ XAllocaPatch* alloca_patches; /* patched by x_func_end */
+ u32 nalloca_patches;
+ u32 alloca_patches_cap;
+} XImpl;
+
+/* Recover the backend state from a generic target pointer. The cast is
+ * valid because `base` is the first member of XImpl. */
+static inline XImpl* impl_of(CGTarget* t) {
+  XImpl* impl = (XImpl*)t;
+  return impl;
+}
+
+/* Defined outside the x64 backend. The emitters call it with the section
+ * offset at which an instruction starts and the emitter's current source
+ * location. */
+extern void debug_emit_row(Debug*, ObjSecId text_section, u32 offset, SrcLoc);
+
+/* ============================================================
+ * Type helpers (static inline — used in all three translation units). */
+
+/* Returns 1 for the kinds handled as 64-bit values (long, unsigned long,
+ * long long, unsigned long long, pointer, double); 0 for everything
+ * else, including a NULL type. */
+static inline int type_is_64(const Type* t) {
+  if (!t) return 0;
+  return t->kind == TY_LONG || t->kind == TY_ULONG || t->kind == TY_LLONG ||
+         t->kind == TY_ULLONG || t->kind == TY_PTR || t->kind == TY_DOUBLE;
+}
+/* Returns 1 when `t` is double or long double; 0 otherwise (including a
+ * NULL type). */
+static inline int type_is_fp_double(const Type* t) {
+  if (!t) return 0;
+  return t->kind == TY_DOUBLE || t->kind == TY_LDOUBLE;
+}
+/* Size in bytes the backend uses for a scalar of type `t`: 1 for the
+ * char kinds and bool, 2 for the short kinds, 4 for int/unsigned/float,
+ * and 8 for every other kind. A NULL type yields 4. */
+static inline u32 type_byte_size(const Type* t) {
+  u32 size;
+  if (!t) return 4;
+  switch (t->kind) {
+    case TY_CHAR:
+    case TY_SCHAR:
+    case TY_UCHAR:
+    case TY_BOOL:
+      size = 1;
+      break;
+    case TY_SHORT:
+    case TY_USHORT:
+      size = 2;
+      break;
+    case TY_INT:
+    case TY_UINT:
+    case TY_FLOAT:
+      size = 4;
+      break;
+    default:
+      /* long, long long, pointers, double and any kind not listed. */
+      size = 8;
+      break;
+  }
+  return size;
+}
+/* Returns 1 for char, signed char, short, int, long and long long; 0 for
+ * every other kind and for a NULL type. Plain char counts as signed. */
+static inline int type_is_signed(const Type* t) {
+  if (!t) return 0;
+  return t->kind == TY_CHAR || t->kind == TY_SCHAR || t->kind == TY_SHORT ||
+         t->kind == TY_INT || t->kind == TY_LONG || t->kind == TY_LLONG;
+}
+
+/* Abort compilation with "x64: <what> not implemented", reported at the
+ * backend's current source location. Does not return. */
+static inline _Noreturn void x_panic(CGTarget* t, const char* what) {
+  compiler_panic(t->c, impl_of(t)->loc, "x64: %s not implemented", what);
+}
+
+/* ============================================================
+ * Shared constant tables (defined in emit.c; declared here for the other
+ * x64 translation units). */
+
+extern const u8 g_int_order[6];
+extern const u8 g_fp_order[10];
+extern const u32 g_int_arg_regs[6];
+
+/* ============================================================
+ * Cross-file function declarations.
+ *
+ * Functions that are defined in one translation unit but called from
+ * another cannot remain static; they are declared here. */
+
+/* --- emit.c exports (lifecycle used by ops.c vtable constructor,
+ * encoding helpers used by alloc.c and ops.c) --- */
+void x_func_begin(CGTarget* t, const CGFuncDesc* fd);
+void x_func_end(CGTarget* t);
+
+/* encoding helpers */
+void emit_u32le(MCEmitter* mc, u32 v);
+void emit_rex(MCEmitter* mc, int w, u32 reg, u32 index, u32 rm);
+void emit_rex_force(MCEmitter* mc, int w, u32 reg, u32 index, u32 rm);
+u8 modrm(u32 mod, u32 reg, u32 rm);
+u8 sib(u32 scale, u32 index, u32 base);
+void emit_mem_operand(MCEmitter* mc, u32 reg, u32 base, i32 disp);
+void emit_rm_reg(MCEmitter* mc, u32 reg, u32 rm);
+void emit_mov_rr(MCEmitter* mc, int w, u32 dst, u32 src);
+void emit_mov_load(MCEmitter* mc, u32 size, int signed_ext, u32 dst, u32 base,
+ i32 disp);
+void emit_mov_store(MCEmitter* mc, u32 size, u32 src, u32 base, i32 disp);
+void emit_lea(MCEmitter* mc, u32 dst, u32 base, i32 disp);
+void x64_emit_load_imm(MCEmitter* mc, int is64, u32 dst, i64 imm);
+void emit_alu_rr(MCEmitter* mc, int w, u8 op, u32 dst, u32 src);
+void emit_imul_rr(MCEmitter* mc, int w, u32 dst, u32 src);
+void emit_f7_rm(MCEmitter* mc, int w, u32 sub, u32 reg);
+void emit_shift_cl(MCEmitter* mc, int w, u32 sub, u32 reg);
+void emit_shift_imm(MCEmitter* mc, int w, u32 sub, u32 reg, u8 imm);
+void emit_cqo_or_cdq(MCEmitter* mc, int w);
+void emit_xor_self(MCEmitter* mc, int w, u32 r);
+void emit_cmp_imm8(MCEmitter* mc, int w, u32 reg, i8 imm);
+void emit_alu_imm8(MCEmitter* mc, int w, u32 sub, u32 reg, i8 imm);
+void emit_alu_imm32(MCEmitter* mc, int w, u32 sub, u32 reg, i32 imm);
+void emit_imul_imm8(MCEmitter* mc, int w, u32 dst, u32 src, i8 imm);
+void emit_imul_imm32(MCEmitter* mc, int w, u32 dst, u32 src, i32 imm);
+int imm_fits_i8(i64 imm);
+int imm_fits_i32(i64 imm);
+void emit_test_self(MCEmitter* mc, int w, u32 reg);
+void emit_setcc(MCEmitter* mc, u32 cc, u32 reg);
+void emit_movzx_r32_r8(MCEmitter* mc, u32 dst, u32 src);
+void emit_extend_rr(MCEmitter* mc, int w, int signed_ext, u32 src_size,
+ u32 dst, u32 src);
+void emit_sse_rr(MCEmitter* mc, u8 prefix, u8 opcode, u32 dst, u32 src);
+void emit_sse_load(MCEmitter* mc, u8 prefix, u8 opcode, u32 dst, u32 base,
+ i32 disp);
+void emit_sse_store(MCEmitter* mc, u8 prefix, u8 opcode, u32 src, u32 base,
+ i32 disp);
+void emit_sse_rr_w(MCEmitter* mc, u8 prefix, u8 opcode, int w, u32 dst,
+ u32 src);
+
+/* --- alloc.c exports (used by emit.c and/or ops.c) --- */
+void xpool_init(XRegPool* p, const u8* order, u8 nregs, u8 n_cs);
+XSlot* x64_slot_get(XImpl* a, FrameSlot fs);
+FrameSlot x_frame_slot(CGTarget* t, const FrameSlotDesc* d);
+Reg x_alloc_reg(CGTarget* t, RegClass cls, const Type* ty);
+void x_free_reg(CGTarget* t, Reg r, RegClass cls);
+void x_param(CGTarget* t, const CGParamDesc* p);
+const Reg* x_clobbers(CGTarget* t, RegClass c, u32* n);
+void x_spill_reg(CGTarget* t, Operand src, FrameSlot slot, MemAccess ma);
+void x_reload_reg(CGTarget* t, Operand dst, FrameSlot slot, MemAccess ma);
+Label x_label_new(CGTarget* t);
+void x_label_place(CGTarget* t, Label l);
+void emit_jmp_label(MCEmitter* mc, MCLabel l);
+void emit_jcc_label(MCEmitter* mc, u32 cc, MCLabel l);
+void x_jump(CGTarget* t, Label l);
+void x_cmp_branch(CGTarget* t, CmpOp op, Operand a, Operand b, Label l);
+void x_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a, Operand b);
+CGScope x_scope_begin(CGTarget* t, const CGScopeDesc* d);
+void x_scope_else(CGTarget* t, CGScope s);
+void x_scope_end(CGTarget* t, CGScope s);
+void x_break_to(CGTarget* t, CGScope s);
+void x_continue_to(CGTarget* t, CGScope s);
+u32 x64_force_reg_int(CGTarget* t, Operand op, int w, u32 scratch);
+
+/* --- ops.c exports (used by alloc.c) --- */
+void x_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma);
+void x_store(CGTarget* t, Operand addr, Operand src, MemAccess ma);
diff --git a/src/arch/x64/ops.c b/src/arch/x64/ops.c
@@ -0,0 +1,1916 @@
+/* arch/x64/ops.c — data movement, arithmetic, calls, atomics, intrinsics,
+ * and the vtable constructor x64_cgtarget_new.
+ *
+ * Covers: x_load_imm, x_load_const, x_copy, x_load, x_store, x_addr_of,
+ * x_tls_addr_of, x_copy_bytes, x_set_bytes, x_bitfield_load/store,
+ * x_binop, x_unop, x_convert, emit_arg_value, x_call, x_ret,
+ * x_alloca_, x_va_start_, x_va_arg_, x_va_end_, x_va_copy_,
+ * emit_lock_*, x_atomic_load/store/rmw/cas, x_fence,
+ * emit_popcnt, emit_bs, emit_bswap, emit_rol16_imm8, emit_xor_imm32,
+ * x_intrinsic, x_asm_block, x_set_loc, x_finalize, x_destroy,
+ * x64_cgtarget_new. */
+
+#include <string.h>
+
+#include "arch/arch.h"
+#include "arch/x64.h"
+#include "arch/x64_isa.h"
+#include "core/arena.h"
+#include "core/pool.h"
+#include "obj/obj.h"
+#include "type/type.h"
+
+#include "arch/x64/internal.h"
+
+/* ============================================================
+ * Data movement */
+
+/* Load the integer immediate `imm` into the register named by `dst`.
+ * The 64-bit form is requested when dst's type is 64 bits wide. */
+static void x_load_imm(CGTarget* t, Operand dst, i64 imm) {
+  const u32 rd = dst.v.reg & 0xFu;
+  const int wide = type_is_64(dst.type) ? 1 : 0;
+  x64_emit_load_imm(t->mc, wide, rd, imm);
+}
+
+/* Materialize an FP literal into the XMM register named by `dst`.
+ *
+ * The constant's bytes are appended to .rodata under a fresh local symbol
+ * (".LCFP_x64_<n>", numbered by a function-local counter) and the value
+ * is then fetched with a RIP-relative movss/movsd. Panics when `dst` is
+ * not in the FP register class. */
+static void x_load_const(CGTarget* t, Operand dst, ConstBytes cb) {
+  XImpl* a = impl_of(t);
+  MCEmitter* mc = t->mc;
+  if (dst.cls != RC_FP)
+    compiler_panic(t->c, a->loc, "x64 load_const: only FP supported in v1");
+
+  /* Park the literal in .rodata at an aligned offset, remembering which
+   * section code was being emitted into. */
+  Sym rodata_name = pool_intern_cstr(t->c->global, ".rodata");
+  ObjSecId rodata = obj_section(t->obj, rodata_name, SEC_RODATA, SF_ALLOC, 1u);
+  u32 code_section = mc->section_id;
+  mc->set_section(mc, rodata);
+  u32 lit_off = obj_align_to(t->obj, rodata, cb.align ? cb.align : 4);
+  mc->emit_bytes(mc, cb.bytes, cb.size);
+
+  /* Symbol name: fixed stem followed by the sequence number in decimal.
+   * Digits are produced least-significant first, then copied reversed. */
+  static u32 lit_seq = 0;
+  static const char stem[] = ".LCFP_x64_";
+  char name[64];
+  int len = (int)(sizeof stem - 1);
+  memcpy(name, stem, (size_t)len);
+  char digits[16];
+  int ndigits = 0;
+  u32 seq = lit_seq++;
+  do {
+    digits[ndigits++] = '0' + (char)(seq % 10);
+    seq /= 10;
+  } while (seq);
+  while (ndigits > 0) name[len++] = digits[--ndigits];
+  name[len] = 0;
+
+  Sym sym_name = pool_intern_cstr(t->c->global, name);
+  ObjSymId sym = obj_symbol(t->obj, sym_name, SB_LOCAL, SK_OBJ, rodata,
+                            (u64)lit_off, (u64)cb.size);
+  mc->set_section(mc, code_section);
+
+  /* movs{s,d} xmm, [rip+disp32]. The R_PC32 reloc on the disp32 carries
+   * addend -4 so the linker resolves relative to end-of-instruction. */
+  u32 xmm = dst.v.reg & 0xFu;
+  u8 sse_prefix = (cb.size == 8) ? 0xF2 : 0xF3;
+  u8 opcode[2] = {0x0F, 0x10};
+  u8 mrm = modrm(0u, (xmm & 7u), 5u); /* [RIP + disp32] */
+  mc->emit_bytes(mc, &sse_prefix, 1);
+  emit_rex(mc, 0, xmm, 0, 0);
+  mc->emit_bytes(mc, opcode, 2);
+  mc->emit_bytes(mc, &mrm, 1);
+  u32 disp_pos = mc->pos(mc);
+  emit_u32le(mc, 0);
+  mc->emit_reloc_at(mc, code_section, disp_pos, R_PC32, sym, -4, 1, 0);
+}
+
+/* Register-to-register copy. When either operand is in the FP class the
+ * move is a scalar SSE move (0xF2 prefix for double, 0xF3 for single,
+ * chosen from dst's type); otherwise it is an integer mov whose width
+ * follows dst's type. */
+static void x_copy(CGTarget* t, Operand dst, Operand src) {
+  const u32 rd = dst.v.reg & 0xFu;
+  const u32 rs = src.v.reg & 0xFu;
+  const int is_fp = (dst.cls == RC_FP) || (src.cls == RC_FP);
+
+  if (!is_fp) {
+    int wide = type_is_64(dst.type) ? 1 : 0;
+    emit_mov_rr(t->mc, wide, rd, rs);
+    return;
+  }
+  u8 sse_prefix = type_is_fp_double(dst.type) ? 0xF2 : 0xF3;
+  emit_sse_rr(t->mc, sse_prefix, 0x10, rd, rs);
+}
+
+/* Resolve a memory operand to a (base register, displacement) pair.
+ * LOCAL operands become RBP minus the slot offset; INDIRECT operands use
+ * their own base register and offset. The displacement is written to
+ * *out_off and the base register number is returned. Any other operand
+ * kind, or an unknown frame slot, panics. */
+static u32 addr_base(CGTarget* t, Operand addr, i32* out_off) {
+  XImpl* a = impl_of(t);
+  switch (addr.kind) {
+    case OPK_LOCAL: {
+      XSlot* slot = x64_slot_get(a, addr.v.frame_slot);
+      if (!slot) compiler_panic(t->c, a->loc, "x64 addr_base: bad slot");
+      *out_off = -(i32)slot->off;
+      return X64_RBP;
+    }
+    case OPK_INDIRECT:
+      *out_off = addr.v.ind.ofs;
+      return addr.v.ind.base & 0xFu;
+    default:
+      break;
+  }
+  compiler_panic(t->c, a->loc, "x64 addr_base: kind %d unsupported",
+                 (int)addr.kind);
+}
+
+/* Nonzero when a reference to `sym` should go through a GOT slot; the
+ * decision is delegated to obj_symbol_extern_via_got. */
+static int x64_use_got_for_sym(CGTarget* t, ObjSymId sym) {
+  int via_got = obj_symbol_extern_via_got(t->c, t->obj, sym);
+  return via_got;
+}
+
+/* Materialize `&sym + addend` into `dst_reg`.
+ *
+ * Direct path (x64_use_got_for_sym() is false): emit
+ * `lea rd, [rip + disp32]` with an R_X64_PLT32 reloc whose addend is
+ * `addend - 4` (the -4 accounts for the PC being end-of-instruction).
+ * PLT32 collapses to a plain PC-relative LEA at link time; the PLT
+ * routing only fires when the linker actually needs the trampoline,
+ * i.e. function calls into a DSO.
+ *
+ * GOT path (undef externs in PIC/PIE): emit `mov rd, [rip + disp32]`
+ * against a GOT slot (R_X64_REX_GOTPCRELX, addend -4) so the loader can
+ * resolve the symbol by patching a single slot rather than touching
+ * .text. The reloc itself never carries the caller's addend (most
+ * loaders disallow nonzero addends on GOT-load fixups); a nonzero addend
+ * is applied afterwards with `add rd, imm8/imm32`. The addend must
+ * therefore fit a sign-extended imm32 on this path; anything larger
+ * panics instead of being silently truncated. */
+static void emit_global_lea(CGTarget* t, u32 dst_reg, ObjSymId sym,
+                            i64 addend) {
+  MCEmitter* mc = t->mc;
+  int via_got = x64_use_got_for_sym(t, sym);
+
+  /* Both forms are `REX.W <op> rd, [rip + disp32]`; only the opcode
+   * (8B = mov load, 8D = lea) and the relocation differ. */
+  emit_rex(mc, 1, dst_reg, 0, 0);
+  u8 op = via_got ? 0x8B : 0x8D;
+  mc->emit_bytes(mc, &op, 1);
+  u8 mr = modrm(0u, (dst_reg & 7u), 5u); /* [RIP + disp32] */
+  mc->emit_bytes(mc, &mr, 1);
+  u32 disp_pos = mc->pos(mc);
+  emit_u32le(mc, 0);
+
+  if (!via_got) {
+    mc->emit_reloc_at(mc, mc->section_id, disp_pos, R_X64_PLT32, sym,
+                      addend - 4, 1, 0);
+    return;
+  }
+
+  mc->emit_reloc_at(mc, mc->section_id, disp_pos, R_X64_REX_GOTPCRELX, sym,
+                    -4, 1, 0);
+  if (!addend) return;
+
+  /* `add` only takes a sign-extended 32-bit immediate. */
+  if (addend < -(i64)0x80000000LL || addend > (i64)0x7fffffffLL) {
+    compiler_panic(t->c, impl_of(t)->loc,
+                   "x64 global addr: GOT addend out of imm32 range");
+  }
+  i32 a = (i32)addend;
+  int short_form = (a >= -128 && a <= 127);
+
+  /* add r/m64, imm8  (REX.W + 0x83 /0 ib)  or
+   * add r/m64, imm32 (REX.W + 0x81 /0 id). */
+  emit_rex(mc, 1, 0, 0, dst_reg);
+  u8 add_op[2] = {(u8)(short_form ? 0x83 : 0x81),
+                  modrm(3u, 0u, (u8)(dst_reg & 7u))};
+  mc->emit_bytes(mc, add_op, 2);
+  if (short_form) {
+    u8 ib = (u8)a;
+    mc->emit_bytes(mc, &ib, 1);
+  } else {
+    emit_u32le(mc, (u32)a);
+  }
+}
+
+/* Load from memory operand `addr` into register `dst`.
+ *
+ * The access width is ma.size, or the byte size of addr's type when
+ * ma.size is zero. GLOBAL addresses are first materialized into R11 and
+ * read through [r11]; other kinds are resolved by addr_base(). FP
+ * destinations use movss/movsd (chosen by width); integer destinations
+ * use emit_mov_load with signedness taken from ma.type, falling back to
+ * addr.type. */
+void x_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) {
+  MCEmitter* mc = t->mc;
+  const u32 nbytes = ma.size ? ma.size : type_byte_size(addr.type);
+  const u32 rd = dst.v.reg & 0xFu;
+
+  u32 base;
+  i32 disp = 0;
+  if (addr.kind == OPK_GLOBAL) {
+    emit_global_lea(t, X64_R11, addr.v.global.sym, addr.v.global.addend);
+    base = X64_R11;
+  } else {
+    base = addr_base(t, addr, &disp);
+  }
+
+  if (dst.cls == RC_FP) {
+    u8 sse_prefix = (nbytes == 8) ? 0xF2 : 0xF3;
+    emit_sse_load(mc, sse_prefix, 0x10, rd, base, disp);
+    return;
+  }
+  int is_signed = type_is_signed(ma.type ? ma.type : addr.type);
+  emit_mov_load(mc, nbytes, is_signed, rd, base, disp);
+}
+
+/* Store `src` to memory operand `addr`.
+ *
+ * The access width is ma.size, or the byte size of addr's type when
+ * ma.size is zero. GLOBAL addresses are materialized into R11 first and
+ * written through [r11]; other kinds are resolved by addr_base().
+ * Immediate sources are staged in RAX (so R11 stays intact between the
+ * address materialization and the store), FP sources use movss/movsd,
+ * and everything else is a plain register store. */
+void x_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) {
+  MCEmitter* mc = t->mc;
+  const u32 nbytes = ma.size ? ma.size : type_byte_size(addr.type);
+
+  u32 base;
+  i32 disp = 0;
+  if (addr.kind == OPK_GLOBAL) {
+    emit_global_lea(t, X64_R11, addr.v.global.sym, addr.v.global.addend);
+    base = X64_R11;
+  } else {
+    base = addr_base(t, addr, &disp);
+  }
+
+  if (src.kind == OPK_IMM) {
+    int wide = (nbytes == 8) ? 1 : 0;
+    x64_emit_load_imm(mc, wide, X64_RAX, src.v.imm);
+    emit_mov_store(mc, nbytes, X64_RAX, base, disp);
+  } else if (src.cls == RC_FP) {
+    u8 sse_prefix = (nbytes == 8) ? 0xF2 : 0xF3;
+    emit_sse_store(mc, sse_prefix, 0x11, src.v.reg & 0xFu, base, disp);
+  } else {
+    emit_mov_store(mc, nbytes, src.v.reg & 0xFu, base, disp);
+  }
+}
+
+/* Compute the address of lvalue `lv` into register `dst`: an RBP-relative
+ * LEA for LOCAL, a base+offset LEA for INDIRECT, and emit_global_lea()
+ * for GLOBAL. Other operand kinds panic. */
+static void x_addr_of(CGTarget* t, Operand dst, Operand lv) {
+  XImpl* a = impl_of(t);
+  const u32 rd = dst.v.reg & 0xFu;
+
+  switch (lv.kind) {
+    case OPK_LOCAL: {
+      XSlot* slot = x64_slot_get(a, lv.v.frame_slot);
+      if (!slot) compiler_panic(t->c, a->loc, "x64 addr_of: bad slot");
+      emit_lea(t->mc, rd, X64_RBP, -(i32)slot->off);
+      return;
+    }
+    case OPK_INDIRECT:
+      emit_lea(t->mc, rd, lv.v.ind.base & 0xFu, lv.v.ind.ofs);
+      return;
+    case OPK_GLOBAL:
+      emit_global_lea(t, rd, lv.v.global.sym, lv.v.global.addend);
+      return;
+    default:
+      break;
+  }
+  x_panic(t, "addr_of: kind unsupported");
+}
+
+/* x86_64 TLS Local-Exec address materialization into `dst`:
+ *   mov rd, fs:0               ; thread pointer (FS base + 0)
+ *   lea rd, [rd + sym@tpoff]   ; add the TP-relative offset
+ * The LEA's disp32 carries an R_X64_TPOFF32 reloc (with `addend`); the
+ * linker fills in the signed TP-relative offset, which is negative under
+ * variant II because the TLS image sits below the TCB that FS points at. */
+static void x_tls_addr_of(CGTarget* t, Operand dst, ObjSymId sym, i64 addend) {
+  MCEmitter* mc = t->mc;
+  u32 section = mc->section_id;
+  u32 rd = dst.v.reg & 0xFu;
+  u32 lo3 = rd & 7u;
+
+  /* mov rd, qword ptr fs:[0]
+   *   64 [REX.W|REX.R] 8B mod=00/reg=rd/rm=100 sib(0,4,5) disp32=0 */
+  u8 seg_fs = 0x64;
+  mc->emit_bytes(mc, &seg_fs, 1);
+  emit_rex(mc, 1, rd, 0, 0);
+  u8 mov_bytes[3] = {0x8B, modrm(0u, lo3, 4u), sib(0u, 4u, 5u)};
+  mc->emit_bytes(mc, mov_bytes, 3);
+  emit_u32le(mc, 0);
+
+  /* lea rd, [rd + disp32]
+   *   [REX.W|REX.R|REX.B] 8D mod=10/reg=rd/rm=rd [SIB] disp32
+   * A base whose low three bits are 100 cannot be named directly in
+   * ModRM.rm, so that case goes through a SIB byte with no index. */
+  int needs_sib = (lo3 == 4u);
+  emit_rex(mc, 1, rd, 0, rd);
+  u8 lea_op = 0x8D;
+  mc->emit_bytes(mc, &lea_op, 1);
+  u8 lea_modrm = modrm(2u, lo3, needs_sib ? 4u : lo3);
+  mc->emit_bytes(mc, &lea_modrm, 1);
+  if (needs_sib) {
+    u8 lea_sib = sib(0u, 4u, lo3);
+    mc->emit_bytes(mc, &lea_sib, 1);
+  }
+  u32 disp_pos = mc->pos(mc);
+  emit_u32le(mc, 0);
+
+  mc->emit_reloc_at(mc, section, disp_pos, R_X64_TPOFF32, sym, addend, 0, 0);
+}
+
+/* Aggregate ops — small unrolled memcpy/memset. */
+
+/* Return a register holding the address described by `op`. A REG operand
+ * is used as-is; a LOCAL operand has its RBP-relative address computed
+ * into `scratch`, which is then returned. Other kinds panic. */
+static u32 agg_addr_reg(CGTarget* t, Operand op, u32 scratch) {
+  switch (op.kind) {
+    case OPK_REG:
+      return op.v.reg & 0xFu;
+    case OPK_LOCAL: {
+      XImpl* a = impl_of(t);
+      XSlot* slot = x64_slot_get(a, op.v.frame_slot);
+      if (!slot) compiler_panic(t->c, a->loc, "x64 agg: bad slot");
+      emit_lea(t->mc, scratch, X64_RBP, -(i32)slot->off);
+      return scratch;
+    }
+    default:
+      break;
+  }
+  compiler_panic(t->c, impl_of(t)->loc,
+                 "x64 agg: address kind %d unsupported", (int)op.kind);
+}
+
+/* Unrolled copy of g.size bytes from the address `sa` to the address `da`.
+ * The destination address uses R11 as scratch, the source address uses
+ * RAX (or RCX when the destination register is RAX), and every chunk
+ * travels through RDX. Chunks are emitted widest-first: 8, 4, 2, then 1
+ * byte. */
+static void x_copy_bytes(CGTarget* t, Operand da, Operand sa,
+                         AggregateAccess g) {
+  static const u32 chunk_sizes[4] = {8u, 4u, 2u, 1u};
+  MCEmitter* mc = t->mc;
+
+  u32 dst_reg = agg_addr_reg(t, da, X64_R11);
+  u32 src_scratch = (dst_reg == X64_RAX) ? X64_RCX : X64_RAX;
+  u32 src_reg = agg_addr_reg(t, sa, src_scratch);
+
+  u32 total = g.size;
+  u32 done = 0;
+  for (u32 k = 0; k < 4u; ++k) {
+    u32 step = chunk_sizes[k];
+    for (; done + step <= total; done += step) {
+      emit_mov_load(mc, step, 0, X64_RDX, src_reg, (i32)done);
+      emit_mov_store(mc, step, X64_RDX, dst_reg, (i32)done);
+    }
+  }
+}
+
+/* Unrolled fill of g.size bytes at address `da` with the low byte of the
+ * immediate `bv`. The byte is replicated across all eight lanes of RAX,
+ * then stored widest-first (8, 4, 2, 1 bytes). Non-immediate fill values
+ * panic. */
+static void x_set_bytes(CGTarget* t, Operand da, Operand bv,
+                        AggregateAccess g) {
+  static const u32 chunk_sizes[4] = {8u, 4u, 2u, 1u};
+  MCEmitter* mc = t->mc;
+
+  u32 dst_reg = agg_addr_reg(t, da, X64_R11);
+  if (bv.kind != OPK_IMM)
+    compiler_panic(t->c, impl_of(t)->loc,
+                   "x64 set_bytes: non-IMM byte not yet supported");
+
+  /* Multiplying by 0x0101..01 copies the byte into every lane. */
+  u64 fill = (u64)(u8)(bv.v.imm & 0xff) * 0x0101010101010101ULL;
+  x64_emit_load_imm(mc, 1, X64_RAX, (i64)fill);
+
+  u32 total = g.size;
+  u32 done = 0;
+  for (u32 k = 0; k < 4u; ++k) {
+    u32 step = chunk_sizes[k];
+    for (; done + step <= total; done += step) {
+      emit_mov_store(mc, step, X64_RAX, dst_reg, (i32)done);
+    }
+  }
+}
+
+/* Read a bit-field into `dst`.
+ *
+ * The whole storage unit is loaded zero-extended, then the field is
+ * isolated with two shifts: left so its top bit reaches the top of the
+ * working register, and right again to bit 0 — SAR for signed fields
+ * (sign-extends), SHR for unsigned. The working register is 64 bits wide
+ * for an 8-byte storage unit and 32 bits otherwise. A zero storage size
+ * defaults to 4 bytes and a zero width to 1 bit. */
+static void x_bitfield_load(CGTarget* t, Operand dst, Operand record_addr,
+                            BitFieldAccess bf) {
+  MCEmitter* mc = t->mc;
+  u32 base = agg_addr_reg(t, record_addr, X64_R11);
+  u32 unit_bytes = bf.storage.size ? bf.storage.size : 4u;
+  int wide = (unit_bytes == 8u) ? 1 : 0;
+  u32 reg_bits = wide ? 64u : 32u;
+  u32 lsb = bf.bit_offset;
+  u32 field_bits = bf.bit_width ? bf.bit_width : 1u;
+  u32 rd = dst.v.reg & 0xFu;
+
+  emit_mov_load(mc, unit_bytes, 0, rd, base, (i32)bf.storage_offset);
+
+  u8 shl_by = (u8)(reg_bits - lsb - field_bits);
+  u8 shr_by = (u8)(reg_bits - field_bits);
+  if (shl_by) emit_shift_imm(mc, wide, 4u, rd, shl_by); /* /4 = SHL */
+  if (shr_by) {
+    u32 shr_sub = bf.signed_ ? 7u : 5u; /* /7 = SAR, /5 = SHR */
+    emit_shift_imm(mc, wide, shr_sub, rd, shr_by);
+  }
+}
+
+/* Write a bit-field with a read-modify-write of its storage unit:
+ *   1. load the unit into RAX and AND it with ~mask to clear the field;
+ *   2. stage the new value in RCX, masked to the field width and shifted
+ *      into position (an immediate is pre-computed; a register source is
+ *      masked via RDX and shifted at run time);
+ *   3. OR RCX into RAX and store the unit back.
+ * Only RAX/RCX/RDX are written, so the base register is left alone.
+ * Source kinds other than IMM and REG panic. */
+static void x_bitfield_store(CGTarget* t, Operand record_addr, Operand src,
+                             BitFieldAccess bf) {
+  MCEmitter* mc = t->mc;
+  u32 base = agg_addr_reg(t, record_addr, X64_R11);
+  u32 unit_bytes = bf.storage.size ? bf.storage.size : 4u;
+  int wide = (unit_bytes == 8u) ? 1 : 0;
+  u32 lsb = bf.bit_offset;
+  u32 field_bits = bf.bit_width ? bf.bit_width : 1u;
+  u64 low_ones = (field_bits >= 64u) ? ~(u64)0 : (((u64)1 << field_bits) - 1u);
+  u64 field_mask = low_ones << lsb;
+  i32 unit_off = (i32)bf.storage_offset;
+
+  /* Step 1: fetch the unit and punch a hole where the field lives. */
+  emit_mov_load(mc, unit_bytes, 0, X64_RAX, base, unit_off);
+  x64_emit_load_imm(mc, wide, X64_RCX, (i64)~field_mask);
+  emit_alu_rr(mc, wide, 0x21, X64_RAX, X64_RCX); /* AND rax, rcx */
+
+  /* Step 2: positioned field value in RCX. */
+  switch (src.kind) {
+    case OPK_IMM: {
+      u64 placed = ((u64)src.v.imm & low_ones) << lsb;
+      x64_emit_load_imm(mc, wide, X64_RCX, (i64)placed);
+      break;
+    }
+    case OPK_REG:
+      emit_mov_rr(mc, wide, X64_RCX, src.v.reg & 0xFu);
+      x64_emit_load_imm(mc, wide, X64_RDX, (i64)low_ones);
+      emit_alu_rr(mc, wide, 0x21, X64_RCX, X64_RDX); /* AND rcx, rdx */
+      if (lsb) emit_shift_imm(mc, wide, 4u, X64_RCX, (u8)lsb);
+      break;
+    default:
+      compiler_panic(t->c, impl_of(t)->loc,
+                     "x64 bitfield_store: src kind %d unsupported",
+                     (int)src.kind);
+  }
+
+  /* Step 3: merge and write back. */
+  emit_alu_rr(mc, wide, 0x09, X64_RAX, X64_RCX); /* OR rax, rcx */
+  emit_mov_store(mc, unit_bytes, X64_RAX, base, unit_off);
+}
+
+/* ============================================================
+ * Arithmetic */
+
+/* Emit `dst = a_op <op> b_op`.
+ *
+ * Paths, in order:
+ *  - FP add/sub/mul/div: scalar SSE, two-operand form (copy a → dst when
+ *    they differ, then dst op= b). Precision follows dst's type.
+ *  - Integer div/rem: dividend in RAX (sign- or zero-extended into RDX),
+ *    divisor routed through R11 whenever it is an immediate or lives in
+ *    RAX/RDX. The quotient is read from RAX, the remainder from RDX.
+ *  - Shifts: immediate counts use the imm8 encoding (masked to the
+ *    operand width); register counts are moved into CL.
+ *  - Add/sub/and/or/xor/imul: imm8/imm32 encodings when the immediate
+ *    fits, otherwise the generic reg-reg form with RAX/R11 as scratch.
+ * Unsupported operand kinds or operators panic. */
+static void x_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op,
+                    Operand b_op) {
+  MCEmitter* mc = t->mc;
+
+  /* FP binops. */
+  if (op == BO_FADD || op == BO_FSUB || op == BO_FMUL || op == BO_FDIV) {
+    u32 rd = dst.v.reg & 0xFu;
+    u32 ra = a_op.v.reg & 0xFu;
+    u32 rb = b_op.v.reg & 0xFu;
+    u8 prefix2 = type_is_fp_double(dst.type) ? 0xF2 : 0xF3;
+    if (rd != ra) emit_sse_rr(mc, prefix2, 0x10, rd, ra);
+    u8 opcode;
+    switch (op) {
+      case BO_FADD: opcode = 0x58; break;
+      case BO_FSUB: opcode = 0x5C; break;
+      case BO_FMUL: opcode = 0x59; break;
+      case BO_FDIV: opcode = 0x5E; break;
+      default: opcode = 0x58; break;
+    }
+    emit_sse_rr(mc, prefix2, opcode, rd, rb);
+    return;
+  }
+
+  int w = type_is_64(dst.type) ? 1 : 0;
+  u32 rd = dst.v.reg & 0xFu;
+
+  /* Division: idiv/div uses rax/rdx implicitly. Route divisor through r11
+   * if it would otherwise be rax/rdx. */
+  if (op == BO_SDIV || op == BO_UDIV || op == BO_SREM || op == BO_UREM) {
+    u32 rb = 0;
+    int divisor_ready = 0;
+
+    /* A divisor that lives in RAX must be rescued BEFORE the dividend is
+     * placed there, otherwise the copy to R11 would read the dividend.
+     * NOTE(review): a dividend living in R11 together with a divisor in
+     * RAX is still not handled (it would need an xchg); R11 is used as
+     * backend scratch throughout this file, so cg presumably never hands
+     * it out — confirm. */
+    if (b_op.kind == OPK_REG && (b_op.v.reg & 0xFu) == X64_RAX) {
+      emit_mov_rr(mc, w, X64_R11, X64_RAX);
+      rb = X64_R11;
+      divisor_ready = 1;
+    }
+
+    u32 ra = x64_force_reg_int(t, a_op, w, X64_RAX);
+    if (ra != X64_RAX) emit_mov_rr(mc, w, X64_RAX, ra);
+
+    if (!divisor_ready) {
+      if (b_op.kind == OPK_REG) {
+        rb = b_op.v.reg & 0xFu;
+        /* RDX is about to be overwritten by cqo/cdq or the xor below. */
+        if (rb == X64_RDX) {
+          emit_mov_rr(mc, w, X64_R11, rb);
+          rb = X64_R11;
+        }
+      } else if (b_op.kind == OPK_IMM) {
+        x64_emit_load_imm(mc, w, X64_R11, b_op.v.imm);
+        rb = X64_R11;
+      } else {
+        compiler_panic(t->c, impl_of(t)->loc,
+                       "x64 div: divisor kind %d unsupported", (int)b_op.kind);
+      }
+    }
+
+    if (op == BO_SDIV || op == BO_SREM) {
+      emit_cqo_or_cdq(mc, w);
+      emit_f7_rm(mc, w, 7u, rb); /* idiv */
+    } else {
+      emit_xor_self(mc, w, X64_RDX);
+      emit_f7_rm(mc, w, 6u, rb); /* div */
+    }
+    u32 result_reg = (op == BO_SREM || op == BO_UREM) ? X64_RDX : X64_RAX;
+    if (rd != result_reg) emit_mov_rr(mc, w, rd, result_reg);
+    return;
+  }
+
+  /* Shifts: shift count must be in cl OR encoded as imm8 directly (C1
+   * /sub ib). Use the imm form when b is OPK_IMM and skip materializing
+   * into cl. */
+  if (op == BO_SHL || op == BO_SHR_U || op == BO_SHR_S) {
+    u32 ra = x64_force_reg_int(t, a_op, w, X64_RAX);
+    if (rd != ra) emit_mov_rr(mc, w, rd, ra);
+    u32 sub = (op == BO_SHL) ? 4u : (op == BO_SHR_U ? 5u : 7u);
+    if (b_op.kind == OPK_IMM) {
+      u32 width = w ? 64u : 32u;
+      emit_shift_imm(mc, w, sub, rd, (u8)((u64)b_op.v.imm & (width - 1u)));
+      return;
+    }
+    if (b_op.kind == OPK_REG) {
+      u32 rb = b_op.v.reg & 0xFu;
+      if (rb != X64_RCX) emit_mov_rr(mc, 0, X64_RCX, rb);
+    } else {
+      compiler_panic(t->c, impl_of(t)->loc,
+                     "x64 shift: count kind %d unsupported", (int)b_op.kind);
+    }
+    emit_shift_cl(mc, w, sub, rd);
+    return;
+  }
+
+  /* For commutative ops, canonicalize IMM to the RHS so the imm-form
+   * check below fires uniformly. ISUB is non-commutative — IMM-on-LHS
+   * still materializes. */
+  switch (op) {
+    case BO_IADD:
+    case BO_AND:
+    case BO_OR:
+    case BO_XOR:
+    case BO_IMUL: {
+      if (a_op.kind == OPK_IMM && b_op.kind != OPK_IMM) {
+        Operand t_op = a_op;
+        a_op = b_op;
+        b_op = t_op;
+      }
+      break;
+    }
+    default: break;
+  }
+
+  /* IMM-form fast paths. For ADD/SUB/AND/OR/XOR the ALU imm encoding
+   * reads-and-writes a single reg — copy ra → dst first, then `dst OP=
+   * imm`. For IMUL the imm form is three-operand (`dst = src * imm`)
+   * and reads from `ra` directly without the prep copy. */
+  if (b_op.kind == OPK_IMM && a_op.kind == OPK_REG &&
+      (op == BO_IADD || op == BO_ISUB || op == BO_AND || op == BO_OR ||
+       op == BO_XOR || op == BO_IMUL)) {
+    i64 imm = b_op.v.imm;
+    u32 ra = a_op.v.reg & 0xFu;
+    if (op == BO_IMUL) {
+      if (imm_fits_i8(imm)) {
+        emit_imul_imm8(mc, w, rd, ra, (i8)imm);
+        return;
+      }
+      if (imm_fits_i32(imm)) {
+        emit_imul_imm32(mc, w, rd, ra, (i32)imm);
+        return;
+      }
+    } else {
+      u32 sub;
+      switch (op) {
+        case BO_IADD: sub = 0u; break;
+        case BO_OR:   sub = 1u; break;
+        case BO_AND:  sub = 4u; break;
+        case BO_ISUB: sub = 5u; break;
+        case BO_XOR:  sub = 6u; break;
+        default:      sub = 0u; break; /* unreachable */
+      }
+      if (imm_fits_i8(imm)) {
+        if (rd != ra) emit_mov_rr(mc, w, rd, ra);
+        emit_alu_imm8(mc, w, sub, rd, (i8)imm);
+        return;
+      }
+      if (imm_fits_i32(imm)) {
+        if (rd != ra) emit_mov_rr(mc, w, rd, ra);
+        emit_alu_imm32(mc, w, sub, rd, (i32)imm);
+        return;
+      }
+    }
+    /* Fall through to materialize for >32-bit literals. */
+  }
+
+  /* Generic 2-operand ALU: copy ra → dst, then dst op= rb. */
+  u32 ra = x64_force_reg_int(t, a_op, w, X64_RAX);
+  if (rd != ra) emit_mov_rr(mc, w, rd, ra);
+  u32 rb = x64_force_reg_int(t, b_op, w, X64_R11);
+  switch (op) {
+    case BO_IADD: emit_alu_rr(mc, w, 0x01, rd, rb); break;
+    case BO_ISUB: emit_alu_rr(mc, w, 0x29, rd, rb); break;
+    case BO_AND:  emit_alu_rr(mc, w, 0x21, rd, rb); break;
+    case BO_OR:   emit_alu_rr(mc, w, 0x09, rd, rb); break;
+    case BO_XOR:  emit_alu_rr(mc, w, 0x31, rd, rb); break;
+    case BO_IMUL: emit_imul_rr(mc, w, rd, rb); break;
+    default:
+      compiler_panic(t->c, impl_of(t)->loc, "x64 binop: op %d unimpl",
+                     (int)op);
+  }
+}
+
+/* Emit `dst = <op> a_op` for integer negate, bitwise-not and logical-not.
+ *
+ * IMM operands are legal per the CGTarget contract (arch.h) and are
+ * materialized into R11 when not already in a register. cg folds literal
+ * unops upstream (cg_fold_unop), so that path is reached only when opt's
+ * emit hands over an unfolded literal. Unknown operators panic. */
+static void x_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) {
+  MCEmitter* mc = t->mc;
+  const int wide = type_is_64(dst.type) ? 1 : 0;
+  const u32 rd = dst.v.reg & 0xFu;
+  const u32 ra = x64_force_reg_int(t, a_op, wide, X64_R11);
+
+  if (op == UO_NEG || op == UO_BNOT) {
+    /* F7 /3 = NEG, F7 /2 = NOT; both operate in place on dst. */
+    if (rd != ra) emit_mov_rr(mc, wide, rd, ra);
+    emit_f7_rm(mc, wide, (op == UO_NEG) ? 3u : 2u, rd);
+    return;
+  }
+  if (op == UO_NOT) {
+    /* !x → (x == 0) materialized as 0/1 in dst. */
+    emit_test_self(mc, wide, ra);
+    emit_setcc(mc, X64_CC_E, rd);
+    emit_movzx_r32_r8(mc, rd, rd);
+    return;
+  }
+  compiler_panic(t->c, impl_of(t)->loc, "x64 unop: op %d unimpl", (int)op);
+}
+
+/* Emit a value conversion from `src` to `dst`.
+ *
+ *  - SEXT/ZEXT: widen from src's byte size via emit_extend_rr.
+ *  - TRUNC: 32-bit register move (clears the high 32 bits); narrower
+ *    stores select their own width.
+ *  - ITOF_S/ITOF_U: opcode 0x2A with the FP prefix chosen by dst's type.
+ *    Unsigned 32-bit sources are zero-extended into R11 and converted as
+ *    a signed 64-bit value; unsigned 64-bit sources panic.
+ *  - FTOI_S/FTOI_U: opcode 0x2C with the prefix chosen by src's type;
+ *    fp → u64 panics.
+ *  - FEXT/FTRUNC: opcode 0x5A with the F3 / F2 prefix respectively.
+ *  - BITCAST: movd/movq between an XMM register and a GPR; same-class
+ *    bitcasts panic.
+ * Any other kind panics. */
+static void x_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) {
+  XImpl* a = impl_of(t);
+  MCEmitter* mc = t->mc;
+  u32 rd = dst.v.reg & 0xFu;
+  u32 rs = src.v.reg & 0xFu;
+
+  if (k == CV_SEXT || k == CV_ZEXT) {
+    u32 from_bytes = type_byte_size(src.type);
+    int wide = type_is_64(dst.type) ? 1 : 0;
+    emit_extend_rr(mc, wide, (k == CV_SEXT) ? 1 : 0, from_bytes, rd, rs);
+    return;
+  }
+
+  if (k == CV_TRUNC) {
+    /* In-reg truncation: `mov r32, r32` clears high 32. */
+    emit_mov_rr(mc, 0, rd, rs);
+    return;
+  }
+
+  if (k == CV_ITOF_S || k == CV_ITOF_U) {
+    int src_wide = type_is_64(src.type) ? 1 : 0;
+    u8 fp_prefix = type_is_fp_double(dst.type) ? 0xF2 : 0xF3;
+    if (k == CV_ITOF_U) {
+      if (src_wide == 1) {
+        compiler_panic(t->c, a->loc,
+                       "x64 convert: u64→fp not yet implemented");
+      }
+      /* u32→fp: zero-extend to 64-bit, then the signed convert works. */
+      emit_extend_rr(mc, 0, 0, 4, X64_R11, rs);
+      rs = X64_R11;
+      src_wide = 1;
+    }
+    emit_sse_rr_w(mc, fp_prefix, 0x2A, src_wide, rd, rs);
+    return;
+  }
+
+  if (k == CV_FTOI_S || k == CV_FTOI_U) {
+    int dst_wide = type_is_64(dst.type) ? 1 : 0;
+    u8 fp_prefix = type_is_fp_double(src.type) ? 0xF2 : 0xF3;
+    if (k == CV_FTOI_U && dst_wide == 1) {
+      compiler_panic(t->c, a->loc,
+                     "x64 convert: fp→u64 not yet implemented");
+    }
+    emit_sse_rr_w(mc, fp_prefix, 0x2C, dst_wide, rd, rs);
+    return;
+  }
+
+  if (k == CV_FEXT) {
+    emit_sse_rr(mc, 0xF3, 0x5A, rd, rs);
+    return;
+  }
+  if (k == CV_FTRUNC) {
+    emit_sse_rr(mc, 0xF2, 0x5A, rd, rs);
+    return;
+  }
+
+  if (k == CV_BITCAST) {
+    /* movd/movq between xmm and GPR. */
+    int int_to_fp = (src.cls == RC_INT && dst.cls == RC_FP);
+    int fp_to_int = (src.cls == RC_FP && dst.cls == RC_INT);
+    if (int_to_fp) {
+      int wide = type_is_64(dst.type) ? 1 : 0;
+      emit_sse_rr_w(mc, 0x66, 0x6E, wide, rd, rs);
+    } else if (fp_to_int) {
+      /* Opcode 0x7E takes the XMM register in the reg field, so the
+       * operands are passed swapped. */
+      int wide = type_is_64(src.type) ? 1 : 0;
+      emit_sse_rr_w(mc, 0x66, 0x7E, wide, rs, rd);
+    } else {
+      compiler_panic(t->c, a->loc,
+                     "x64 convert BITCAST: same-class not supported");
+    }
+    return;
+  }
+
+  compiler_panic(t->c, a->loc, "x64 convert kind %d unimpl", (int)k);
+}
+
+/* ============================================================
+ * Calls / return */
+
+/* Place one call argument according to its ABI classification.
+ *
+ * `next_int` / `next_fp` count the integer (max 6) and XMM (max 8)
+ * argument registers already consumed; `stack_off` is the running byte
+ * offset into the outgoing-argument area at [rsp]. All three are advanced
+ * as the argument is placed.
+ *
+ *  - av->abi == NULL (variadic argument): treated as a single DIRECT part
+ *    whose class follows the storage's register class.
+ *  - IGNORE: nothing is emitted.
+ *  - INDIRECT: the address of the argument's storage is passed in the
+ *    next integer register, or staged in RAX and stored to the stack when
+ *    the registers are exhausted.
+ *  - DIRECT: each part is moved into the next register of its class, or
+ *    into the next 8-byte stack slot when none is left (integers go
+ *    through RAX; FP values loaded from memory go through XMM15).
+ * Unsupported storage kinds or ABI classes panic. */
+static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
+                           u32* next_fp, u32* stack_off) {
+  XImpl* a = impl_of(t);
+  /* Synthesize one-part DIRECT for variadic args (av->abi NULL). */
+  ABIArgInfo va_ai;
+  ABIArgPart va_pt;
+  const ABIArgInfo* ai = av->abi;
+  if (!ai) {
+    u32 sz = type_byte_size(av->type);
+    memset(&va_ai, 0, sizeof va_ai);
+    memset(&va_pt, 0, sizeof va_pt);
+    va_ai.kind = ABI_ARG_DIRECT;
+    va_ai.parts = &va_pt;
+    va_ai.nparts = 1;
+    va_pt.cls = (av->storage.cls == RC_FP) ? ABI_CLASS_FP : ABI_CLASS_INT;
+    va_pt.size = sz;
+    va_pt.align = sz;
+    va_pt.src_offset = 0;
+    ai = &va_ai;
+  }
+  if (ai->kind == ABI_ARG_IGNORE) return;
+  if (ai->kind == ABI_ARG_INDIRECT) {
+    /* Pass the storage's address in the next int arg reg; once all six
+     * are taken, stage it in RAX and spill it to the outgoing area. */
+    int to_stack = (*next_int >= 6);
+    u32 dst_reg = to_stack ? X64_RAX : g_int_arg_regs[(*next_int)++];
+    if (av->storage.kind == OPK_LOCAL) {
+      XSlot* s = x64_slot_get(a, av->storage.v.frame_slot);
+      if (!s) compiler_panic(t->c, a->loc, "x64 call: bad byval slot");
+      emit_lea(t->mc, dst_reg, X64_RBP, -(i32)s->off);
+    } else if (av->storage.kind == OPK_INDIRECT) {
+      emit_lea(t->mc, dst_reg, av->storage.v.ind.base & 0xFu,
+               av->storage.v.ind.ofs);
+    } else {
+      compiler_panic(t->c, a->loc,
+                     "x64 call: INDIRECT arg storage kind %d unsupported",
+                     (int)av->storage.kind);
+    }
+    if (to_stack) {
+      emit_mov_store(t->mc, 8, dst_reg, X64_RSP, (i32)*stack_off);
+      *stack_off += 8;
+    }
+    return;
+  }
+
+  for (u16 i = 0; i < ai->nparts; ++i) {
+    const ABIArgPart* pt = &ai->parts[i];
+    u32 sz = pt->size;
+    if (pt->cls == ABI_CLASS_INT) {
+      int to_stack = (*next_int >= 6);
+      u32 dst_reg = to_stack ? X64_RAX : g_int_arg_regs[(*next_int)++];
+      switch (av->storage.kind) {
+        case OPK_IMM: {
+          int w = (sz == 8) ? 1 : 0;
+          x64_emit_load_imm(t->mc, w, dst_reg, av->storage.v.imm);
+          break;
+        }
+        case OPK_REG: {
+          int w = (sz == 8) ? 1 : 0;
+          u32 sr = av->storage.v.reg & 0xFu;
+          if (sr != dst_reg) emit_mov_rr(t->mc, w, dst_reg, sr);
+          break;
+        }
+        case OPK_LOCAL: {
+          XSlot* s = x64_slot_get(a, av->storage.v.frame_slot);
+          if (!s) compiler_panic(t->c, a->loc, "x64 call: bad arg slot");
+          emit_mov_load(t->mc, sz, 0, dst_reg, X64_RBP,
+                        -(i32)s->off + (i32)pt->src_offset);
+          break;
+        }
+        case OPK_INDIRECT: {
+          /* cg holds INDIRECT base regs in {RBX, R10, R12..R15}, disjoint
+           * from arg regs (RDI/RSI/RDX/RCX/R8/R9) and the RAX scratch, so
+           * the base survives across the part loop. */
+          emit_mov_load(t->mc, sz, 0, dst_reg, av->storage.v.ind.base & 0xFu,
+                        av->storage.v.ind.ofs + (i32)pt->src_offset);
+          break;
+        }
+        default:
+          compiler_panic(t->c, a->loc,
+                         "x64 call: arg storage kind %d unsupported",
+                         (int)av->storage.kind);
+      }
+      if (to_stack) {
+        emit_mov_store(t->mc, 8, dst_reg, X64_RSP, (i32)*stack_off);
+        *stack_off += 8;
+      }
+    } else if (pt->cls == ABI_CLASS_FP) {
+      int to_stack = (*next_fp >= 8);
+      u8 prefix2 = (sz == 8) ? 0xF2 : 0xF3;
+      if (!to_stack) {
+        u32 dst_x = (*next_fp)++;
+        if (av->storage.kind == OPK_REG) {
+          u32 sx = av->storage.v.reg & 0xFu;
+          if (sx != dst_x) emit_sse_rr(t->mc, prefix2, 0x10, dst_x, sx);
+        } else if (av->storage.kind == OPK_INDIRECT) {
+          emit_sse_load(t->mc, prefix2, 0x10, dst_x,
+                        av->storage.v.ind.base & 0xFu,
+                        av->storage.v.ind.ofs + (i32)pt->src_offset);
+        } else {
+          compiler_panic(t->c, a->loc,
+                         "x64 call: FP arg storage kind %d unsupported",
+                         (int)av->storage.kind);
+        }
+      } else {
+        if (av->storage.kind == OPK_REG) {
+          emit_sse_store(t->mc, prefix2, 0x11, av->storage.v.reg & 0xFu,
+                         X64_RSP, (i32)*stack_off);
+        } else if (av->storage.kind == OPK_INDIRECT) {
+          /* Load through xmm15 (scratch — last in g_fp_order so cg won't
+           * have it live mid-call) then store. */
+          emit_sse_load(t->mc, prefix2, 0x10, X64_XMM15,
+                        av->storage.v.ind.base & 0xFu,
+                        av->storage.v.ind.ofs + (i32)pt->src_offset);
+          emit_sse_store(t->mc, prefix2, 0x11, X64_XMM15, X64_RSP,
+                         (i32)*stack_off);
+        } else {
+          compiler_panic(t->c, a->loc,
+                         "x64 call: FP stack-arg storage kind %d unsupported",
+                         (int)av->storage.kind);
+        }
+        *stack_off += 8;
+      }
+    } else {
+      compiler_panic(t->c, a->loc, "x64 call: ABI class %d unimpl",
+                     (int)pt->cls);
+    }
+  }
+}
+
+/* Emit a call described by `d`.
+ *
+ * Steps:
+ *  1. sret: when the ABI reports has_sret, the address of the LOCAL
+ *     return slot is placed in RDI and consumes the first integer
+ *     argument register.
+ *  2. Each argument is placed by emit_arg_value(); the outgoing stack
+ *     area actually used (rounded up to 16 bytes) is folded into
+ *     a->max_outgoing.
+ *  3. Variadic callees get the number of XMM argument registers used
+ *     loaded into EAX/AL.
+ *  4. The call itself: `call rel32` with R_X64_PLT32 for GLOBAL callees,
+ *     `call r/m64` (FF /2) for REG callees.
+ *  5. The return value parts are moved out of RAX/RDX and XMM0/XMM1 into
+ *     d->ret.storage (a single register, or a LOCAL/INDIRECT memory
+ *     location addressed per part via src_offset).
+ * A call without ABI info (d->abi == NULL) has no return classification,
+ * so step 5 is skipped. Unsupported operand kinds panic. */
+static void x_call(CGTarget* t, const CGCallDesc* d) {
+  XImpl* a = impl_of(t);
+  MCEmitter* mc = t->mc;
+
+  u32 next_int = 0, next_fp = 0, stack_off = 0;
+
+  /* sret: caller puts destination pointer in rdi. */
+  if (d->abi && d->abi->has_sret) {
+    if (d->ret.storage.kind != OPK_LOCAL) {
+      compiler_panic(t->c, a->loc, "x64 call: sret destination must be LOCAL");
+    }
+    XSlot* s = x64_slot_get(a, d->ret.storage.v.frame_slot);
+    if (!s) compiler_panic(t->c, a->loc, "x64 call: bad sret slot");
+    emit_lea(mc, X64_RDI, X64_RBP, -(i32)s->off);
+    next_int = 1;
+  }
+  for (u32 i = 0; i < d->nargs; ++i) {
+    emit_arg_value(t, &d->args[i], &next_int, &next_fp, &stack_off);
+  }
+  u32 needed = (stack_off + 15u) & ~15u;
+  if (needed > a->max_outgoing) a->max_outgoing = needed;
+
+  /* Variadic calls: AL = number of XMM regs used. */
+  if (d->abi && d->abi->variadic) {
+    x64_emit_load_imm(mc, 0, X64_RAX, (i64)next_fp);
+  }
+
+  if (d->callee.kind == OPK_GLOBAL) {
+    /* call rel32: E8 + disp32 + R_X64_PLT32. */
+    u8 op = 0xE8;
+    mc->emit_bytes(mc, &op, 1);
+    u32 disp_pos = mc->pos(mc);
+    emit_u32le(mc, 0);
+    mc->emit_reloc_at(mc, mc->section_id, disp_pos, R_X64_PLT32,
+                      d->callee.v.global.sym,
+                      d->callee.v.global.addend - 4, 1, 0);
+  } else if (d->callee.kind == OPK_REG) {
+    /* call r/m64: [REX.B] FF /2. Only the low three bits of the register
+     * number belong in ModRM.rm; the fourth bit travels in REX.B. Mask
+     * explicitly, as every other modrm() caller in this file does. */
+    u32 r = d->callee.v.reg & 0xFu;
+    emit_rex(mc, 0, 0, 0, r);
+    u8 buf[2] = {0xFF, modrm(3u, 2u, r & 7u)};
+    mc->emit_bytes(mc, buf, 2);
+  } else {
+    compiler_panic(t->c, a->loc, "x64 call: callee kind %d unsupported",
+                   (int)d->callee.kind);
+  }
+
+  /* Receive return value. Without ABI info there is no classification to
+   * consult (the sret/variadic checks above tolerate a NULL abi too). */
+  if (!d->abi) return;
+  const ABIArgInfo* ri = &d->abi->ret;
+  if (ri->kind == ABI_ARG_IGNORE || ri->kind == ABI_ARG_INDIRECT) return;
+  if (ri->nparts == 0) return;
+
+  Operand rs = d->ret.storage;
+  u32 next_int_ret = 0, next_fp_ret = 0;
+  static const u32 ret_int_regs[2] = {X64_RAX, X64_RDX};
+  for (u16 i = 0; i < ri->nparts; ++i) {
+    const ABIArgPart* p = &ri->parts[i];
+    u32 src_reg;
+    if (p->cls == ABI_CLASS_INT) src_reg = ret_int_regs[next_int_ret++];
+    else if (p->cls == ABI_CLASS_FP) src_reg = (u32)(X64_XMM0 + next_fp_ret++);
+    else compiler_panic(t->c, a->loc, "x64 call: ret cls %d unimpl",
+                        (int)p->cls);
+
+    if (rs.kind == OPK_REG) {
+      if (ri->nparts != 1) {
+        compiler_panic(t->c, a->loc,
+                       "x64 call: REG ret_storage with %u parts",
+                       (unsigned)ri->nparts);
+      }
+      if (p->cls == ABI_CLASS_INT) {
+        int w = (p->size == 8) ? 1 : 0;
+        u32 dr = rs.v.reg & 0xFu;
+        if (dr != src_reg) emit_mov_rr(mc, w, dr, src_reg);
+      } else {
+        u8 prefix2 = (p->size == 8) ? 0xF2 : 0xF3;
+        u32 dr = rs.v.reg & 0xFu;
+        if (dr != src_reg) emit_sse_rr(mc, prefix2, 0x10, dr, src_reg);
+      }
+    } else if (rs.kind == OPK_LOCAL || rs.kind == OPK_INDIRECT) {
+      u32 base_reg;
+      i32 base_off;
+      if (rs.kind == OPK_LOCAL) {
+        XSlot* s = x64_slot_get(a, rs.v.frame_slot);
+        if (!s) compiler_panic(t->c, a->loc, "x64 call: bad ret slot");
+        base_reg = X64_RBP;
+        base_off = -(i32)s->off;
+      } else {
+        base_reg = rs.v.ind.base & 0xFu;
+        base_off = rs.v.ind.ofs;
+      }
+      i32 off = base_off + (i32)p->src_offset;
+      if (p->cls == ABI_CLASS_INT) {
+        emit_mov_store(mc, p->size, src_reg, base_reg, off);
+      } else {
+        u8 prefix2 = (p->size == 8) ? 0xF2 : 0xF3;
+        emit_sse_store(mc, prefix2, 0x11, src_reg, base_reg, off);
+      }
+    } else if (rs.kind == OPK_IMM && rs.type && rs.type->kind == TY_VOID) {
+      /* void ret placeholder — nothing to do. */
+    } else {
+      compiler_panic(t->c, a->loc,
+                     "x64 call: ret_storage kind %d unsupported",
+                     (int)rs.kind);
+    }
+  }
+}
+
+ /* Emit the code for a `return`.
+  *
+  * val == NULL: nothing is moved; only the jump to the epilogue is emitted.
+  * Otherwise val->abi / val->storage select one of these paths:
+  *   - ABI_ARG_INDIRECT (sret): copy the aggregate to the address in rdi,
+  *     then leave that address in rax.
+  *   - OPK_REG: move the value into rax, or into xmm0 when the storage
+  *     class is RC_FP.
+  *   - OPK_IMM: materialize the immediate in rax.
+  *   - OPK_LOCAL / OPK_INDIRECT (no sret): load each ABI part into
+  *     rax/rdx or xmm0/xmm1.
+  * Any other storage kind emits no move. Every path then jumps to
+  * a->epilogue_label. */
+ static void x_ret(CGTarget* t, const CGABIValue* val) {
+ XImpl* a = impl_of(t);
+ MCEmitter* mc = t->mc;
+
+ if (val) {
+ const ABIArgInfo* ri = val->abi;
+ if (ri && ri->kind == ABI_ARG_INDIRECT) {
+ /* sret: reload destination pointer into rdi, memcpy source into [rdi]. */
+ u32 src_base;
+ i32 src_base_off;
+ u32 nbytes;
+ if (val->storage.kind == OPK_LOCAL) {
+ /* Frame-slot source: rbp-relative, and the whole slot is copied. */
+ XSlot* s = x64_slot_get(a, val->storage.v.frame_slot);
+ if (!s) compiler_panic(t->c, a->loc, "x64 ret: bad sret slot");
+ src_base = X64_RBP;
+ src_base_off = -(i32)s->off;
+ nbytes = s->size;
+ } else if (val->storage.kind == OPK_INDIRECT) {
+ /* Pointer source: the byte count must come from val->size. */
+ src_base = val->storage.v.ind.base & 0xFu;
+ src_base_off = val->storage.v.ind.ofs;
+ nbytes = val->size;
+ if (!nbytes) {
+ compiler_panic(t->c, a->loc,
+ "x64 ret indirect: missing aggregate size");
+ }
+ } else {
+ compiler_panic(t->c, a->loc,
+ "x64 ret indirect: storage kind %d unsupported",
+ (int)val->storage.kind);
+ }
+ /* NOTE(review): when no sret slot is recorded, or the lookup returns
+  * NULL, no reload is emitted and the copy below writes through whatever
+  * rdi currently holds — confirm that is intended. */
+ if (a->sret_ptr_slot != FRAME_SLOT_NONE) {
+ XSlot* sp = x64_slot_get(a, a->sret_ptr_slot);
+ if (sp) emit_mov_load(mc, 8, 0, X64_RDI, X64_RBP, -(i32)sp->off);
+ }
+ /* Unrolled copy staged through rax in 8-, 4-, 2-, then 1-byte pieces.
+  * NOTE(review): assumes src_base is neither rax nor rdi — confirm
+  * against the register allocator. */
+ u32 i = 0;
+ while (i + 8 <= nbytes) {
+ emit_mov_load(mc, 8, 0, X64_RAX, src_base, src_base_off + (i32)i);
+ emit_mov_store(mc, 8, X64_RAX, X64_RDI, (i32)i);
+ i += 8;
+ }
+ while (i + 4 <= nbytes) {
+ emit_mov_load(mc, 4, 0, X64_RAX, src_base, src_base_off + (i32)i);
+ emit_mov_store(mc, 4, X64_RAX, X64_RDI, (i32)i);
+ i += 4;
+ }
+ while (i + 2 <= nbytes) {
+ emit_mov_load(mc, 2, 0, X64_RAX, src_base, src_base_off + (i32)i);
+ emit_mov_store(mc, 2, X64_RAX, X64_RDI, (i32)i);
+ i += 2;
+ }
+ while (i < nbytes) {
+ emit_mov_load(mc, 1, 0, X64_RAX, src_base, src_base_off + (i32)i);
+ emit_mov_store(mc, 1, X64_RAX, X64_RDI, (i32)i);
+ i += 1;
+ }
+ /* Convention: return sret pointer in rax. */
+ emit_mov_rr(mc, 1, X64_RAX, X64_RDI);
+ } else if (val->storage.kind == OPK_REG) {
+ /* Scalar already in a register: move it to xmm0 / rax unless it is
+  * there already. */
+ if (val->storage.cls == RC_FP) {
+ u8 prefix2 = type_is_fp_double(val->storage.type) ? 0xF2 : 0xF3;
+ u32 sr = val->storage.v.reg & 0xFu;
+ if (sr != X64_XMM0) emit_sse_rr(mc, prefix2, 0x10, X64_XMM0, sr);
+ } else {
+ int w = type_is_64(val->storage.type) ? 1 : 0;
+ u32 sr = val->storage.v.reg & 0xFu;
+ if (sr != X64_RAX) emit_mov_rr(mc, w, X64_RAX, sr);
+ }
+ } else if (val->storage.kind == OPK_IMM) {
+ int w = type_is_64(val->storage.type) ? 1 : 0;
+ x64_emit_load_imm(mc, w, X64_RAX, val->storage.v.imm);
+ } else if (val->storage.kind == OPK_LOCAL ||
+ val->storage.kind == OPK_INDIRECT) {
+ /* DIRECT struct return: load each part into rax/rdx or xmm0/xmm1. */
+ u32 base_reg;
+ i32 base_off;
+ if (val->storage.kind == OPK_LOCAL) {
+ XSlot* s = x64_slot_get(a, val->storage.v.frame_slot);
+ if (!s) compiler_panic(t->c, a->loc, "x64 ret: bad local slot");
+ base_reg = X64_RBP;
+ base_off = -(i32)s->off;
+ } else {
+ base_reg = val->storage.v.ind.base & 0xFu;
+ base_off = val->storage.v.ind.ofs;
+ }
+ const ABIArgInfo* ri2 = val->abi;
+ u32 next_int_ret = 0, next_fp_ret = 0;
+ /* NOTE(review): next_int_ret indexes this 2-entry table with no bound
+  * check; presumably the ABI classifier never yields more than two INT
+  * parts — confirm. A NULL val->abi results in zero parts being loaded. */
+ static const u32 ret_int_regs[2] = {X64_RAX, X64_RDX};
+ for (u16 i = 0; i < (ri2 ? ri2->nparts : 0); ++i) {
+ const ABIArgPart* pt = &ri2->parts[i];
+ i32 off = base_off + (i32)pt->src_offset;
+ if (pt->cls == ABI_CLASS_INT) {
+ emit_mov_load(mc, pt->size, 0, ret_int_regs[next_int_ret++],
+ base_reg, off);
+ } else if (pt->cls == ABI_CLASS_FP) {
+ /* 8-byte parts use the F2 prefix, everything else F3. */
+ u8 prefix2 = (pt->size == 8) ? 0xF2 : 0xF3;
+ emit_sse_load(mc, prefix2, 0x10, (u32)(X64_XMM0 + next_fp_ret++),
+ base_reg, off);
+ } else {
+ compiler_panic(t->c, a->loc, "x64 ret: ret part cls %d unimpl",
+ (int)pt->cls);
+ }
+ }
+ }
+ }
+ emit_jmp_label(mc, a->epilogue_label);
+ }
+
+/* ============================================================
+ * Alloca / VLA.
+ *
+ * Layout (low → high addresses, after a `sub rsp, aligned_size`):
+ * [rsp + 0, +max_outgoing): outgoing-arg area
+ * [rsp + max_outgoing, +max_outgoing +aligned_size): newly allocated block
+ *
+ * max_outgoing is only known at func_end (it is the max across all
+ * x_call sites in the function), so each alloca emits a placeholder
+ * `lea dst, [rsp + 0]` whose 4-byte disp is patched at func_end. The
+ * epilogue restores rsp via `leave` (mov rsp, rbp; pop rbp), so no
+ * extra dance is needed when alloca is present. */
+
+ /* Emit `lea dst, [rsp + disp32]` with a zero displacement and report, via
+  * out_disp_pos, where the 4-byte displacement field starts so it can be
+  * patched later. The disp32 form (mod=10, rm=SIB, base=rsp, no index) is
+  * used unconditionally so the field always has a fixed width.
+  * Byte layout: REX.W [+R] | 0x8D | ModRM | SIB | disp32 (8 bytes). */
+ static void emit_lea_rsp_disp32(MCEmitter* mc, u32 dst, u32* out_disp_pos) {
+   u32 row_start = obj_pos(mc->obj, mc->section_id);
+   u8 enc[3];
+
+   enc[0] = 0x8D;                     /* LEA opcode */
+   enc[1] = modrm(2u, dst & 7u, 4u);  /* mod=10, reg=dst, rm=100 (SIB) */
+   enc[2] = sib(0, 4u, X64_RSP);      /* no index, base=rsp */
+
+   emit_rex(mc, 1, dst, 0, X64_RSP);
+   mc->emit_bytes(mc, enc, 3);
+
+   /* Everything emitted from here on is the patchable displacement. */
+   *out_disp_pos = mc->pos(mc);
+   emit_u32le(mc, 0);
+
+   if (mc->debug)
+     debug_emit_row(mc->debug, mc->section_id, row_start, mc->loc);
+ }
+
+ /* Emit a dynamic stack allocation (alloca / VLA) and leave the address of
+  * the new block in `d`.
+  *
+  * d      must be OPK_REG; receives the block address.
+  * sz     byte count, OPK_IMM or OPK_REG. It is rounded up to a multiple
+  *        of 16; an immediate size of 0 still reserves 16 bytes.
+  * align  requested alignment; values above 16 are rejected.
+  *
+  * Panics on an unsupported operand kind, a negative immediate size, or an
+  * immediate size that does not fit the sign-extended imm32 operand of
+  * `sub rsp, imm32`. The register-size path clobbers rax.
+  *
+  * The address is produced by a placeholder `lea d, [rsp + 0]`; the
+  * position of its displacement is appended to a->alloca_patches. */
+ static void x_alloca_(CGTarget* t, Operand d, Operand sz, u32 align) {
+   XImpl* a = impl_of(t);
+   MCEmitter* mc = t->mc;
+   if (d.kind != OPK_REG)
+     compiler_panic(t->c, a->loc, "x64 alloca: dst must be REG");
+   if (align > 16) {
+     compiler_panic(t->c, a->loc,
+                    "x64 alloca: align %u > 16 not yet supported", align);
+   }
+
+   if (sz.kind == OPK_IMM) {
+     i64 v = sz.v.imm;
+     if (v < 0) compiler_panic(t->c, a->loc, "x64 alloca: negative size");
+     u64 aligned = ((u64)v + 15u) & ~(u64)15u;
+     if (aligned == 0) aligned = 16;
+     /* The imm32 of `sub` is sign-extended to 64 bits, so anything above
+      * INT32_MAX would either be truncated or turn into a negative
+      * adjustment that moves rsp the wrong way. */
+     if (aligned > (u64)0x7FFFFFFFu)
+       compiler_panic(t->c, a->loc, "x64 alloca: size too large for imm32");
+     /* sub rsp, imm32 : REX.W 0x81 /5 imm32 (7 bytes). */
+     emit_rex(mc, 1, 0, 0, X64_RSP);
+     u8 buf[2] = {0x81, modrm(3u, 5u, X64_RSP)};
+     mc->emit_bytes(mc, buf, 2);
+     emit_u32le(mc, (u32)aligned);
+   } else if (sz.kind == OPK_REG) {
+     u32 sz_reg = sz.v.reg & 0xFu;
+     /* rax = (sz_reg + 15) & ~15 */
+     emit_lea(mc, X64_RAX, sz_reg, 15);
+     /* and rax, -16 : REX.W 0x83 /4 imm8(0xF0). */
+     emit_rex(mc, 1, 0, 0, X64_RAX);
+     u8 abuf[3] = {0x83, modrm(3u, 4u, X64_RAX), 0xF0};
+     mc->emit_bytes(mc, abuf, 3);
+     /* sub rsp, rax */
+     emit_alu_rr(mc, 1, 0x29, X64_RSP, X64_RAX);
+   } else {
+     compiler_panic(t->c, a->loc, "x64 alloca: size kind %d unsupported",
+                    (int)sz.kind);
+   }
+
+   /* lea dst, [rsp + max_outgoing] — placeholder, patched at func_end.
+    * Grow the patch list first (doubling, starting at 4 entries). */
+   if (a->nalloca_patches == a->alloca_patches_cap) {
+     u32 ncap = a->alloca_patches_cap ? a->alloca_patches_cap * 2u : 4u;
+     XAllocaPatch* nb = arena_array(t->c->tu, XAllocaPatch, ncap);
+     if (a->alloca_patches)
+       memcpy(nb, a->alloca_patches,
+              sizeof(XAllocaPatch) * a->nalloca_patches);
+     a->alloca_patches = nb;
+     a->alloca_patches_cap = ncap;
+   }
+   u32 disp_pos;
+   emit_lea_rsp_disp32(mc, d.v.reg & 0xFu, &disp_pos);
+   a->alloca_patches[a->nalloca_patches].disp_pos = disp_pos;
+   a->nalloca_patches++;
+   a->has_alloca = 1;
+ }
+
+/* SysV AMD64 __va_list_tag (24 bytes, 8-aligned):
+ * off 0 u32 gp_offset next free GP slot in reg_save_area (0..48)
+ * off 4 u32 fp_offset next free FP slot (48..176)
+ * off 8 ptr overflow_arg_area pointer to next stack-passed arg
+ * off 16 ptr reg_save_area pointer to the 176-byte save area
+ *
+ * The reg_save_area layout (filled in func_begin):
+ * +0..+40 : rdi, rsi, rdx, rcx, r8, r9 (8B each)
+ * +48..+168 : xmm0..xmm7 at 16B stride (low 8B written via movsd)
+ *
+ * va_arg dispatches on dst class. When the relevant offset reaches its
+ * max (48 for GP, 176 for FP), fall through to overflow_arg_area at
+ * 8-byte stride. */
+
+ /* va_start: initialise the four __va_list_tag fields reached through the
+  * register in ap_op. Each value is built in rax and then stored. Panics
+  * when the current function is not variadic or has no register save
+  * slot. */
+ static void x_va_start_(CGTarget* t, Operand ap_op) {
+   XImpl* impl = impl_of(t);
+   MCEmitter* mc = t->mc;
+
+   if (!impl->is_variadic)
+     compiler_panic(t->c, impl->loc, "x64 va_start: function not variadic");
+
+   u32 ap = ap_op.v.reg & 0xFu;
+   XSlot* save_area = x64_slot_get(impl, impl->reg_save_slot);
+   if (!save_area)
+     compiler_panic(t->c, impl->loc, "x64 va_start: no reg_save_slot");
+
+   /* off 0: gp_offset = next_param_int * 8 */
+   x64_emit_load_imm(mc, 0, X64_RAX, (i64)(impl->next_param_int * 8u));
+   emit_mov_store(mc, 4, X64_RAX, ap, 0);
+
+   /* off 4: fp_offset = 48 + next_param_fp * 16 */
+   x64_emit_load_imm(mc, 0, X64_RAX,
+                     (i64)(48u + impl->next_param_fp * 16u));
+   emit_mov_store(mc, 4, X64_RAX, ap, 4);
+
+   /* off 8: overflow_arg_area = rbp + 16 + next_param_stack */
+   emit_lea(mc, X64_RAX, X64_RBP, (i32)(16u + impl->next_param_stack));
+   emit_mov_store(mc, 8, X64_RAX, ap, 8);
+
+   /* off 16: reg_save_area = rbp - reg_save_slot.off */
+   emit_lea(mc, X64_RAX, X64_RBP, -(i32)save_area->off);
+   emit_mov_store(mc, 8, X64_RAX, ap, 16);
+ }
+
+ /* Emit va_arg: load the next variadic argument of type `ty` into `dst`.
+  *
+  * ap_op.v.reg is the base register for every va_list field access and
+  * dst.v.reg receives the value. dst.cls == RC_FP selects the fp_offset
+  * field (limit 176, stride 16); anything else uses gp_offset (limit 48,
+  * stride 8). rax and r11 are used as scratch.
+  *
+  * NOTE(review): neither operand's kind is checked, and the stack path
+  * always advances overflow_arg_area by 8 — presumably callers only pass
+  * register operands and scalar types of at most 8 bytes; confirm. */
+ static void x_va_arg_(CGTarget* t, Operand dst, Operand ap_op,
+ const Type* ty) {
+ MCEmitter* mc = t->mc;
+ u32 ap = ap_op.v.reg & 0xFu;
+ u32 sz = type_byte_size(ty);
+ int is_fp = (dst.cls == RC_FP);
+ u32 offs_field = is_fp ? 4u : 0u;
+ u32 max_offs = is_fp ? 176u : 48u;
+ u32 stride = is_fp ? 16u : 8u;
+ u32 dr = dst.v.reg & 0xFu;
+
+ MCLabel L_stack = mc->label_new(mc);
+ MCLabel L_done = mc->label_new(mc);
+
+ /* eax = ap[offs_field]; cmp eax, max_offs; jae L_stack. */
+ emit_mov_load(mc, 4, 0, X64_RAX, ap, (i32)offs_field);
+ if (max_offs <= 127u) {
+ emit_cmp_imm8(mc, 0, X64_RAX, (i8)max_offs);
+ } else {
+ /* cmp eax, imm32 : 0x3D imm32 (5 bytes, EAX-specific form). */
+ u32 ofs = obj_pos(mc->obj, mc->section_id);
+ u8 op = 0x3D;
+ mc->emit_bytes(mc, &op, 1);
+ emit_u32le(mc, max_offs);
+ if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
+ }
+ emit_jcc_label(mc, X64_CC_AE, L_stack);
+
+ /* Reg path:
+ * r11 = ap[16] (reg_save_area)
+ * r11 = r11 + rax
+ * load dst from [r11 + 0]
+ * eax += stride; ap[offs_field] = eax
+ * jmp L_done */
+ emit_mov_load(mc, 8, 0, X64_R11, ap, 16);
+ emit_alu_rr(mc, 1, 0x01, X64_R11, X64_RAX);
+ if (is_fp) {
+ /* 8-byte values use the F2 prefix, everything else F3. */
+ u8 prefix = (sz == 8) ? 0xF2 : 0xF3;
+ emit_sse_load(mc, prefix, 0x10, dr, X64_R11, 0);
+ } else {
+ int sx = type_is_signed(ty);
+ emit_mov_load(mc, sz, sx, dr, X64_R11, 0);
+ }
+ /* add eax, imm8 : 0x83 /0 imm8 (no REX needed for eax). */
+ {
+ u32 ofs = obj_pos(mc->obj, mc->section_id);
+ u8 buf[3] = {0x83, modrm(3u, 0u, X64_RAX), (u8)stride};
+ mc->emit_bytes(mc, buf, 3);
+ if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
+ }
+ emit_mov_store(mc, 4, X64_RAX, ap, (i32)offs_field);
+ emit_jmp_label(mc, L_done);
+
+ /* L_stack:
+ * r11 = ap[8] (overflow_arg_area)
+ * load dst from [r11 + 0]
+ * r11 += 8; ap[8] = r11 */
+ mc->label_place(mc, L_stack);
+ emit_mov_load(mc, 8, 0, X64_R11, ap, 8);
+ if (is_fp) {
+ u8 prefix = (sz == 8) ? 0xF2 : 0xF3;
+ emit_sse_load(mc, prefix, 0x10, dr, X64_R11, 0);
+ } else {
+ int sx = type_is_signed(ty);
+ emit_mov_load(mc, sz, sx, dr, X64_R11, 0);
+ }
+ /* add r11, 8 : REX.WB 0x83 /0 imm8. */
+ {
+ u32 ofs = obj_pos(mc->obj, mc->section_id);
+ u8 rex = (u8)(X64_REX_BASE | X64_REX_W | X64_REX_B);
+ mc->emit_bytes(mc, &rex, 1);
+ u8 buf[3] = {0x83, modrm(3u, 0u, X64_R11 & 7u), 8};
+ mc->emit_bytes(mc, buf, 3);
+ if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
+ }
+ emit_mov_store(mc, 8, X64_R11, ap, 8);
+
+ /* Both paths rejoin here with the value in dst. */
+ mc->label_place(mc, L_done);
+ }
+
+ /* va_end: emits nothing; both arguments are ignored. */
+ static void x_va_end_(CGTarget* t, Operand a) {
+   (void)a;
+   (void)t;
+ }
+
+ /* va_copy: copy the 24-byte va_list addressed by register `s` to the one
+  * addressed by register `d`, as three 8-byte moves staged through rax. */
+ static void x_va_copy_(CGTarget* t, Operand d, Operand s) {
+   MCEmitter* mc = t->mc;
+   u32 dst_ptr = d.v.reg & 0xFu;
+   u32 src_ptr = s.v.reg & 0xFu;
+   u32 off = 0;
+
+   while (off < 24u) {
+     emit_mov_load(mc, 8, 0, X64_RAX, src_ptr, (i32)off);
+     emit_mov_store(mc, 8, X64_RAX, dst_ptr, (i32)off);
+     off += 8u;
+   }
+ }
+
+/* ============================================================
+ * Atomics (Group K).
+ *
+ * x86 has a strong memory model: plain MOV is acquire on loads and
+ * release on stores, so most MemOrders need no extra fence. The
+ * exception is SEQ_CST stores, which need a full StoreLoad barrier —
+ * realized either via XCHG (which has implicit LOCK) or MOV+MFENCE.
+ * All LOCK-prefixed RMWs (XADD/XCHG/CMPXCHG) act as full barriers,
+ * subsuming any MemOrder the front end requests. */
+
+ /* Emit the single-byte LOCK prefix (0xF0). */
+ static void emit_lock_prefix(MCEmitter* mc) {
+   u8 lock_byte = 0xF0;
+   mc->emit_bytes(mc, &lock_byte, 1);
+ }
+
+ /* Emit MFENCE (0F AE F0) and, when debug info is enabled, a row for it. */
+ static void emit_mfence(MCEmitter* mc) {
+   u32 row_start = obj_pos(mc->obj, mc->section_id);
+   u8 enc[3];
+
+   enc[0] = 0x0F;
+   enc[1] = 0xAE;
+   enc[2] = 0xF0;
+   mc->emit_bytes(mc, enc, 3);
+
+   if (mc->debug)
+     debug_emit_row(mc->debug, mc->section_id, row_start, mc->loc);
+ }
+
+ /* Emit UD2 (0F 0B) and, when debug info is enabled, a row for it. */
+ static void emit_ud2(MCEmitter* mc) {
+   u32 row_start = obj_pos(mc->obj, mc->section_id);
+   u8 enc[2];
+
+   enc[0] = 0x0F;
+   enc[1] = 0x0B;
+   mc->emit_bytes(mc, enc, 2);
+
+   if (mc->debug)
+     debug_emit_row(mc->debug, mc->section_id, row_start, mc->loc);
+ }
+
+ /* LOCK XADD [base+disp], src — opcode 0F C1 /r, 32- or 64-bit per `w`.
+  * After execution src holds the previous memory value and memory holds
+  * the sum. */
+ static void emit_lock_xadd(MCEmitter* mc, int w, u32 src, u32 base, i32 disp) {
+   u32 row_start = obj_pos(mc->obj, mc->section_id);
+   u8 opcode[2];
+
+   opcode[0] = 0x0F;
+   opcode[1] = 0xC1;
+
+   emit_lock_prefix(mc);
+   emit_rex(mc, w, src, 0, base);
+   mc->emit_bytes(mc, opcode, 2);
+   emit_mem_operand(mc, src, base, disp);
+
+   if (mc->debug)
+     debug_emit_row(mc->debug, mc->section_id, row_start, mc->loc);
+ }
+
+ /* XCHG [base+disp], src — opcode 87 /r. The processor locks a
+  * memory-destination XCHG on its own; the LOCK prefix is still written
+  * out so the intent is visible in the byte stream. */
+ static void emit_lock_xchg_mem(MCEmitter* mc, int w, u32 src, u32 base,
+                                i32 disp) {
+   u32 row_start = obj_pos(mc->obj, mc->section_id);
+   u8 opcode = 0x87;
+
+   emit_lock_prefix(mc);
+   emit_rex(mc, w, src, 0, base);
+   mc->emit_bytes(mc, &opcode, 1);
+   emit_mem_operand(mc, src, base, disp);
+
+   if (mc->debug)
+     debug_emit_row(mc->debug, mc->section_id, row_start, mc->loc);
+ }
+
+ /* LOCK CMPXCHG [base+disp], src — opcode 0F B1 /r. RAX is compared with
+  * memory: on a match memory becomes src and ZF=1, otherwise RAX receives
+  * the memory value and ZF=0. */
+ static void emit_lock_cmpxchg(MCEmitter* mc, int w, u32 src, u32 base,
+                               i32 disp) {
+   u32 row_start = obj_pos(mc->obj, mc->section_id);
+   u8 opcode[2];
+
+   opcode[0] = 0x0F;
+   opcode[1] = 0xB1;
+
+   emit_lock_prefix(mc);
+   emit_rex(mc, w, src, 0, base);
+   mc->emit_bytes(mc, opcode, 2);
+   emit_mem_operand(mc, src, base, disp);
+
+   if (mc->debug)
+     debug_emit_row(mc->debug, mc->section_id, row_start, mc->loc);
+ }
+
+ /* POPCNT dst, src — encoding F3 0F B8 /r. The F3 prefix is written before
+  * the REX byte. */
+ static void emit_popcnt(MCEmitter* mc, int w, u32 dst, u32 src) {
+   u32 row_start = obj_pos(mc->obj, mc->section_id);
+   u8 mandatory_prefix = 0xF3;
+   u8 opcode[2];
+
+   opcode[0] = 0x0F;
+   opcode[1] = 0xB8;
+
+   mc->emit_bytes(mc, &mandatory_prefix, 1);
+   emit_rex(mc, w, dst, 0, src);
+   mc->emit_bytes(mc, opcode, 2);
+   emit_rm_reg(mc, dst, src);
+
+   if (mc->debug)
+     debug_emit_row(mc->debug, mc->section_id, row_start, mc->loc);
+ }
+
+ /* Bit scan, dst <- scan(src). opcode2 picks the direction: 0xBC is BSF,
+  * 0xBD is BSR. */
+ static void emit_bs(MCEmitter* mc, int w, u8 opcode2, u32 dst, u32 src) {
+   u32 row_start = obj_pos(mc->obj, mc->section_id);
+   u8 opcode[2];
+
+   opcode[0] = 0x0F;
+   opcode[1] = opcode2;
+
+   emit_rex(mc, w, dst, 0, src);
+   mc->emit_bytes(mc, opcode, 2);
+   emit_rm_reg(mc, dst, src);
+
+   if (mc->debug)
+     debug_emit_row(mc->debug, mc->section_id, row_start, mc->loc);
+ }
+
+ /* BSWAP r32/r64 — opcode 0F C8+r, where the low three register bits are
+  * folded into the second opcode byte. REX.W selects r64; REX.B covers
+  * registers 8 and up. */
+ static void emit_bswap(MCEmitter* mc, int w, u32 reg) {
+   u32 row_start = obj_pos(mc->obj, mc->section_id);
+   u8 opcode[2];
+
+   opcode[0] = 0x0F;
+   opcode[1] = (u8)(0xC8 + (reg & 7));
+
+   emit_rex(mc, w, 0, 0, reg);
+   mc->emit_bytes(mc, opcode, 2);
+
+   if (mc->debug)
+     debug_emit_row(mc->debug, mc->section_id, row_start, mc->loc);
+ }
+
+ /* ROL r/m16, imm8 — 66 [REX] C1 /0 ib. Rotating by 8 swaps the two bytes
+  * of a 16-bit value. */
+ static void emit_rol16_imm8(MCEmitter* mc, u32 reg, u8 imm) {
+   u32 row_start = obj_pos(mc->obj, mc->section_id);
+   u8 opsize_prefix = 0x66;
+   u8 enc[3] = {0xC1, modrm(3u, 0u, reg & 7u), imm};
+
+   mc->emit_bytes(mc, &opsize_prefix, 1);
+   emit_rex(mc, 0, 0, 0, reg);
+   mc->emit_bytes(mc, enc, 3);
+
+   if (mc->debug)
+     debug_emit_row(mc->debug, mc->section_id, row_start, mc->loc);
+ }
+
+ /* XOR r/m, imm32 — opcode 81 /6 followed by a little-endian imm32. The
+  * CLZ lowering uses it to turn a BSR index x into (bits-1) - x. */
+ static void emit_xor_imm32(MCEmitter* mc, int w, u32 reg, i32 imm) {
+   u32 row_start = obj_pos(mc->obj, mc->section_id);
+   u8 opcode = 0x81;
+
+   emit_rex(mc, w, 0, 0, reg);
+   mc->emit_bytes(mc, &opcode, 1);
+   emit_rm_reg(mc, 6u, reg); /* /6 selects XOR within the 0x81 group */
+   emit_u32le(mc, (u32)imm);
+
+   if (mc->debug)
+     debug_emit_row(mc->debug, mc->section_id, row_start, mc->loc);
+ }
+
+ /* Turn an atomic address operand into a (base register, displacement)
+  * pair. An OPK_REG operand is a pointer held in that register with a zero
+  * displacement; every other kind is handed to addr_base. Returns the base
+  * register and writes the displacement to *out_disp. */
+ static u32 atomic_addr_base(CGTarget* t, Operand addr, i32* out_disp) {
+   if (addr.kind != OPK_REG) return addr_base(t, addr, out_disp);
+
+   *out_disp = 0;
+   return addr.v.reg & 0xFu;
+ }
+
+ /* Emit an atomic load of *addr into dst. A plain MOV is used for every
+  * memory order, so `ord` is ignored. The access size is ma.size, or the
+  * size of dst.type when ma.size is 0; signedness comes from ma.type when
+  * set, otherwise from dst.type. */
+ static void x_atomic_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma,
+                           MemOrder ord) {
+   MCEmitter* mc = t->mc;
+   u32 nbytes = ma.size ? ma.size : type_byte_size(dst.type);
+   i32 disp;
+   u32 base = atomic_addr_base(t, addr, &disp);
+   int is_signed = type_is_signed(ma.type ? ma.type : dst.type);
+   u32 dst_reg = dst.v.reg & 0xFu;
+
+   (void)ord; /* x86: plain MOV satisfies all orders for loads. */
+   emit_mov_load(mc, nbytes, is_signed, dst_reg, base, disp);
+ }
+
+ /* Emit an atomic store of `src` to `addr`.
+  *
+  * addr  OPK_REG (pointer in a register) or any kind addr_base accepts.
+  * src   OPK_IMM or OPK_REG; any other kind panics.
+  * ma    access descriptor; ma.size == 0 falls back to the size of src.type.
+  * ord   MO_SEQ_CST gets a full barrier; every other order is a plain MOV.
+  *
+  * r11 is used as scratch.
+  *
+  * The XCHG helper only encodes 32- and 64-bit operands, so a 1- or 2-byte
+  * SEQ_CST store must not go through it: a 32-bit XCHG would overwrite the
+  * bytes next to the object. Those sizes use MOV followed by MFENCE. */
+ static void x_atomic_store(CGTarget* t, Operand addr, Operand src, MemAccess ma,
+                            MemOrder ord) {
+   XImpl* a = impl_of(t);
+   MCEmitter* mc = t->mc;
+   u32 sz = ma.size ? ma.size : type_byte_size(src.type);
+   int w = (sz == 8) ? 1 : 0;
+   i32 disp;
+   u32 base = atomic_addr_base(t, addr, &disp);
+
+   /* Materialize src into a register. */
+   u32 sr;
+   if (src.kind == OPK_IMM) {
+     x64_emit_load_imm(mc, w, X64_R11, src.v.imm);
+     sr = X64_R11;
+   } else if (src.kind == OPK_REG) {
+     sr = src.v.reg & 0xFu;
+   } else {
+     compiler_panic(t->c, a->loc, "x64 atomic_store: src kind %d unsupported",
+                    (int)src.kind);
+   }
+
+   if (ord != MO_SEQ_CST) {
+     /* Plain store covers RELAXED / RELEASE. */
+     emit_mov_store(mc, sz, sr, base, disp);
+     return;
+   }
+
+   if (sz == 4 || sz == 8) {
+     /* SEQ_CST store: XCHG implicitly fences. Move src into r11 so the
+      * caller's reg is unmodified, then xchg [mem], r11. */
+     if (sr != X64_R11) emit_mov_rr(mc, w, X64_R11, sr);
+     emit_lock_xchg_mem(mc, w, X64_R11, base, disp);
+     return;
+   }
+
+   /* Sub-word SEQ_CST store: exact-width MOV, then a StoreLoad barrier. */
+   emit_mov_store(mc, sz, sr, base, disp);
+   emit_mfence(mc);
+ }
+
+ /* Emit an atomic read-modify-write: *addr = *addr <op> val, with the
+  * previous memory value delivered in dst.
+  *
+  * AO_ADD / AO_SUB use LOCK XADD (SUB negates val first), AO_XCHG uses
+  * XCHG, and AO_AND / AO_OR / AO_XOR / AO_NAND use a LOCK CMPXCHG retry
+  * loop; any other op panics. r11 always holds val; the loop additionally
+  * uses rax (prior value) and rcx (new value). `ord` is ignored.
+  *
+  * NOTE(review): w is 1 only for sz == 8, so every other size — including
+  * 1 and 2 — is emitted with the 32-bit XADD/XCHG/CMPXCHG forms; confirm
+  * sub-word atomics never reach this function.
+  * NOTE(review): assumes `base` is none of rax/rcx/r11 — confirm against
+  * the register allocator. */
+ static void x_atomic_rmw(CGTarget* t, AtomicOp op, Operand dst, Operand addr,
+ Operand val, MemAccess ma, MemOrder ord) {
+ XImpl* a = impl_of(t);
+ MCEmitter* mc = t->mc;
+ (void)ord; /* LOCK-prefixed ops are unconditionally full barriers. */
+ u32 sz = ma.size ? ma.size : type_byte_size(dst.type);
+ int w = (sz == 8) ? 1 : 0;
+ i32 disp;
+ u32 base = atomic_addr_base(t, addr, &disp);
+ u32 dr = dst.v.reg & 0xFu;
+
+ /* Materialize val into r11 (it's our working temp). For SUB we negate
+ * it so the XADD does the subtraction. */
+ if (val.kind == OPK_IMM) {
+ i64 v = val.v.imm;
+ if (op == AO_SUB) v = -v;
+ x64_emit_load_imm(mc, w, X64_R11, v);
+ } else if (val.kind == OPK_REG) {
+ u32 vr = val.v.reg & 0xFu;
+ if (vr != X64_R11) emit_mov_rr(mc, w, X64_R11, vr);
+ if (op == AO_SUB) emit_f7_rm(mc, w, 3u, X64_R11); /* NEG */
+ } else {
+ compiler_panic(t->c, a->loc, "x64 atomic_rmw: val kind %d unsupported",
+ (int)val.kind);
+ }
+
+ if (op == AO_ADD || op == AO_SUB) {
+ /* LOCK XADD [base], r11 — afterwards r11 holds prior. */
+ emit_lock_xadd(mc, w, X64_R11, base, disp);
+ if (dr != X64_R11) emit_mov_rr(mc, w, dr, X64_R11);
+ return;
+ }
+ if (op == AO_XCHG) {
+ /* The exchange leaves the previous memory value in r11. */
+ emit_lock_xchg_mem(mc, w, X64_R11, base, disp);
+ if (dr != X64_R11) emit_mov_rr(mc, w, dr, X64_R11);
+ return;
+ }
+
+ /* AND/OR/XOR/NAND: CMPXCHG retry loop.
+ *
+ * mov rax, [mem]
+ * .retry:
+ * mov rcx, rax ; new = prior
+ * <op> rcx, r11 ; combine with val
+ * [NAND: not rcx]
+ * lock cmpxchg [mem], rcx
+ * jne .retry
+ * mov dr, rax
+ *
+ * rax = prior (cmpxchg implicit), rcx = new (scratch), r11 = val. */
+ emit_mov_load(mc, sz, 0, X64_RAX, base, disp);
+ MCLabel L_retry = mc->label_new(mc);
+ mc->label_place(mc, L_retry);
+ emit_mov_rr(mc, w, X64_RCX, X64_RAX);
+ switch (op) {
+ case AO_AND:
+ emit_alu_rr(mc, w, 0x21, X64_RCX, X64_R11);
+ break;
+ case AO_OR:
+ emit_alu_rr(mc, w, 0x09, X64_RCX, X64_R11);
+ break;
+ case AO_XOR:
+ emit_alu_rr(mc, w, 0x31, X64_RCX, X64_R11);
+ break;
+ case AO_NAND:
+ emit_alu_rr(mc, w, 0x21, X64_RCX, X64_R11);
+ emit_f7_rm(mc, w, 2u, X64_RCX); /* NOT */
+ break;
+ default:
+ compiler_panic(t->c, a->loc, "x64 atomic_rmw: op %d unimpl", (int)op);
+ }
+ /* On failure CMPXCHG has already refreshed rax with the current memory
+ * value, so the loop re-enters without reloading. */
+ emit_lock_cmpxchg(mc, w, X64_RCX, base, disp);
+ emit_jcc_label(mc, X64_CC_NE, L_retry);
+ if (dr != X64_RAX) emit_mov_rr(mc, w, dr, X64_RAX);
+ }
+
+ /* Emit a compare-and-swap on *addr.
+  *
+  * expected is placed in rax and desired in r11 (each may be OPK_IMM or
+  * OPK_REG; other kinds panic), then LOCK CMPXCHG runs. Afterwards `ok`
+  * receives ZF as 0/1 and `prior` receives rax. Both memory orders are
+  * ignored. The access size is ma.size, or the size of prior.type when
+  * ma.size is 0. */
+ static void x_atomic_cas(CGTarget* t, Operand prior, Operand ok, Operand addr,
+                          Operand expected, Operand desired, MemAccess ma,
+                          MemOrder succ, MemOrder fail) {
+   XImpl* impl = impl_of(t);
+   MCEmitter* mc = t->mc;
+   u32 nbytes = ma.size ? ma.size : type_byte_size(prior.type);
+   int wide = (nbytes == 8) ? 1 : 0;
+   i32 disp;
+   u32 base = atomic_addr_base(t, addr, &disp);
+
+   (void)succ;
+   (void)fail;
+
+   /* Comparand goes to rax first ... */
+   switch (expected.kind) {
+     case OPK_IMM:
+       x64_emit_load_imm(mc, wide, X64_RAX, expected.v.imm);
+       break;
+     case OPK_REG: {
+       u32 exp_reg = expected.v.reg & 0xFu;
+       if (exp_reg != X64_RAX) emit_mov_rr(mc, wide, X64_RAX, exp_reg);
+       break;
+     }
+     default:
+       compiler_panic(t->c, impl->loc,
+                      "x64 atomic_cas: exp kind %d unsupported",
+                      (int)expected.kind);
+   }
+
+   /* ... then the replacement value goes to r11. */
+   switch (desired.kind) {
+     case OPK_IMM:
+       x64_emit_load_imm(mc, wide, X64_R11, desired.v.imm);
+       break;
+     case OPK_REG: {
+       u32 des_reg = desired.v.reg & 0xFu;
+       if (des_reg != X64_R11) emit_mov_rr(mc, wide, X64_R11, des_reg);
+       break;
+     }
+     default:
+       compiler_panic(t->c, impl->loc,
+                      "x64 atomic_cas: des kind %d unsupported",
+                      (int)desired.kind);
+   }
+
+   emit_lock_cmpxchg(mc, wide, X64_R11, base, disp);
+
+   /* Capture ZF (success) as a zero-extended 0/1 in `ok`. */
+   u32 ok_reg = ok.v.reg & 0xFu;
+   emit_setcc(mc, X64_CC_E, ok_reg);
+   emit_movzx_r32_r8(mc, ok_reg, ok_reg);
+
+   /* rax now holds the value that was in memory. */
+   u32 prior_reg = prior.v.reg & 0xFu;
+   if (prior_reg != X64_RAX) emit_mov_rr(mc, wide, prior_reg, X64_RAX);
+ }
+
+ /* Emit a memory fence. Only MO_SEQ_CST produces code (an MFENCE); every
+  * other order emits nothing. */
+ static void x_fence(CGTarget* t, MemOrder o) {
+   if (o != MO_SEQ_CST) return;
+   emit_mfence(t->mc);
+ }
+
+/* ============================================================
+ * Intrinsics (Group L). */
+
+ /* Emit an unrolled low-to-high copy of n bytes from [sr] to [dr], staged
+  * through rax in 8-, 4-, 2-, then 1-byte pieces. */
+ static void x_intrin_copy_fwd(MCEmitter* mc, u32 dr, u32 sr, u32 n) {
+   u32 i = 0;
+   for (u32 piece = 8; piece >= 1; piece >>= 1) {
+     while (n - i >= piece) {
+       emit_mov_load(mc, piece, 0, X64_RAX, sr, (i32)i);
+       emit_mov_store(mc, piece, X64_RAX, dr, (i32)i);
+       i += piece;
+     }
+   }
+ }
+
+ /* Emit an unrolled high-to-low copy of n bytes from [sr] to [dr], staged
+  * through rax in 8-, 4-, 2-, then 1-byte pieces. */
+ static void x_intrin_copy_bwd(MCEmitter* mc, u32 dr, u32 sr, u32 n) {
+   u32 i = n;
+   for (u32 piece = 8; piece >= 1; piece >>= 1) {
+     while (i >= piece) {
+       i -= piece;
+       emit_mov_load(mc, piece, 0, X64_RAX, sr, (i32)i);
+       emit_mov_store(mc, piece, X64_RAX, dr, (i32)i);
+     }
+   }
+ }
+
+ /* Lower one compiler intrinsic to machine code.
+  *
+  * kind   which intrinsic to emit; unsupported kinds panic.
+  * dsts   result operands (read as registers); nd is not consulted.
+  * args   argument operands; na is not consulted.
+  *
+  * rax and r11 are used as scratch by several cases.
+  *
+  * memcpy / memmove / memset require a constant byte count and pointers in
+  * registers. memmove picks the copy direction at run time so that either
+  * overlap direction is handled. memset uses only the low 8 bits of a
+  * register fill value. */
+ static void x_intrinsic(CGTarget* t, IntrinKind kind, Operand* dsts, u32 nd,
+                         const Operand* args, u32 na) {
+   XImpl* a = impl_of(t);
+   MCEmitter* mc = t->mc;
+   (void)nd;
+   (void)na;
+
+   switch (kind) {
+     case INTRIN_POPCOUNT: {
+       Operand src = args[0];
+       Operand dst = dsts[0];
+       int w = type_is_64(src.type) ? 1 : 0;
+       emit_popcnt(mc, w, dst.v.reg & 0xFu, src.v.reg & 0xFu);
+       return;
+     }
+     case INTRIN_CTZ: {
+       /* BSF gives the index of the lowest set bit (undefined for 0). */
+       Operand src = args[0];
+       Operand dst = dsts[0];
+       int w = type_is_64(src.type) ? 1 : 0;
+       emit_bs(mc, w, 0xBC, dst.v.reg & 0xFu, src.v.reg & 0xFu);
+       return;
+     }
+     case INTRIN_CLZ: {
+       /* BSR gives the index of the highest set bit; clz = (bits-1) - bsr.
+        * XOR with (bits-1) computes the subtraction for in-range values. */
+       Operand src = args[0];
+       Operand dst = dsts[0];
+       int w = type_is_64(src.type) ? 1 : 0;
+       u32 dr = dst.v.reg & 0xFu;
+       emit_bs(mc, w, 0xBD, dr, src.v.reg & 0xFu);
+       emit_xor_imm32(mc, w, dr, w ? 63 : 31);
+       return;
+     }
+     case INTRIN_BSWAP16: {
+       /* Copy, then rotate the low 16 bits by 8 to swap the two bytes. */
+       Operand src = args[0];
+       Operand dst = dsts[0];
+       u32 dr = dst.v.reg & 0xFu;
+       u32 sr = src.v.reg & 0xFu;
+       if (dr != sr) emit_mov_rr(mc, 0, dr, sr);
+       emit_rol16_imm8(mc, dr, 8);
+       return;
+     }
+     case INTRIN_BSWAP32: {
+       Operand src = args[0];
+       Operand dst = dsts[0];
+       u32 dr = dst.v.reg & 0xFu;
+       u32 sr = src.v.reg & 0xFu;
+       if (dr != sr) emit_mov_rr(mc, 0, dr, sr);
+       emit_bswap(mc, 0, dr);
+       return;
+     }
+     case INTRIN_BSWAP64: {
+       Operand src = args[0];
+       Operand dst = dsts[0];
+       u32 dr = dst.v.reg & 0xFu;
+       u32 sr = src.v.reg & 0xFu;
+       if (dr != sr) emit_mov_rr(mc, 1, dr, sr);
+       emit_bswap(mc, 1, dr);
+       return;
+     }
+     case INTRIN_MEMCPY:
+     case INTRIN_MEMMOVE: {
+       /* args = (dst_addr, src_addr, n_bytes). v1: const n, REG ptrs. */
+       Operand da = args[0], sa = args[1], nb = args[2];
+       if (da.kind != OPK_REG || sa.kind != OPK_REG || nb.kind != OPK_IMM) {
+         compiler_panic(t->c, a->loc,
+                        "x64 intrinsic: %s with non-const n or non-REG ptr",
+                        kind == INTRIN_MEMCPY ? "memcpy" : "memmove");
+       }
+       u32 dr = da.v.reg & 0xFu;
+       u32 sr = sa.v.reg & 0xFu;
+       u32 n = (u32)nb.v.imm;
+
+       /* A copy that is one load followed by one store cannot be disturbed
+        * by overlap, so it needs no direction test. */
+       int single_piece = (n == 1 || n == 2 || n == 4 || n == 8);
+       if (kind == INTRIN_MEMCPY || n == 0 || single_piece) {
+         x_intrin_copy_fwd(mc, dr, sr, n);
+         return;
+       }
+
+       /* memmove: a low-to-high copy is safe when dst <= src, a
+        * high-to-low copy when dst > src. Decide at run time:
+        *
+        *   cmp  sr, dr          ; flags from src - dst
+        *   jae  L_fwd           ; src >= dst (unsigned)
+        *   <high-to-low copy>
+        *   jmp  L_done
+        * L_fwd:
+        *   <low-to-high copy>
+        * L_done: */
+       MCLabel L_fwd = mc->label_new(mc);
+       MCLabel L_done = mc->label_new(mc);
+       emit_alu_rr(mc, 1, 0x39, sr, dr); /* CMP r/m64, r64 */
+       emit_jcc_label(mc, X64_CC_AE, L_fwd);
+       x_intrin_copy_bwd(mc, dr, sr, n);
+       emit_jmp_label(mc, L_done);
+       mc->label_place(mc, L_fwd);
+       x_intrin_copy_fwd(mc, dr, sr, n);
+       mc->label_place(mc, L_done);
+       return;
+     }
+     case INTRIN_MEMSET: {
+       /* args = (dst_addr, byte, n). */
+       Operand da = args[0], bv = args[1], nb = args[2];
+       if (da.kind != OPK_REG || nb.kind != OPK_IMM) {
+         compiler_panic(t->c, a->loc,
+                        "x64 intrinsic: memset with non-const n / non-REG ptr");
+       }
+       u32 dr = da.v.reg & 0xFu;
+       u32 n = (u32)nb.v.imm;
+       /* Build a 64-bit value with the byte broadcast across all 8 bytes. */
+       if (bv.kind == OPK_IMM) {
+         u8 byte = (u8)(bv.v.imm & 0xffu);
+         u64 b64 = byte;
+         b64 |= b64 << 8;
+         b64 |= b64 << 16;
+         b64 |= b64 << 32;
+         x64_emit_load_imm(mc, 1, X64_RAX, (i64)b64);
+       } else if (bv.kind == OPK_REG) {
+         /* rax = (bv & 0xFF) * 0x0101010101010101. The mask matters: any
+          * bits above the low byte would leak into the neighbouring bytes
+          * of the product. The 32-bit mov also clears rax's upper half. */
+         emit_mov_rr(mc, 0, X64_RAX, bv.v.reg & 0xFu);
+         {
+           /* and eax, 0xFF : 0x25 imm32 (5 bytes, EAX-specific form). */
+           u32 ofs = obj_pos(mc->obj, mc->section_id);
+           u8 mask[5] = {0x25, 0xFF, 0x00, 0x00, 0x00};
+           mc->emit_bytes(mc, mask, 5);
+           if (mc->debug)
+             debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
+         }
+         x64_emit_load_imm(mc, 1, X64_R11, (i64)0x0101010101010101ll);
+         emit_imul_rr(mc, 1, X64_RAX, X64_R11);
+       } else {
+         compiler_panic(t->c, a->loc,
+                        "x64 intrinsic: memset byte kind %d unsupported",
+                        (int)bv.kind);
+       }
+       /* Unrolled stores of the broadcast value in 8/4/2/1-byte pieces. */
+       u32 i = 0;
+       while (i + 8 <= n) {
+         emit_mov_store(mc, 8, X64_RAX, dr, (i32)i);
+         i += 8;
+       }
+       while (i + 4 <= n) {
+         emit_mov_store(mc, 4, X64_RAX, dr, (i32)i);
+         i += 4;
+       }
+       while (i + 2 <= n) {
+         emit_mov_store(mc, 2, X64_RAX, dr, (i32)i);
+         i += 2;
+       }
+       while (i < n) {
+         emit_mov_store(mc, 1, X64_RAX, dr, (i32)i);
+         i += 1;
+       }
+       return;
+     }
+     case INTRIN_PREFETCH:
+       /* Drop the hint. */
+       return;
+     case INTRIN_ASSUME_ALIGNED: {
+       /* dst = src (alignment is a hint only). */
+       Operand src = args[0];
+       Operand dst = dsts[0];
+       u32 dr = dst.v.reg & 0xFu;
+       u32 sr = src.v.reg & 0xFu;
+       if (dr != sr) emit_mov_rr(mc, 1, dr, sr);
+       return;
+     }
+     case INTRIN_EXPECT: {
+       /* dst = val; expected hint dropped. */
+       Operand val = args[0];
+       Operand dst = dsts[0];
+       int w = type_is_64(dst.type) ? 1 : 0;
+       u32 dr = dst.v.reg & 0xFu;
+       if (val.kind == OPK_REG) {
+         u32 sr = val.v.reg & 0xFu;
+         if (sr != dr) emit_mov_rr(mc, w, dr, sr);
+       } else if (val.kind == OPK_IMM) {
+         x64_emit_load_imm(mc, w, dr, val.v.imm);
+       } else {
+         compiler_panic(t->c, a->loc,
+                        "x64 intrinsic: expect val kind %d unsupported",
+                        (int)val.kind);
+       }
+       return;
+     }
+     case INTRIN_UNREACHABLE:
+     case INTRIN_TRAP:
+       emit_ud2(mc);
+       return;
+     case INTRIN_ADD_OVERFLOW:
+     case INTRIN_SUB_OVERFLOW: {
+       /* dsts: [val, ovf]. ADD/SUB sets OF on signed overflow; SETO
+        * captures it and MOVZX widens it to a 0/1 value. */
+       Operand a_op = args[0], b_op = args[1];
+       Operand dval = dsts[0], dovf = dsts[1];
+       int w = type_is_64(dval.type) ? 1 : 0;
+       u32 rd = dval.v.reg & 0xFu;
+       u32 ra = x64_force_reg_int(t, a_op, w, X64_RAX);
+       if (rd != ra) emit_mov_rr(mc, w, rd, ra);
+       u32 rb = x64_force_reg_int(t, b_op, w, X64_R11);
+       u8 op = (kind == INTRIN_ADD_OVERFLOW) ? 0x01 : 0x29;
+       emit_alu_rr(mc, w, op, rd, rb);
+       u32 dovf_r = dovf.v.reg & 0xFu;
+       emit_setcc(mc, X64_CC_O, dovf_r);
+       emit_movzx_r32_r8(mc, dovf_r, dovf_r);
+       return;
+     }
+     case INTRIN_MUL_OVERFLOW: {
+       /* dsts: [val, ovf]. IMUL r32, r/m32 (0F AF /r) is the signed
+        * two-operand form: low 32 bits of product go to dst, OF set if
+        * the result didn't fit. i64 not yet supported. */
+       Operand a_op = args[0], b_op = args[1];
+       Operand dval = dsts[0], dovf = dsts[1];
+       int w = type_is_64(dval.type) ? 1 : 0;
+       if (w) {
+         compiler_panic(t->c, a->loc,
+                        "x64 intrinsic: mul_overflow on i64 not yet supported");
+       }
+       u32 rd = dval.v.reg & 0xFu;
+       u32 ra = x64_force_reg_int(t, a_op, w, X64_RAX);
+       if (rd != ra) emit_mov_rr(mc, w, rd, ra);
+       u32 rb = x64_force_reg_int(t, b_op, w, X64_R11);
+       emit_imul_rr(mc, w, rd, rb);
+       u32 dovf_r = dovf.v.reg & 0xFu;
+       emit_setcc(mc, X64_CC_O, dovf_r);
+       emit_movzx_r32_r8(mc, dovf_r, dovf_r);
+       return;
+     }
+     default:
+       compiler_panic(t->c, a->loc, "x64 intrinsic: kind %d unsupported",
+                      (int)kind);
+   }
+ }
+ /* Inline-assembly hook: every argument except the target is ignored and
+  * the call is routed to x_panic with the tag "asm_block". */
+ static void x_asm_block(CGTarget* t, const char* tmpl,
+                         const AsmConstraint* outs, u32 no, Operand* oo,
+                         const AsmConstraint* ins, u32 ni, const Operand* io,
+                         const Sym* clobs, u32 nc) {
+   /* Template and clobber list. */
+   (void)tmpl;
+   (void)clobs;
+   (void)nc;
+   /* Output constraints and operands. */
+   (void)outs;
+   (void)no;
+   (void)oo;
+   /* Input constraints and operands. */
+   (void)ins;
+   (void)ni;
+   (void)io;
+
+   x_panic(t, "asm_block");
+ }
+
+ /* Record the current source location on the target and, when an emitter
+  * is attached, forward it there as well. */
+ static void x_set_loc(CGTarget* t, SrcLoc l) {
+   XImpl* impl = (XImpl*)t;
+
+   impl->loc = l;
+   if (!t->mc) return;
+   t->mc->set_loc(t->mc, l);
+ }
+
+ /* finalize hook: no-op, the target argument is ignored. */
+ static void x_finalize(CGTarget* t) {
+   (void)t;
+ }
+ /* destroy hook: no-op, the target argument is ignored. */
+ static void x_destroy(CGTarget* t) {
+   (void)t;
+ }
+
+ /* Deferred-cleanup adapter: hands the registered pointer to
+  * cgtarget_free as a CGTarget. */
+ static void cgt_cleanup(void* arg) {
+   CGTarget* target = (CGTarget*)arg;
+   cgtarget_free(target);
+ }
+
+ /* Create the x86-64 code-generation target.
+  *
+  * The XImpl is allocated from the compiler's TU arena and zeroed, its
+  * embedded CGTarget is bound to the compiler, object builder and emitter,
+  * and every hook is pointed at the x_* implementation (setjmp_/longjmp_
+  * are left NULL). cgt_cleanup is registered with compiler_defer before
+  * the target is returned. */
+ CGTarget* x64_cgtarget_new(Compiler* c, ObjBuilder* o, MCEmitter* m) {
+   XImpl* impl = arena_new(c->tu, XImpl);
+   memset(impl, 0, sizeof(XImpl));
+
+   CGTarget* tgt = &impl->base;
+
+   /* Collaborators. */
+   tgt->c = c;
+   tgt->obj = o;
+   tgt->mc = m;
+
+   /* Function boundaries. */
+   tgt->func_begin = x_func_begin;
+   tgt->func_end = x_func_end;
+
+   /* Registers, frame slots, parameters. */
+   tgt->alloc_reg = x_alloc_reg;
+   tgt->free_reg = x_free_reg;
+   tgt->frame_slot = x_frame_slot;
+   tgt->param = x_param;
+   tgt->clobbers = x_clobbers;
+   tgt->spill_reg = x_spill_reg;
+   tgt->reload_reg = x_reload_reg;
+
+   /* Labels and branches. */
+   tgt->label_new = x_label_new;
+   tgt->label_place = x_label_place;
+   tgt->jump = x_jump;
+   tgt->cmp_branch = x_cmp_branch;
+
+   /* Structured control flow. */
+   tgt->scope_begin = x_scope_begin;
+   tgt->scope_else = x_scope_else;
+   tgt->scope_end = x_scope_end;
+   tgt->break_to = x_break_to;
+   tgt->continue_to = x_continue_to;
+
+   /* Data movement. */
+   tgt->load_imm = x_load_imm;
+   tgt->load_const = x_load_const;
+   tgt->copy = x_copy;
+   tgt->load = x_load;
+   tgt->store = x_store;
+   tgt->addr_of = x_addr_of;
+   tgt->tls_addr_of = x_tls_addr_of;
+   tgt->copy_bytes = x_copy_bytes;
+   tgt->set_bytes = x_set_bytes;
+   tgt->bitfield_load = x_bitfield_load;
+   tgt->bitfield_store = x_bitfield_store;
+
+   /* Arithmetic, comparison, conversion. */
+   tgt->binop = x_binop;
+   tgt->unop = x_unop;
+   tgt->cmp = x_cmp;
+   tgt->convert = x_convert;
+
+   /* Calls and returns. */
+   tgt->call = x_call;
+   tgt->ret = x_ret;
+
+   /* alloca and varargs. */
+   tgt->alloca_ = x_alloca_;
+   tgt->va_start_ = x_va_start_;
+   tgt->va_arg_ = x_va_arg_;
+   tgt->va_end_ = x_va_end_;
+   tgt->va_copy_ = x_va_copy_;
+
+   /* No setjmp/longjmp hooks are installed. */
+   tgt->setjmp_ = NULL;
+   tgt->longjmp_ = NULL;
+
+   /* Atomics. */
+   tgt->atomic_load = x_atomic_load;
+   tgt->atomic_store = x_atomic_store;
+   tgt->atomic_rmw = x_atomic_rmw;
+   tgt->atomic_cas = x_atomic_cas;
+   tgt->fence = x_fence;
+
+   /* Intrinsics and inline assembly. */
+   tgt->intrinsic = x_intrinsic;
+   tgt->asm_block = x_asm_block;
+
+   /* Bookkeeping and lifecycle. */
+   tgt->set_loc = x_set_loc;
+   tgt->finalize = x_finalize;
+   tgt->destroy = x_destroy;
+
+   compiler_defer(c, cgt_cleanup, tgt);
+   return tgt;
+ }
diff --git a/src/link/link_internal.h b/src/link/link_internal.h
@@ -109,8 +109,87 @@ struct Linker {
CompilerCleanup* deferred; /* registered by link_new */
};
-/* Defined in link_layout.c. */
+/* ---- GC liveness (link_resolve.c) ---------------------------------------- */
+
+/* Per-input, per-section liveness marks for the GC pass. */
+typedef struct GcLive {
+ /* marks[input_idx][obj_sec_id] is 0 or 1; row ii is sized to nsec[ii]. */
+ u8** marks;
+ u32* nsec; /* obj_section_count per input */
+ u32 ninputs; /* presumably the number of rows in marks/nsec — TODO confirm */
+} GcLive;
+
+/* Queue of (input, section) pairs, each packed into a single u64. */
+typedef struct GcQueue {
+ u64* items; /* (u64) packed: hi32 = input_idx, lo32 = obj_sec_id */
+ u32 n; /* presumably the number of entries in use — TODO confirm */
+ u32 cap; /* presumably the allocated capacity of items — TODO confirm */
+} GcQueue;
+
+/* ---- Cross-file helpers (link_layout.c → link_reloc_layout.c) ------------ */
+
+/* Four-bucket segment partitioning by permission. The enum is declared
+ * here; link_bucket_for, which maps section flags to a bucket, lives in
+ * link_layout.c. */
+typedef enum SegBucket {
+ SEG_RX = 0, /* SF_ALLOC | SF_EXEC */
+ SEG_R = 1, /* SF_ALLOC, no EXEC, no WRITE */
+ SEG_RW = 2, /* SF_ALLOC | SF_WRITE (incl. BSS) */
+ SEG_TLS = 3, /* SF_ALLOC | SF_TLS (.tdata + .tbss) */
+ SEG_NBUCKETS = 4, /* bucket count, not a bucket */
+} SegBucket;
+
+/* section_kept: 1 for allocatable progbits/nobits sections (link_layout.c). */
+int link_section_kept(const Section* s);
+/* bucket_for: map section flags to SegBucket (link_layout.c). */
+SegBucket link_bucket_for(u16 flags);
+/* layout_page_size: page size for segment alignment (link_layout.c). */
+u64 link_layout_page_size(Linker* l);
+
+/* Append a fresh symbol slot and return its id (link_layout.c). */
+LinkSymId link_append_symbol(LinkImage* img, const LinkSymbol* tmpl);
+/* Append a fresh reloc slot and return it (link_layout.c). */
+LinkRelocApply* link_append_reloc_slot(LinkImage* img);
+
+/* Emit or upsert a synthetic global boundary symbol (link_layout.c). */
+void link_emit_boundary_sym(Linker* l, LinkImage* img, const char* name,
+ u64 vaddr);
+
+/* Detect __start_<X> / __stop_<X> with <X> a valid C identifier.
+ * Defined in link_resolve.c; used by link_reloc_layout.c. */
+int link_gc_split_start_stop(const char* s, size_t n, size_t* out_off,
+ size_t* out_len, int* out_is_start);
+
+/* GC liveness helpers (link_resolve.c). */
+int link_gc_live_get(const GcLive* g, u32 ii, ObjSecId j);
+
+/* Segment/section growth helpers for iplt (link_reloc_layout.c). */
+u32 link_iplt_alloc_segments(LinkImage* img, u32 nseg);
+u32 link_iplt_alloc_sections(LinkImage* img, u32 nsec);
+
+/* ---- Public entries (link_resolve.c) -------------------------------------- */
void link_ingest_archives(struct Linker*);
+void link_resolve_symbols(struct Linker*, LinkImage*);
+void link_resolve_undefs(struct Linker*, LinkImage*);
+void link_gc_compute(struct Linker*, LinkImage*, GcLive*);
+void link_gc_live_alloc(GcLive* g, struct Linker* l, Heap* h);
+void link_gc_live_free(GcLive* g, Heap* h);
+void link_gc_drop_dead_globals(struct Linker*, LinkImage*, const GcLive*);
+
+/* ---- Public entries (link_layout.c) --------------------------------------- */
+void link_layout_sections(struct Linker*, LinkImage*, const GcLive*);
+void link_layout_commons(struct Linker*, LinkImage*);
+void link_emit_segment_bytes(struct Linker*, LinkImage*);
+
+/* ---- Public entries (link_reloc_layout.c) --------------------------------- */
+void link_assign_symbol_vaddrs(struct Linker*, LinkImage*);
+void link_emit_array_boundaries(struct Linker*, LinkImage*);
+void link_emit_tls_boundaries(struct Linker*, LinkImage*);
+void link_emit_encoding_section_boundaries(struct Linker*, LinkImage*);
+void link_layout_jit_stubs(struct Linker*, LinkImage*, u32 map_size,
+ LinkSymId** stub_map_out);
+void link_layout_got(struct Linker*, LinkImage*, u32 map_size,
+ LinkSymId** got_map_out);
+void link_layout_iplt(struct Linker*, LinkImage*);
+void link_emit_relocations(struct Linker*, LinkImage*, const LinkSymId* got_map,
+ const LinkSymId* stub_map);
+void link_resolve_entry(struct Linker*, LinkImage*);
/* Defined in link.c. Walks the Linker's inputs and records each input's
* ObjBuilder on the LinkImage so the JIT debug view can reach its
diff --git a/src/link/link_layout.c b/src/link/link_layout.c
@@ -1,4 +1,6 @@
-/* link_resolve: builds a fresh LinkImage from the Linker's inputs.
+/* link_layout.c — section bucketing, vaddr assignment, scripted layout,
+ * COMMON BSS allocation, segment-byte copying, and the top-level
+ * link_resolve orchestration function.
*
* Image-relative discipline: every vaddr / file_offset on the produced
* image treats the image as based at 0. Consumers (link_emit_elf,
@@ -32,26 +34,15 @@ static SrcLoc no_loc(void) {
* back to 16 KiB otherwise — large enough for any current Linux/aarch64
* loader. A future cross-link with mismatched host/target page sizes
* will need a target-derived value here instead. */
-static u64 layout_page_size(Linker* l) {
+u64 link_layout_page_size(Linker* l) {
const CfreeExecMem* m = (l && l->c && l->c->env) ? l->c->env->execmem : NULL;
if (m && m->page_size) return (u64)m->page_size;
return 0x4000u;
}
-/* Four-bucket segment partitioning by permission. TLS sections live
- * in their own bucket: they're emitted as a PT_LOAD (so the kernel
- * maps the .tdata template) and then referenced by a PT_TLS phdr;
- * symbols in TLS sections need separate vaddr-to-offset arithmetic
- * for TLSLE relocs. */
-typedef enum SegBucket {
- SEG_RX = 0, /* SF_ALLOC | SF_EXEC */
- SEG_R = 1, /* SF_ALLOC, no EXEC, no WRITE */
- SEG_RW = 2, /* SF_ALLOC | SF_WRITE (incl. BSS) */
- SEG_TLS = 3, /* SF_ALLOC | SF_TLS (.tdata + .tbss) */
- SEG_NBUCKETS = 4,
-} SegBucket;
-
-static int section_kept(const Section* s) {
+/* Four-bucket segment partitioning: see SegBucket in link_internal.h. */
+
+int link_section_kept(const Section* s) {
/* This cut keeps allocatable progbits/nobits sections only. Debug,
* symtab/strtab, group, and note sections are dropped — none of
* them participate in a static ET_EXEC layout. */
@@ -61,7 +52,7 @@ static int section_kept(const Section* s) {
return 0;
}
-static SegBucket bucket_for(u16 flags) {
+SegBucket link_bucket_for(u16 flags) {
if (flags & SF_TLS) return SEG_TLS;
if (flags & SF_EXEC) return SEG_RX;
if (flags & SF_WRITE) return SEG_RW;
@@ -82,7 +73,7 @@ static LinkSymbol* append_symbol_slot(LinkImage* img) {
return s;
}
-static LinkSymId append_symbol(LinkImage* img, const LinkSymbol* tmpl) {
+LinkSymId link_append_symbol(LinkImage* img, const LinkSymbol* tmpl) {
LinkSymbol* s = append_symbol_slot(img);
LinkSymId id = s->id;
*s = *tmpl;
@@ -90,593 +81,12 @@ static LinkSymId append_symbol(LinkImage* img, const LinkSymbol* tmpl) {
return id;
}
-static LinkRelocApply* append_reloc_slot(LinkImage* img) {
+/* Push one new slot onto img->relocs and return it; panics on OOM. */
+LinkRelocApply* link_append_reloc_slot(LinkImage* img) {
LinkRelocApply* r = LinkRelocs_push(&img->relocs, NULL);
if (!r) compiler_panic(img->c, no_loc(), "link: oom growing relocs");
return r;
}
-/* ---- per-input symbol/section maps ---- */
-
-static void map_alloc(LinkImage* img, InputMap* m, u32 nsym, u32 nsection) {
- Heap* h = img->heap;
- m->nsym = nsym;
- m->sym = (LinkSymId*)h->alloc(h, sizeof(*m->sym) * nsym, _Alignof(LinkSymId));
- if (!m->sym)
- compiler_panic(img->c, no_loc(), "link: oom on input symbol map");
- memset(m->sym, 0, sizeof(*m->sym) * nsym);
- m->nsection = nsection;
- m->section = (LinkSectionId*)h->alloc(h, sizeof(*m->section) * nsection,
- _Alignof(LinkSectionId));
- if (!m->section)
- compiler_panic(img->c, no_loc(), "link: oom on input section map");
- memset(m->section, 0, sizeof(*m->section) * nsection);
-}
-
-/* ---- pass 1: collect symbols and pick section layout ---- */
-
-/* Defined-symbol replacement policy: a stronger binding wins; a
- * stronger binding seen second replaces the existing record in place.
- * Two SB_GLOBAL definitions of the same name are a hard error. */
-static int bind_strength(u8 bind) {
- /* Higher == stronger. */
- switch (bind) {
- case SB_GLOBAL:
- return 3;
- case SB_WEAK:
- return 2;
- case SB_LOCAL:
- return 1;
- default:
- return 0;
- }
-}
-
-static void resolve_symbols(Linker* l, LinkImage* img) {
- u32 ii;
- /* Per-input pass: register every ObjSym (locals included), and
- * insert defined globals/weaks into img->globals. Locals stay
- * out of the hash. */
- for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) {
- LinkInput* in = LinkInputs_at(&l->inputs, ii);
- ObjBuilder* ob = in->obj;
- InputMap* m = &img->input_maps[ii];
- u32 nsym = obj_section_count(ob);
- (void)nsym;
- ObjSymIter* it;
- ObjSymEntry e;
-
- /* DSO inputs do not contribute symbol definitions to the image —
- * their exports satisfy undefs through resolve_undefs's
- * DSO-search path, which marks the consuming LinkSymbols as
- * imported. Skipping here keeps DSO names out of img->globals
- * so a static-side defined symbol of the same name doesn't
- * collide and a DSO export doesn't accidentally win. */
- if (in->kind == LINK_INPUT_DSO_BYTES) continue;
-
- /* obj.h: ObjSymId 0 is the "none" sentinel; the iterator skips
- * it. We need an upper bound for the per-input symbol map,
- * which is the builder's nsymbols (count incl. id-0 sentinel).
- * Walk via the iterator to learn how many real entries there
- * are; allocate the map to a safe upper bound by counting. */
- u32 nsyms_in_input = 0;
- it = obj_symiter_new(ob);
- while (obj_symiter_next(it, &e)) ++nsyms_in_input;
- obj_symiter_free(it);
-
- map_alloc(img, m, nsyms_in_input + 1u /* +1 for id-0 slot */,
- obj_section_count(ob));
-
- it = obj_symiter_new(ob);
- while (obj_symiter_next(it, &e)) {
- const ObjSym* s = e.sym;
- LinkSymbol rec;
- LinkSymId existing;
- /* Same prune as elf_emit / macho_emit: an extern declaration that
- * the TU never relocated against is not a real linker input. The
- * in-memory cc->link path skips the file emitter, so we apply the
- * same filter here. The "logical undef" predicate (no section AND
- * not SK_ABS/SK_COMMON) covers both `SK_UNDEF` (already-normalized
- * by the readers) and the SK_FUNC/SK_OBJ-with-no-section shape the
- * cgtarget mints for `extern` declarations. */
- {
- int is_logical_undef = (s->section_id == OBJ_SEC_NONE) &&
- (s->kind != SK_ABS) && (s->kind != SK_COMMON);
- if (is_logical_undef && !s->referenced &&
- (s->bind == SB_GLOBAL || s->bind == SB_WEAK)) {
- continue;
- }
- }
- /* "Defined" means: not SK_UNDEF AND has a backing storage — a
- * containing section, an absolute value, or COMMON reservation.
- * cgtarget paths emit SK_FUNC / SK_OBJ for an `extern`
- * declaration's bookkeeping symbol with section_id = 0; those are
- * still undefs from the linker's perspective. ELF's read_elf
- * already normalizes those to SK_UNDEF; this check unifies the
- * in-memory pipeline with that. SK_FILE (STT_FILE) is a defined
- * local marker carrying a source filename at SHN_ABS — it has no
- * section, but it is not undef. */
- int is_def = (s->kind != SK_UNDEF) &&
- (s->kind == SK_ABS || s->kind == SK_COMMON ||
- s->kind == SK_FILE ||
- s->section_id != OBJ_SEC_NONE);
-
- memset(&rec, 0, sizeof(rec));
- rec.name = s->name;
- rec.input_id = in->id;
- rec.obj_sym = e.id;
- rec.section_id = LINK_SEC_NONE; /* filled later */
- rec.value = s->value;
- rec.size = s->size;
- rec.common_align = (s->kind == SK_COMMON) ? (u32)s->common_align : 0u;
- rec.bind = (u8)s->bind;
- rec.kind = (u8)s->kind;
- rec.defined = (u8)is_def;
- rec.vaddr = 0;
-
- if (is_def && (s->bind == SB_GLOBAL || s->bind == SB_WEAK) &&
- s->name != 0) {
- /* Try to insert. On collision, apply replacement
- * policy in-place against the existing LinkSymbol. */
- LinkSymId fresh = (LinkSymId)(LinkSyms_count(&img->syms) + 1u);
- if (symhash_insert(&img->globals, s->name, fresh, &existing)) {
- /* No collision — append a new slot. */
- m->sym[e.id] = append_symbol(img, &rec);
- } else {
- LinkSymbol* prev = LinkSyms_at(&img->syms, existing - 1);
- int new_strength = bind_strength((u8)s->bind);
- int old_strength = bind_strength(prev->bind);
- /* COMMON symbols coalesce: largest size wins. */
- if (prev->kind == SK_COMMON && rec.kind == SK_COMMON) {
- if (rec.size > prev->size) {
- u32 new_align = (rec.common_align > prev->common_align)
- ? rec.common_align
- : prev->common_align;
- rec.id = existing;
- rec.common_align = new_align;
- *prev = rec;
- }
- m->sym[e.id] = existing;
- } else if (rec.kind == SK_COMMON) {
- /* Strong def beats COMMON — keep existing. */
- m->sym[e.id] = existing;
- } else if (prev->kind == SK_COMMON) {
- /* Strong def beats COMMON — override. */
- rec.id = existing;
- *prev = rec;
- m->sym[e.id] = existing;
- } else if (new_strength > old_strength) {
- /* This def wins; keep the existing LinkSymId
- * stable so prior references resolve, but
- * update the contents. */
- rec.id = existing;
- *prev = rec;
- m->sym[e.id] = existing;
- } else if (new_strength == old_strength &&
- new_strength == bind_strength(SB_GLOBAL)) {
- /* Two strong defs — fatal. */
- size_t namelen;
- const char* nm = pool_str(l->c->global, s->name, &namelen);
- compiler_panic(l->c, no_loc(),
- "link: duplicate definition of "
- "global symbol '%.*s'",
- (int)namelen, nm);
- } else {
- /* New def is weaker — keep existing, point
- * the per-input map at the existing id so
- * relocations from this input still resolve. */
- m->sym[e.id] = existing;
- }
- }
- } else {
- /* Locals + undefs each get their own LinkSymId. Globals
- * with name == 0 (rare; unnamed temporaries promoted
- * to global by some assemblers) also land here. */
- m->sym[e.id] = append_symbol(img, &rec);
- }
- }
- obj_symiter_free(it);
- }
-}
-
-/* Search the DSO inputs for a defined exported symbol matching
- * `name`. Returns the LinkInputId of the first DSO that exports
- * `name` (with its name interned in the same global pool, so a Sym
- * comparison is sufficient), or LINK_INPUT_NONE if no DSO matches.
- * Walks DSOs in input order so a leftmost-wins rule applies — same
- * behaviour as GNU ld for ambiguous DSO exports. */
-static LinkInputId find_dso_export(Linker* l, Sym name) {
- u32 ii;
- ObjSymIter* it;
- ObjSymEntry e;
- if (name == 0) return LINK_INPUT_NONE;
- for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) {
- LinkInput* in = LinkInputs_at(&l->inputs, ii);
- if (in->kind != LINK_INPUT_DSO_BYTES) continue;
- it = obj_symiter_new(in->obj);
- while (obj_symiter_next(it, &e)) {
- const ObjSym* s = e.sym;
- if (s->name != name) continue;
- if (s->kind == SK_UNDEF) continue;
- if (s->bind == SB_LOCAL) continue;
- obj_symiter_free(it);
- return in->id;
- }
- obj_symiter_free(it);
- }
- return LINK_INPUT_NONE;
-}
-
-static void resolve_undefs(Linker* l, LinkImage* img) {
- u32 i;
- /* For every symbol that's still SK_UNDEF and visible by name, look
- * it up in the global hash. If still undef, try the resolver. If
- * still undef, fatal. */
- for (i = 0; i < LinkSyms_count(&img->syms); ++i) {
- LinkSymbol* s = LinkSyms_at(&img->syms, i);
- if (s->defined) continue;
- if (s->name != 0) {
- LinkSymId hit = symhash_get(&img->globals, s->name);
- if (hit != LINK_SYM_NONE && hit != s->id) {
- LinkSymbol* def = LinkSyms_at(&img->syms, hit - 1);
- if (def->defined) {
- /* Re-point this undef at the global definition by
- * copying the resolved fields. The id remains
- * stable so per-input maps don't need to be
- * rewritten — relocations just look up via this
- * symbol's eventual vaddr. */
- s->section_id = def->section_id;
- s->value = def->value;
- s->vaddr = def->vaddr;
- s->kind = def->kind;
- s->bind = def->bind;
- s->defined = 1;
- continue;
- }
- }
- }
- /* Dynamic-link match: a DSO input exports this name. The symbol
- * stays "structurally undefined" — the static linker never
- * computes a vaddr for it — but we mark it imported so the panic
- * path below leaves it alone, and so later phases (PLT/GOT slot
- * synthesis, .rela.dyn emit) know to wire it through dynamic
- * relocs. The DSO's input id ends up in DT_NEEDED via the
- * input's `soname` field. The actual JUMP_SLOT / GLOB_DAT /
- * needs_plt / needs_got decisions land in Phases 4–5 alongside
- * the synthetic-section work. */
- if (s->name != 0) {
- LinkInputId dso = find_dso_export(l, s->name);
- if (dso != LINK_INPUT_NONE) {
- s->imported = 1;
- s->dso_input_id = dso;
- continue;
- }
- }
- if (l->resolver && s->name != 0) {
- size_t namelen;
- const char* nm = pool_str(l->c->global, s->name, &namelen);
- /* The resolver expects a NUL-terminated C string; pool
- * strings are NUL-terminated by pool_intern. */
- (void)namelen;
- void* p = l->resolver(l->resolver_user, nm);
- if (p) {
- s->kind = SK_ABS;
- s->vaddr = (u64)(uintptr_t)p;
- s->defined = 1;
- continue;
- }
- }
- if (s->bind == SB_WEAK) {
- /* Weak undef resolves to NULL — references that go through
- * the GOT see a zero slot (case 16_weak_undef). Mark as
- * SK_ABS with vaddr=0 so emit/JIT skip the relative-base
- * adjustments. */
- s->kind = SK_ABS;
- s->vaddr = 0;
- s->defined = 1;
- continue;
- }
- {
- size_t namelen;
- const char* nm = s->name ? pool_str(l->c->global, s->name, &namelen)
- : (namelen = 0, "");
- /* Strip the format's C-mangle byte so the diagnostic shows the
- * source-level name (matches decl.c's emit policy). */
- obj_format_demangle_c(l->c, &nm, &namelen);
- compiler_panic(l->c, no_loc(), "link: undefined reference to '%.*s'",
- (int)namelen, nm);
- }
- }
-}
-
-/* ---- pass 1b: --gc-sections liveness ----
- *
- * Granularity is the input section: pairs (input_idx, ObjSecId).
- * Roots:
- * 1. The section defining the entry symbol.
- * 2. Every SSEM_INIT_ARRAY / SSEM_FINI_ARRAY / SSEM_PREINIT_ARRAY
- * (these are KEEP() in standard linker scripts).
- * 3. SF_RETAIN sections (SHF_GNU_RETAIN, i.e. clang's
- * __attribute__((retain)) / used).
- * Edges:
- * For each live section, every reloc whose target sym has a
- * defining section pulls that section live. References whose
- * target name is __start_<X> / __stop_<X> with valid C-identifier
- * <X> additionally pull every section named <X> live.
- * Iterates to a fixed point. When l->gc_sections is 0, every kept
- * section is marked live unconditionally so downstream passes share
- * a single "is this section live?" predicate.
- *
- * The mark table is consumed by layout_sections (skips dead sections),
- * by gc_drop_dead_globals (clears `defined` on syms whose section was
- * dropped), and indirectly by emit_reloc_records / layout_got (which
- * filter through m->section[j] == LINK_SEC_NONE since dead sections
- * never get a LinkSectionId). */
-
-typedef struct GcLive {
- u8** marks; /* marks[input_idx][obj_sec_id]; 0/1, sized to nsec_per_input[ii]
- */
- u32* nsec; /* obj_section_count per input */
- u32 ninputs;
-} GcLive;
-
-typedef struct GcQueue {
- u64* items; /* (u64) packed: hi32 = input_idx, lo32 = obj_sec_id */
- u32 n;
- u32 cap;
-} GcQueue;
-
-#define GC_PACK(ii, j) (((u64)(u32)(ii) << 32) | (u32)(j))
-#define GC_II(p) ((u32)((p) >> 32))
-#define GC_J(p) ((ObjSecId)((p) & 0xffffffffu))
-
-static void gc_queue_push(GcQueue* q, Heap* h, u32 ii, ObjSecId j) {
- if (VEC_GROW(h, q->items, q->cap, q->n + 1u))
- return; /* skip; caller iterates to fixed point */
- q->items[q->n++] = GC_PACK(ii, j);
-}
-
-static void gc_live_alloc(GcLive* g, Linker* l, Heap* h) {
- u32 ii;
- g->ninputs = LinkInputs_count(&l->inputs);
- g->marks =
- LinkInputs_count(&l->inputs)
- ? (u8**)h->alloc(h, sizeof(*g->marks) * LinkInputs_count(&l->inputs),
- _Alignof(u8*))
- : NULL;
- g->nsec =
- LinkInputs_count(&l->inputs)
- ? (u32*)h->alloc(h, sizeof(*g->nsec) * LinkInputs_count(&l->inputs),
- _Alignof(u32))
- : NULL;
- if (LinkInputs_count(&l->inputs) && (!g->marks || !g->nsec))
- compiler_panic(l->c, no_loc(), "link: oom on gc live map");
- for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) {
- u32 nsec = obj_section_count(LinkInputs_at(&l->inputs, ii)->obj);
- g->nsec[ii] = nsec;
- g->marks[ii] = (u8*)h->alloc(h, nsec ? nsec : 1u, 1);
- if (!g->marks[ii]) compiler_panic(l->c, no_loc(), "link: oom on gc marks");
- memset(g->marks[ii], 0, nsec);
- }
-}
-
-static void gc_live_free(GcLive* g, Heap* h) {
- u32 ii;
- if (g->marks) {
- for (ii = 0; ii < g->ninputs; ++ii)
- if (g->marks[ii])
- h->free(h, g->marks[ii], g->nsec[ii] ? g->nsec[ii] : 1u);
- h->free(h, g->marks, sizeof(*g->marks) * g->ninputs);
- }
- if (g->nsec) h->free(h, g->nsec, sizeof(*g->nsec) * g->ninputs);
-}
-
-static int gc_live_get(const GcLive* g, u32 ii, ObjSecId j) {
- if (ii >= g->ninputs || j == OBJ_SEC_NONE || j >= g->nsec[ii]) return 0;
- return g->marks[ii][j];
-}
-
-static void gc_mark(GcLive* g, GcQueue* q, Heap* h, u32 ii, ObjSecId j) {
- if (ii >= g->ninputs || j == OBJ_SEC_NONE || j >= g->nsec[ii]) return;
- if (g->marks[ii][j]) return;
- g->marks[ii][j] = 1;
- gc_queue_push(q, h, ii, j);
-}
-
-/* From a LinkSymId, find the (input_idx, obj_sec_id) of its defining
- * section. Returns 1 on hit. Recurses one level through img->globals
- * for undef symbols whose name resolves to a global definition. */
-static int gc_def_site(LinkImage* img, Linker* l, LinkSymId id, u32* out_ii,
- ObjSecId* out_sid) {
- const LinkSymbol* s;
- ObjBuilder* ob;
- const ObjSym* osym;
- if (id == LINK_SYM_NONE || id > LinkSyms_count(&img->syms)) return 0;
- s = LinkSyms_at(&img->syms, id - 1);
- if (!s->defined) {
- LinkSymId hit;
- if (s->name == 0) return 0;
- hit = symhash_get(&img->globals, s->name);
- if (hit == LINK_SYM_NONE || hit == s->id) return 0;
- return gc_def_site(img, l, hit, out_ii, out_sid);
- }
- if (s->kind == SK_ABS || s->kind == SK_COMMON) return 0;
- if (s->input_id == LINK_INPUT_NONE) return 0; /* synthesized */
- ob = LinkInputs_at(&l->inputs, s->input_id - 1)->obj;
- osym = obj_symbol_get(ob, s->obj_sym);
- if (!osym || osym->section_id == OBJ_SEC_NONE) return 0;
- *out_ii = (u32)(s->input_id - 1u);
- *out_sid = osym->section_id;
- return 1;
-}
-
-/* Detect __start_<X> / __stop_<X> with <X> a valid C identifier.
- * On hit, *out_off is the offset of <X> within `s`, *out_len its
- * length, and *out_is_start is 1 for __start_ / 0 for __stop_.
- * out_is_start may be NULL when the caller doesn't need to
- * distinguish (e.g. GC, which retains for either prefix). */
-static int gc_split_start_stop(const char* s, size_t n, size_t* out_off,
- size_t* out_len, int* out_is_start) {
- static const char START[] = "__start_";
- static const char STOP[] = "__stop_";
- size_t off, len, i;
- int is_start;
- if (n > sizeof(START) - 1u && memcmp(s, START, sizeof(START) - 1u) == 0) {
- off = sizeof(START) - 1u;
- is_start = 1;
- } else if (n > sizeof(STOP) - 1u && memcmp(s, STOP, sizeof(STOP) - 1u) == 0) {
- off = sizeof(STOP) - 1u;
- is_start = 0;
- } else {
- return 0;
- }
- len = n - off;
- if (len == 0) return 0;
- {
- char c = s[off];
- if (!(c == '_' || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')))
- return 0;
- }
- for (i = 1; i < len; ++i) {
- char c = s[off + i];
- if (!(c == '_' || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
- (c >= '0' && c <= '9')))
- return 0;
- }
- *out_off = off;
- *out_len = len;
- if (out_is_start) *out_is_start = is_start;
- return 1;
-}
-
-static void gc_promote_by_section_name(Linker* l, GcLive* g, GcQueue* q,
- Heap* h, Sym section_name) {
- u32 ii, j;
- for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) {
- ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj;
- u32 nsec = obj_section_count(ob);
- for (j = 1; j < nsec; ++j) {
- const Section* s = obj_section_get(ob, j);
- if (!s || !section_kept(s)) continue;
- if (s->name != section_name) continue;
- gc_mark(g, q, h, ii, j);
- }
- }
-}
-
-static void gc_compute(Linker* l, LinkImage* img, GcLive* g) {
- u32 ii, j, k;
- GcQueue q;
- Heap* h = img->heap;
-
- /* GC disabled: every kept section becomes live. Downstream passes
- * use the same is-live predicate, so this keeps logic uniform. */
- if (!l->gc_sections) {
- for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) {
- ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj;
- u32 nsec = obj_section_count(ob);
- for (j = 1; j < nsec; ++j) {
- const Section* s = obj_section_get(ob, j);
- if (s && section_kept(s)) g->marks[ii][j] = 1;
- }
- }
- return;
- }
-
- memset(&q, 0, sizeof(q));
-
- /* Static roots: SF_RETAIN + init/fini/preinit_array. */
- for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) {
- ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj;
- u32 nsec = obj_section_count(ob);
- for (j = 1; j < nsec; ++j) {
- const Section* s = obj_section_get(ob, j);
- int root;
- if (!s || !section_kept(s)) continue;
- root = (s->flags & SF_RETAIN) || s->sem == SSEM_INIT_ARRAY ||
- s->sem == SSEM_FINI_ARRAY || s->sem == SSEM_PREINIT_ARRAY;
- if (root) gc_mark(g, &q, h, ii, j);
- }
- }
-
- /* Entry symbol's defining section. Linker default entry is "_start"
- * (set in link_new); the JIT path overrides via link_set_entry. */
- if (l->entry_name != 0) {
- LinkSymId id = symhash_get(&img->globals, l->entry_name);
- u32 tii;
- ObjSecId tsid;
- if (gc_def_site(img, l, id, &tii, &tsid)) gc_mark(g, &q, h, tii, tsid);
- }
-
- /* Worklist: pop a live section, mark every section reachable through
- * its relocs. Each reloc's target may also be a __start_/__stop_
- * encoding-section reference, in which case sections of that name
- * become live. */
- while (q.n > 0) {
- u64 v = q.items[--q.n];
- u32 cii = GC_II(v);
- ObjSecId cj = GC_J(v);
- ObjBuilder* ob = LinkInputs_at(&l->inputs, cii)->obj;
- InputMap* m = &img->input_maps[cii];
- u32 total = obj_reloc_total(ob);
- (void)obj_section_count;
- if (!total) continue;
- for (k = 0; k < total; ++k) {
- const Reloc* r = obj_reloc_at(ob, k);
- LinkSymId target;
- const LinkSymbol* tsym;
- u32 tii;
- ObjSecId tsid;
- if (r->section_id != cj) continue;
- if (r->sym == OBJ_SYM_NONE || r->sym >= m->nsym) continue;
- target = m->sym[r->sym];
- if (target == LINK_SYM_NONE) continue;
- tsym = LinkSyms_at(&img->syms, target - 1);
-
- if (tsym->name != 0) {
- size_t namelen, off, ilen;
- const char* nm = pool_str(l->c->global, tsym->name, &namelen);
- if (gc_split_start_stop(nm, namelen, &off, &ilen, NULL)) {
- Sym secname = pool_intern(l->c->global, nm + off, ilen);
- gc_promote_by_section_name(l, g, &q, h, secname);
- }
- }
-
- if (gc_def_site(img, l, target, &tii, &tsid))
- gc_mark(g, &q, h, tii, tsid);
- }
- }
-
- if (q.items) h->free(h, q.items, sizeof(*q.items) * q.cap);
-}
-
-/* After layout, clear `defined` on every LinkSymbol whose contributing
- * input section was dropped. The global hash entry stays — lookups
- * (cfree_jit_lookup, link_symbol_lookup) gate on `defined`. */
-static void gc_drop_dead_globals(Linker* l, LinkImage* img, const GcLive* g) {
- u32 i;
- if (!l->gc_sections) return;
- for (i = 0; i < LinkSyms_count(&img->syms); ++i) {
- LinkSymbol* s = LinkSyms_at(&img->syms, i);
- ObjBuilder* ob;
- const ObjSym* osym;
- ObjSecId osid;
- if (!s->defined) continue;
- if (s->kind == SK_ABS || s->kind == SK_COMMON) continue;
- if (s->input_id == LINK_INPUT_NONE) continue;
- ob = LinkInputs_at(&l->inputs, s->input_id - 1)->obj;
- osym = obj_symbol_get(ob, s->obj_sym);
- if (!osym) continue;
- osid = osym->section_id;
- if (osid == OBJ_SEC_NONE) continue;
- if (gc_live_get(g, (u32)(s->input_id - 1u), osid)) continue;
- /* Section was dropped — sym vanishes from the output. */
- s->defined = 0;
- s->vaddr = 0;
- s->section_id = LINK_SEC_NONE;
- }
-}
-
/* ---- pass 2: section assignment + segment layout ---- */
typedef struct SecRef {
@@ -709,12 +119,12 @@ typedef struct PlaceEntry {
u8 pad[3];
} PlaceEntry;
-static void layout_sections_scripted(Linker* l, LinkImage* img,
- const GcLive* g);
+static void link_layout_sections_scripted(Linker* l, LinkImage* img,
+ const GcLive* g);
-static void layout_sections(Linker* l, LinkImage* img, const GcLive* g) {
+void link_layout_sections(Linker* l, LinkImage* img, const GcLive* g) {
if (l->script) {
- layout_sections_scripted(l, img, g);
+ link_layout_sections_scripted(l, img, g);
return;
}
Heap* h = img->heap;
@@ -726,7 +136,7 @@ static void layout_sections(Linker* l, LinkImage* img, const GcLive* g) {
ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj;
for (j = 1; j < obj_section_count(ob); ++j) {
const Section* s = obj_section_get(ob, j);
- if (s && section_kept(s) && gc_live_get(g, ii, j)) ++total_kept;
+ if (s && link_section_kept(s) && link_gc_live_get(g, ii, j)) ++total_kept;
}
}
@@ -750,11 +160,11 @@ static void layout_sections(Linker* l, LinkImage* img, const GcLive* g) {
ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj;
for (j = 1; j < obj_section_count(ob); ++j) {
const Section* s = obj_section_get(ob, j);
- if (!s || !section_kept(s) || !gc_live_get(g, ii, j)) continue;
+ if (!s || !link_section_kept(s) || !link_gc_live_get(g, ii, j)) continue;
entries[e].input_idx = ii;
entries[e].obj_sec_id = j;
entries[e].name = s->name;
- entries[e].bucket = bucket_for(s->flags);
+ entries[e].bucket = link_bucket_for(s->flags);
entries[e].placed = 0;
++e;
}
@@ -870,7 +280,7 @@ static void layout_sections(Linker* l, LinkImage* img, const GcLive* g) {
if (!seg_count[b]) continue;
nat_align = seg_align[b] ? seg_align[b] : 1u;
align = (u64)nat_align;
- if (align < layout_page_size(l)) align = layout_page_size(l);
+ if (align < link_layout_page_size(l)) align = link_layout_page_size(l);
cursor = ALIGN_UP(cursor, (u64)(align));
seg = &img->segments[img->nsegments];
@@ -939,12 +349,11 @@ static void layout_sections(Linker* l, LinkImage* img, const GcLive* g) {
* placing matched input sections at the dot location counter. One
* LinkSegment per non-DISCARD output section maps 1:1 to a PT_LOAD on
* emit. Symbol assignments (top-level and in-section) materialize as
- * defined LinkSymbol globals via upsert_global_sym (the same upsert
- * pattern emit_boundary_sym uses).
+ * defined LinkSymbol globals via link_emit_boundary_sym.
*
* Discard handling: `/DISCARD/` matches input sections by glob and
* leaves their per-input m->section[id] entry as LINK_SEC_NONE — the
- * downstream emit_reloc_records / link_symbols_to_sections passes
+ * downstream emit_reloc_records / link_assign_symbol_vaddrs passes
* already treat that as "section dropped" so they're naturally
* excluded from segments, gc, and reloc apply. */
@@ -1039,28 +448,60 @@ static u64 eval_link_expr(Linker* l, LinkImage* img, u64 dot,
}
}
-/* Format-aware C-symbol mangling for linker-synthesized boundaries.
- * Defers to obj_format_c_mangle so the boundary symbols match the
- * mangling decl.c emits for the corresponding `extern` references. */
+/* Format-aware C-symbol mangling for linker-synthesized boundaries. */
static Sym boundary_name(Linker* l, const char* name) {
return obj_format_c_mangle(l->c, name);
}
-static void emit_boundary_sym(Linker* l, LinkImage* img, const char* name,
- u64 vaddr); /* defined below */
+/* Upsert a global symbol with the given absolute vaddr. `name` is first
+ * passed through boundary_name(). If the global hash already has an entry
+ * for it, that LinkSymbol is overwritten in place (its id is kept);
+ * otherwise a new symbol is appended and inserted into the hash. Every
+ * other non-local LinkSymbol carrying the same name is then marked
+ * defined at `vaddr` too, so prior undef refs are satisfied in place. */
+void link_emit_boundary_sym(Linker* l, LinkImage* img, const char* name,
+ u64 vaddr) {
+ Sym sym = boundary_name(l, name);
+ LinkSymId id = symhash_get(&img->globals, sym);
+ LinkSymbol rec;
+ u32 i, n;
+ /* Template record: a defined SB_GLOBAL SK_OBJ at vaddr; all other
+ * fields are zero. */
+ memset(&rec, 0, sizeof(rec));
+ rec.name = sym;
+ rec.kind = SK_OBJ;
+ rec.defined = 1;
+ rec.vaddr = vaddr;
+ rec.bind = SB_GLOBAL;
+ if (id != LINK_SYM_NONE) {
+ /* Existing hash entry: replace its contents, then restore its id. */
+ *LinkSyms_at(&img->syms, id - 1) = rec;
+ LinkSyms_at(&img->syms, id - 1)->id = id;
+ } else {
+ LinkSymId fresh = link_append_symbol(img, &rec);
+ symhash_insert(&img->globals, sym, fresh, &id);
+ }
+ /* Fan out to the other same-named, non-local symbol records. */
+ n = LinkSyms_count(&img->syms);
+ for (i = 0; i < n; ++i) {
+ LinkSymbol* s = LinkSyms_at(&img->syms, i);
+ if (s->name != sym) continue;
+ /* NOTE(review): on the append path `id` holds whatever symhash_insert
+ * stored in its out-param; confirm this skip still matches the fresh
+ * symbol. */
+ if (s->id == id) continue;
+ if (s->bind == SB_LOCAL) continue;
+ s->section_id = LINK_SEC_NONE;
+ s->value = 0;
+ s->vaddr = vaddr;
+ s->kind = SK_OBJ;
+ s->defined = 1;
+ s->imported = 0;
+ }
+}
+
+/* link_define_boundary: public alias used by link_dyn.c. */
+void link_define_boundary(Linker* l, LinkImage* img, const char* name,
+ u64 vaddr) {
+ link_emit_boundary_sym(l, img, name, vaddr);
+}
-/* Upsert a global symbol with the given absolute vaddr. Mirrors the
- * emit_boundary_sym pattern: satisfies any prior undef ref in place;
- * fans out to per-input duplicate name slots. */
+/* Upsert a global symbol: thin wrapper over link_emit_boundary_sym, used by apply_asn. */
static void upsert_global_sym(Linker* l, LinkImage* img, const char* name,
u64 vaddr) {
- emit_boundary_sym(l, img, name, vaddr);
+ link_emit_boundary_sym(l, img, name, vaddr);
}
-/* Apply one CfreeLinkAssignment. CFREE_LAS_DOT updates *dot; SYM /
- * PROVIDE upserts a global. PROVIDE only fires when the name isn't
- * already strongly defined; for v1 we accept it as equivalent to SYM
- * (no input to kernel.lds defines these names). */
+/* Apply one CfreeLinkAssignment. */
static void apply_asn(Linker* l, LinkImage* img, u64* dot,
const CfreeLinkAssignment* asn) {
int err = 0;
@@ -1082,38 +523,28 @@ static void apply_asn(Linker* l, LinkImage* img, u64* dot,
}
static int input_match_section(const CfreeLinkInputMatch* m, const char* nm) {
- /* file_pattern is ignored for v1 — kernel.lds uses `*(...)` only. */
return match_glob(m->section_pattern, nm);
}
-static void layout_sections_scripted(Linker* l, LinkImage* img,
- const GcLive* g) {
+static void link_layout_sections_scripted(Linker* l, LinkImage* img,
+ const GcLive* g) {
Heap* h = img->heap;
const CfreeLinkScript* script = l->script;
u64 dot = 0;
- /* Scripted layout: vaddrs are absolute (driven by `dot`), but file
- * offsets follow a separate cursor packed sequentially after the
- * eventual ehdr+phdrs. The writer adds headers_load to file_offsets
- * (only) post-layout. */
u64 file_cursor = 0;
u32 ii, j, k, si;
u32 total_kept = 0;
img->scripted = 1;
- /* Pass 0: count GC-live, kept, allocatable input sections — the
- * upper bound on placeable LinkSections. The actual count placed
- * may be lower (DISCARD sinks, unmatched). */
for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) {
ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj;
for (j = 1; j < obj_section_count(ob); ++j) {
const Section* s = obj_section_get(ob, j);
- if (s && section_kept(s) && gc_live_get(g, ii, j)) ++total_kept;
+ if (s && link_section_kept(s) && link_gc_live_get(g, ii, j)) ++total_kept;
}
}
- /* Pre-allocate img->sections at the upper bound; img->nsections
- * tracks the actual count placed. */
img->sections = total_kept ? (LinkSection*)h->alloc(
h, sizeof(*img->sections) * total_kept,
_Alignof(LinkSection))
@@ -1121,8 +552,6 @@ static void layout_sections_scripted(Linker* l, LinkImage* img,
if (total_kept && !img->sections)
compiler_panic(img->c, no_loc(), "link: oom on sections");
- /* Per-section "claimed" bitmap to enforce single-claim across the
- * whole script. Indexed by [input_idx][obj_sec_id]. */
u8** claimed = NULL;
if (LinkInputs_count(&l->inputs)) {
u32 ni = LinkInputs_count(&l->inputs);
@@ -1138,17 +567,11 @@ static void layout_sections_scripted(Linker* l, LinkImage* img,
}
}
- /* Pass 1: top-level dot assignments establish the base address
- * before any placement. SYM/PROVIDE assignments at the top level
- * are deferred to pass 3 so they capture the post-placement dot
- * (e.g. `_end = .` at the script's tail). */
for (k = 0; k < script->ntop_asns; ++k) {
const CfreeLinkAssignment* a = &script->top_asns[k];
if (a->kind == CFREE_LAS_DOT) apply_asn(l, img, &dot, a);
}
- /* Pre-allocate img->segments at the upper bound (one per non-DISCARD
- * output section). */
u32 nseg_max = 0;
for (si = 0; si < script->nsections; ++si)
if (strcmp(script->sections[si].name, "/DISCARD/") != 0) ++nseg_max;
@@ -1174,16 +597,11 @@ static void layout_sections_scripted(Linker* l, LinkImage* img,
sizeof(*img->segment_bytes_cap) * nseg_max);
}
- /* Pass 2: walk output sections in declaration order. */
for (si = 0; si < script->nsections; ++si) {
const CfreeLinkOutputSection* os = &script->sections[si];
int is_discard = (strcmp(os->name, "/DISCARD/") == 0);
if (is_discard) {
- /* Mark every matched input section as claimed. We don't add
- * them to img->sections; their m->section[id] stays
- * LINK_SEC_NONE so reloc-apply / link_symbols_to_sections
- * naturally skip them. */
u32 mi;
for (mi = 0; mi < os->ninputs; ++mi) {
const CfreeLinkInputMatch* im = &os->inputs[mi];
@@ -1205,11 +623,6 @@ static void layout_sections_scripted(Linker* l, LinkImage* img,
continue;
}
- /* Non-DISCARD output section. Process all in-section asns first
- * (header ALIGN encoded as the first dot-asn, plus any
- * `__bss_start = .` style early captures), then walk inputs in
- * declaration order, claiming matches across all inputs in input
- * order. Each placed input section advances dot. */
u64 sec_start_dot;
u32 perms = 0;
LinkSegmentId seg_id = (LinkSegmentId)(img->nsegments + 1u);
@@ -1220,13 +633,11 @@ static void layout_sections_scripted(Linker* l, LinkImage* img,
u32 nsec_in_seg = 0;
u32 first_section_idx = img->nsections;
- /* Apply in-section asns (pre-input). */
for (k = 0; k < os->nasns; ++k) {
apply_asn(l, img, &dot, &os->asns[k]);
}
sec_start_dot = dot;
- /* Walk input matches; for each, scan all inputs in input order. */
{
u32 mi;
for (mi = 0; mi < os->ninputs; ++mi) {
@@ -1243,9 +654,9 @@ static void layout_sections_scripted(Linker* l, LinkImage* img,
LinkSection* ls;
LinkSectionId lsid;
if (claimed[ii][j]) continue;
- if (!gc_live_get(g, ii, j)) continue;
+ if (!link_gc_live_get(g, ii, j)) continue;
s = obj_section_get(ob, j);
- if (!s || !section_kept(s)) continue;
+ if (!s || !link_section_kept(s)) continue;
nm = pool_str(l->c->global, s->name, &nl);
if (!nm) continue;
if (!input_match_section(im, nm)) continue;
@@ -1268,8 +679,6 @@ static void layout_sections_scripted(Linker* l, LinkImage* img,
ls->align = align;
ls->name = s->name;
ls->sem = s->sem;
- /* file_offset within the segment buffer: distance from
- * sec_start_dot. NOBITS contributes no file bytes. */
ls->file_offset = ofs - sec_start_dot;
ls->input_offset = ls->file_offset;
m->section[j] = lsid;
@@ -1285,11 +694,7 @@ static void layout_sections_scripted(Linker* l, LinkImage* img,
}
}
- /* Materialize the segment for this output section. Empty output
- * sections (no input matched) are dropped — they'd produce an
- * empty PT_LOAD which the loader rejects. */
if (nsec_in_seg == 0) {
- /* Roll back nsections (no entries appended in the empty case). */
continue;
}
@@ -1298,8 +703,6 @@ static void layout_sections_scripted(Linker* l, LinkImage* img,
seg->id = seg_id;
seg->flags = SF_ALLOC | perms;
seg->vaddr = sec_start_dot;
- /* Page-align each segment's file offset so the writer can keep file
- * offset and vaddr congruent mod page size for the runtime loader. */
file_cursor = ALIGN_UP(file_cursor, (u64)PAGE_SIZE);
seg->file_offset = file_cursor;
seg->mem_size = mem_size_accum;
@@ -1316,10 +719,6 @@ static void layout_sections_scripted(Linker* l, LinkImage* img,
memset(img->segment_bytes[img->nsegments], 0, (size_t)file_size_accum);
}
- /* Shift each section's vaddr/file_offset onto the segment's
- * absolute base. Sections were laid out with vaddr = absolute
- * dot already, so vaddr is correct as-is; file_offset needs
- * to become absolute (segment-base + relative). */
{
u32 fi;
for (fi = first_section_idx; fi < img->nsections; ++fi) {
@@ -1331,15 +730,12 @@ static void layout_sections_scripted(Linker* l, LinkImage* img,
img->nsegments++;
}
- /* Pass 3: top-level SYM / PROVIDE assignments capture the
- * post-placement dot (e.g. `_end = .`). */
for (k = 0; k < script->ntop_asns; ++k) {
const CfreeLinkAssignment* a = &script->top_asns[k];
if (a->kind == CFREE_LAS_SYM || a->kind == CFREE_LAS_PROVIDE)
apply_asn(l, img, &dot, a);
}
- /* Free claim map. */
if (claimed) {
u32 ni = LinkInputs_count(&l->inputs);
for (ii = 0; ii < ni; ++ii) {
@@ -1351,14 +747,12 @@ static void layout_sections_scripted(Linker* l, LinkImage* img,
}
/* ---- pass 2b: COMMON symbol BSS allocation ---- */
-/* After segments are laid out, extend the RW segment's BSS area to
- * accommodate any SK_COMMON symbols that have no section yet. */
-static void layout_commons(Linker* l, LinkImage* img) {
+
+void link_layout_commons(Linker* l, LinkImage* img) {
u32 i;
(void)l;
LinkSegment* rw_seg = NULL;
- /* Find the RW segment. */
for (i = 0; i < img->nsegments; ++i) {
if (img->segments[i].flags & SF_WRITE) {
rw_seg = &img->segments[i];
@@ -1366,7 +760,6 @@ static void layout_commons(Linker* l, LinkImage* img) {
}
}
- /* First pass: check if we even have COMMON symbols. */
{
int has_common = 0;
for (i = 0; i < LinkSyms_count(&img->syms); ++i)
@@ -1378,7 +771,6 @@ static void layout_commons(Linker* l, LinkImage* img) {
if (!has_common) return;
}
- /* If no RW segment exists, create one. */
if (!rw_seg) {
u32 nseg = img->nsegments + 1u;
LinkSegment* segs;
@@ -1389,7 +781,7 @@ static void layout_commons(Linker* l, LinkImage* img) {
u64 end = img->segments[i].vaddr + img->segments[i].mem_size;
if (end > vaddr) vaddr = end;
}
- vaddr = ALIGN_UP(vaddr, (u64)(layout_page_size(l)));
+ vaddr = ALIGN_UP(vaddr, (u64)(link_layout_page_size(l)));
segs = (LinkSegment*)img->heap->realloc(
img->heap, img->segments, sizeof(*img->segments) * img->nsegments,
sizeof(*img->segments) * nseg, _Alignof(LinkSegment));
@@ -1415,20 +807,12 @@ static void layout_commons(Linker* l, LinkImage* img) {
rw_seg->file_offset = vaddr;
rw_seg->file_size = 0;
rw_seg->mem_size = 0;
- rw_seg->align = (u32)layout_page_size(l);
+ rw_seg->align = (u32)link_layout_page_size(l);
img->segment_bytes[img->nsegments] = NULL;
img->segment_bytes_cap[img->nsegments] = 0;
img->nsegments++;
}
- /* Synthesize a single NOBITS LinkSection that wraps every COMMON
- * symbol. Without a backing section, COMMON symbols carry a vaddr
- * but no section_id — Mach-O's shift_sections rebases by
- * section_id and would leave their vaddrs at pre-shift coordinates,
- * with the __got slot pointing into the wrong segment. Giving each
- * common a section_id lets link_symbols_to_sections recompute vaddr
- * from section.vaddr + value, and plan_layout / shift_sections
- * picks it up like any other writable zerofill section. */
{
Heap* h = img->heap;
u64 bss_start = rw_seg->vaddr + rw_seg->mem_size;
@@ -1437,7 +821,6 @@ static void layout_commons(Linker* l, LinkImage* img) {
LinkSection* commsec;
LinkSectionId comm_lsid;
- /* First sweep computes layout + max alignment. */
for (i = 0; i < LinkSyms_count(&img->syms); ++i) {
LinkSymbol* s = LinkSyms_at(&img->syms, i);
u32 align;
@@ -1449,7 +832,6 @@ static void layout_commons(Linker* l, LinkImage* img) {
bss_cursor += s->size ? s->size : 1u;
}
- /* Append the synthetic NOBITS LinkSection. */
{
u32 new_nsec = img->nsections + 1u;
LinkSection* nsec = (LinkSection*)h->realloc(
@@ -1476,13 +858,12 @@ static void layout_commons(Linker* l, LinkImage* img) {
commsec->sem = SSEM_NOBITS;
img->nsections++;
- /* Second sweep wires each common to the synthetic section. */
for (i = 0; i < LinkSyms_count(&img->syms); ++i) {
LinkSymbol* s = LinkSyms_at(&img->syms, i);
if (s->kind != SK_COMMON || !s->defined) continue;
s->section_id = comm_lsid;
s->vaddr = bss_start + s->value;
- s->kind = SK_OBJ; /* no longer COMMON once placed */
+ s->kind = SK_OBJ;
}
rw_seg->mem_size = bss_cursor - rw_seg->vaddr;
@@ -1490,14 +871,13 @@ static void layout_commons(Linker* l, LinkImage* img) {
}
}
-/* Copy each input section's bytes into its segment buffer. BSS
- * sections contribute no file bytes. */
-static void emit_segment_bytes(Linker* l, LinkImage* img) {
+/* Copy each input section's bytes into its segment buffer. */
+void link_emit_segment_bytes(Linker* l, LinkImage* img) {
u32 j;
for (j = 0; j < img->nsections; ++j) {
LinkSection* ls = &img->sections[j];
ObjBuilder* ob;
- if (ls->input_id == LINK_INPUT_NONE) continue; /* synthetic (e.g. .bss.common) */
+ if (ls->input_id == LINK_INPUT_NONE) continue;
ob = LinkInputs_at(&l->inputs, ls->input_id - 1)->obj;
const Section* s = obj_section_get(ob, ls->obj_section_id);
LinkSegment* seg = &img->segments[ls->segment_id - 1];
@@ -1510,1662 +890,18 @@ static void emit_segment_bytes(Linker* l, LinkImage* img) {
}
}
-/* ---- pass 3: assign symbol vaddrs (now that section vaddrs are final) ----
- *
- * Map per-input ObjSecId -> LinkSectionId on every defined symbol, then
- * compute its final image-relative vaddr. Run after resolve_symbols and
- * layout_sections so both the per-input maps and section vaddrs exist. */
-static void link_symbols_to_sections(Linker* l, LinkImage* img) {
- u32 ii;
- for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) {
- LinkInput* in = LinkInputs_at(&l->inputs, ii);
- ObjBuilder* ob = in->obj;
- InputMap* m = &img->input_maps[ii];
- ObjSymIter* it;
- ObjSymEntry e;
- /* DSO inputs were skipped in resolve_symbols — their per-input
- * map is unallocated. They contribute no defined LinkSymbols
- * either, so there's nothing to map to a section. */
- if (in->kind == LINK_INPUT_DSO_BYTES) continue;
- it = obj_symiter_new(ob);
- while (obj_symiter_next(it, &e)) {
- LinkSymId lsid = m->sym[e.id];
- LinkSymbol* ls;
- if (lsid == LINK_SYM_NONE) continue;
- ls = LinkSyms_at(&img->syms, lsid - 1);
- if (!ls->defined) continue;
- if (ls->kind == SK_ABS && ls->vaddr != 0) continue;
- if (e.sym->section_id == OBJ_SEC_NONE) continue;
- /* Only update from this input if this is the input that
- * contributed the winning definition. */
- if (ls->input_id != LinkInputs_at(&l->inputs, ii)->id) continue;
- ls->section_id = m->section[e.sym->section_id];
- }
- obj_symiter_free(it);
- }
- /* Now compute vaddrs. */
- {
- u32 i;
- for (i = 0; i < LinkSyms_count(&img->syms); ++i) {
- LinkSymbol* s = LinkSyms_at(&img->syms, i);
- if (s->kind == SK_ABS && s->vaddr != 0) continue;
- if (!s->defined) continue;
- if (s->section_id == LINK_SEC_NONE) continue;
- s->vaddr = img->sections[s->section_id - 1].vaddr + s->value;
- }
- }
- /* Resolve undef-against-global once defs are addressed. */
- {
- u32 i;
- for (i = 0; i < LinkSyms_count(&img->syms); ++i) {
- LinkSymbol* s = LinkSyms_at(&img->syms, i);
- if (s->defined) continue;
- if (s->name == 0) continue;
- {
- LinkSymId hit = symhash_get(&img->globals, s->name);
- if (hit != LINK_SYM_NONE && hit != s->id) {
- LinkSymbol* def = LinkSyms_at(&img->syms, hit - 1);
- if (def->defined) {
- s->section_id = def->section_id;
- s->value = def->value;
- s->vaddr = def->vaddr;
- s->kind = def->kind;
- s->defined = 1;
- }
- }
- }
- }
- }
-}
-
-/* ---- pass 3b: linker-synthesized boundary symbols ---- */
-
-void link_define_boundary(Linker* l, LinkImage* img, const char* name,
- u64 vaddr) {
- emit_boundary_sym(l, img, name, vaddr);
-}
-
-static void emit_boundary_sym(Linker* l, LinkImage* img, const char* name,
- u64 vaddr) {
- Sym sym = boundary_name(l, name);
- LinkSymId id = symhash_get(&img->globals, sym);
- LinkSymbol rec;
- u32 i, n;
- memset(&rec, 0, sizeof(rec));
- rec.name = sym;
- rec.kind = SK_OBJ;
- rec.defined = 1;
- rec.vaddr = vaddr;
- rec.bind = SB_GLOBAL;
- if (id != LINK_SYM_NONE) {
- /* Satisfy any existing undef reference. */
- *LinkSyms_at(&img->syms, id - 1) = rec;
- LinkSyms_at(&img->syms, id - 1)->id = id;
- } else {
- LinkSymId fresh = append_symbol(img, &rec);
- symhash_insert(&img->globals, sym, fresh, &id);
- }
- /* Per-input undef LinkSymbols are stored in their own slots
- * (resolve_symbols never folds undefs into the def's slot). When
- * an emit_boundary_sym call runs after resolve_undefs (e.g.
- * layout_iplt's __start_iplt_pairs / __stop_iplt_pairs), each
- * undef ref already carries a stale vaddr (zero, from a
- * weak-zero resolve, or whatever the prior def held). Walk
- * img->syms by name and re-copy so downstream consumers
- * (layout_got's GOT-slot ABS64 fills, emit_reloc_records) see
- * the new vaddr. Locals never share names with globals so the
- * bind check just guards the unusual case of a local with the
- * same name. */
- n = LinkSyms_count(&img->syms);
- for (i = 0; i < n; ++i) {
- LinkSymbol* s = LinkSyms_at(&img->syms, i);
- if (s->name != sym) continue;
- if (s->id == id) continue;
- if (s->bind == SB_LOCAL) continue;
- s->section_id = LINK_SEC_NONE;
- s->value = 0;
- s->vaddr = vaddr;
- s->kind = SK_OBJ;
- s->defined = 1;
- /* If resolve_undefs previously matched this name as an import
- * from a DSO (e.g. libc.so exports _DYNAMIC for its own image),
- * the linker-supplied definition wins — clear the import marker
- * so apply_all_relocs treats it as a normal defined symbol. */
- s->imported = 0;
- }
-}
-
-static void emit_array_boundaries(Linker* l, LinkImage* img) {
- u32 ii, j;
- /* Per-semantic: track [min_vaddr, max_vaddr+size]. */
- u64 init_start = (u64)-1, init_end = 0;
- u64 fini_start = (u64)-1, fini_end = 0;
- u64 preinit_start = (u64)-1, preinit_end = 0;
-
- for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) {
- ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj;
- InputMap* m = &img->input_maps[ii];
- for (j = 1; j < obj_section_count(ob); ++j) {
- const Section* s = obj_section_get(ob, j);
- LinkSectionId ls_id;
- const LinkSection* ls;
- u64 start, end;
- if (!s) continue;
- if (s->sem != SSEM_INIT_ARRAY && s->sem != SSEM_FINI_ARRAY &&
- s->sem != SSEM_PREINIT_ARRAY)
- continue;
- ls_id = m->section[j];
- if (ls_id == LINK_SEC_NONE) continue;
- ls = &img->sections[ls_id - 1];
- start = ls->vaddr;
- end = ls->vaddr + ls->size;
- if (s->sem == SSEM_INIT_ARRAY) {
- if (start < init_start) init_start = start;
- if (end > init_end) init_end = end;
- } else if (s->sem == SSEM_FINI_ARRAY) {
- if (start < fini_start) fini_start = start;
- if (end > fini_end) fini_end = end;
- } else {
- if (start < preinit_start) preinit_start = start;
- if (end > preinit_end) preinit_end = end;
- }
- }
- }
-
- /* Synthetic init/fini/preinit sections (e.g. layout_iplt's
- * .preinit_array entry pointing at __cfree_ifunc_init) carry
- * input_id == LINK_INPUT_NONE and aren't visible through the
- * input_maps loop above; fold them in here so the boundary
- * symbols cover them too. */
- {
- u32 i;
- for (i = 0; i < img->nsections; ++i) {
- const LinkSection* ls = &img->sections[i];
- u64 start, end;
- if (ls->input_id != LINK_INPUT_NONE) continue;
- if (ls->sem != SSEM_INIT_ARRAY && ls->sem != SSEM_FINI_ARRAY &&
- ls->sem != SSEM_PREINIT_ARRAY)
- continue;
- start = ls->vaddr;
- end = ls->vaddr + ls->size;
- if (ls->sem == SSEM_INIT_ARRAY) {
- if (start < init_start) init_start = start;
- if (end > init_end) init_end = end;
- } else if (ls->sem == SSEM_FINI_ARRAY) {
- if (start < fini_start) fini_start = start;
- if (end > fini_end) fini_end = end;
- } else {
- if (start < preinit_start) preinit_start = start;
- if (end > preinit_end) preinit_end = end;
- }
- }
- }
-
- if (init_start == (u64)-1) {
- init_start = 0;
- init_end = 0;
- }
- if (fini_start == (u64)-1) {
- fini_start = 0;
- fini_end = 0;
- }
- if (preinit_start == (u64)-1) {
- preinit_start = 0;
- preinit_end = 0;
- }
-
- emit_boundary_sym(l, img, "__init_array_start", init_start);
- emit_boundary_sym(l, img, "__init_array_end", init_end);
- emit_boundary_sym(l, img, "__fini_array_start", fini_start);
- emit_boundary_sym(l, img, "__fini_array_end", fini_end);
- emit_boundary_sym(l, img, "__preinit_array_start", preinit_start);
- emit_boundary_sym(l, img, "__preinit_array_end", preinit_end);
-}
-
-/* Synthesize TLS boundary symbols so the freestanding _start can size
- * and seed the per-thread block:
- * __tdata_start, __tdata_end : image vaddrs of the .tdata template
- * (memcpy source for the new TLS block).
- * __tbss_size : SK_ABS holding the .tbss byte count
- * (memset target after the .tdata copy).
- * All three are always emitted. When no TLS exists they resolve to
- * zero, which makes the _start TLS prologue a no-op. The .tdata
- * extent is the file portion of the TLS segment; the .tbss extent is
- * the trailing memsz - filesz tail. */
-static void emit_tls_boundaries(Linker* l, LinkImage* img) {
- u64 tdata_start = img->tls_vaddr;
- u64 tdata_end = img->tls_vaddr + img->tls_filesz;
- u64 tbss_size = img->tls_memsz - img->tls_filesz;
- Sym sym_size = pool_intern_cstr(l->c->global, "__tbss_size");
- LinkSymId id;
- LinkSymbol rec;
-
- emit_boundary_sym(l, img, "__tdata_start", tdata_start);
- emit_boundary_sym(l, img, "__tdata_end", tdata_end);
-
- /* __tbss_size is an absolute count, not an address: SK_ABS so
- * shift_image_addresses leaves it alone and the symbol's value
- * IS the size when read as `(size_t)__tbss_size`. */
- id = symhash_get(&img->globals, sym_size);
- memset(&rec, 0, sizeof(rec));
- rec.name = sym_size;
- rec.kind = SK_ABS;
- rec.bind = SB_GLOBAL;
- rec.defined = 1;
- rec.vaddr = tbss_size;
- if (id != LINK_SYM_NONE) {
- *LinkSyms_at(&img->syms, id - 1) = rec;
- LinkSyms_at(&img->syms, id - 1)->id = id;
- } else {
- LinkSymId fresh = append_symbol(img, &rec);
- symhash_insert(&img->globals, sym_size, fresh, &id);
- }
-}
-
-/* ---- pass 3c: __start_<X>/__stop_<X> encoding-section boundaries ----
- *
- * For every undef LinkSymbol whose name is __start_<X>/__stop_<X> with
- * <X> a valid C identifier, find the span of every output LinkSection
- * sourced from an input section named <X>, and resolve the symbol to
- * its low (start) or high (stop) vaddr. Sections that were dropped by
- * GC don't contribute (m->section[j] == LINK_SEC_NONE). */
-static void emit_encoding_section_boundaries(Linker* l, LinkImage* img) {
- u32 i, ii, j;
- for (i = 0; i < LinkSyms_count(&img->syms); ++i) {
- LinkSymbol* sym = LinkSyms_at(&img->syms, i);
- const char* nm;
- size_t namelen, off, ilen;
- int is_start;
- Sym secname;
- u64 lo = (u64)-1;
- u64 hi = 0;
- int found = 0;
- if (sym->defined) continue;
- if (sym->name == 0) continue;
- nm = pool_str(l->c->global, sym->name, &namelen);
- if (!gc_split_start_stop(nm, namelen, &off, &ilen, &is_start)) continue;
- secname = pool_intern(l->c->global, nm + off, ilen);
- for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) {
- ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj;
- InputMap* m = &img->input_maps[ii];
- for (j = 1; j < obj_section_count(ob); ++j) {
- const Section* s = obj_section_get(ob, j);
- LinkSectionId ls_id;
- const LinkSection* ls;
- u64 start, end;
- if (!s || s->name != secname) continue;
- ls_id = m->section[j];
- if (ls_id == LINK_SEC_NONE) continue;
- ls = &img->sections[ls_id - 1];
- start = ls->vaddr;
- end = ls->vaddr + ls->size;
- if (start < lo) lo = start;
- if (end > hi) hi = end;
- found = 1;
- }
- }
- if (!found) continue;
- sym->kind = SK_OBJ;
- sym->bind = SB_GLOBAL;
- sym->defined = 1;
- sym->vaddr = is_start ? lo : hi;
- }
-}
-
-/* ---- pass 4: relocation records ---- */
-
-static u8 reloc_width(RelocKind k) {
- switch (k) {
- case R_ABS32:
- case R_REL32:
- case R_PC32:
- case R_GOT32:
- case R_PLT32:
- case R_X64_PLT32:
- case R_X64_32S:
- case R_X64_TPOFF32:
- case R_X64_GOTPCREL:
- case R_X64_GOTPCRELX:
- case R_X64_REX_GOTPCRELX:
- case R_X64_GOTPC32:
- return 4;
- case R_ABS64:
- case R_REL64:
- case R_PC64:
- case R_X64_TPOFF64:
- return 8;
- case R_AARCH64_ABS16:
- case R_AARCH64_PREL16:
- return 2;
- case R_AARCH64_JUMP26:
- case R_AARCH64_CALL26:
- case R_AARCH64_CONDBR19:
- case R_AARCH64_TSTBR14:
- case R_AARCH64_LD_PREL_LO19:
- case R_AARCH64_ADR_PREL_LO21:
- case R_AARCH64_ADR_PREL_PG_HI21:
- case R_AARCH64_ADR_PREL_PG_HI21_NC:
- case R_AARCH64_ADD_ABS_LO12_NC:
- case R_AARCH64_LDST8_ABS_LO12_NC:
- case R_AARCH64_LDST16_ABS_LO12_NC:
- case R_AARCH64_LDST32_ABS_LO12_NC:
- case R_AARCH64_LDST64_ABS_LO12_NC:
- case R_AARCH64_LDST128_ABS_LO12_NC:
- case R_AARCH64_ADR_GOT_PAGE:
- case R_AARCH64_LD64_GOT_LO12_NC:
- case R_AARCH64_TLSLE_ADD_TPREL_HI12:
- case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
- case R_AARCH64_TLVP_LOAD_PAGE21:
- case R_AARCH64_TLVP_LOAD_PAGEOFF12:
- return 4;
- case R_RV_HI20:
- case R_RV_LO12_I:
- case R_RV_LO12_S:
- case R_RV_BRANCH:
- case R_RV_JAL:
- case R_RV_PCREL_HI20:
- case R_RV_PCREL_LO12_I:
- case R_RV_PCREL_LO12_S:
- case R_RV_GOT_HI20:
- case R_RV_TPREL_HI20:
- case R_RV_TPREL_LO12_I:
- case R_RV_TPREL_LO12_S:
- return 4;
- case R_RV_CALL:
- return 8;
- case R_RV_RVC_BRANCH:
- case R_RV_RVC_JUMP:
- return 2;
- /* Marker relocs that don't alter site bytes; width nonzero so the
- * apply path treats them as recognized. */
- case R_RV_RELAX:
- case R_RV_TPREL_ADD:
- return 4;
- /* RISC-V ADD/SUB/SET fixup relocs — modify the section bytes in
- * place. Width is the byte count touched. SUB6/SET6 modify one
- * byte (the low 6 bits) like SET8/SUB8. */
- case R_RV_ADD8:
- case R_RV_SUB8:
- case R_RV_SUB6:
- case R_RV_SET6:
- case R_RV_SET8:
- return 1;
- case R_RV_ADD16:
- case R_RV_SUB16:
- case R_RV_SET16:
- return 2;
- case R_RV_ADD32:
- case R_RV_SUB32:
- case R_RV_SET32:
- return 4;
- case R_RV_ADD64:
- case R_RV_SUB64:
- return 8;
- default:
- return 0;
- }
-}
-
-static int reloc_uses_got(u16 kind) {
- switch (kind) {
- case R_AARCH64_ADR_GOT_PAGE:
- case R_AARCH64_LD64_GOT_LO12_NC:
- case R_X64_GOTPCREL:
- case R_X64_GOTPCRELX:
- case R_X64_REX_GOTPCRELX:
- case R_RV_GOT_HI20:
- return 1;
- default:
- return 0;
- }
-}
-
-/* Forward decls — defined alongside layout_iplt below. */
-static u32 layout_iplt_alloc_segments(LinkImage* img, u32 nseg);
-static u32 layout_iplt_alloc_sections(LinkImage* img, u32 nsec);
-
-/* ---- pass: JIT call stubs ----
- *
- * For the JIT path on AArch64, route every CALL26/JUMP26 against a
- * resolver-supplied or weak-undef symbol (SK_ABS) through a 12-byte
- * stub colocated with .text inside the JIT mapping. The stub is
- * ADRP x16, slot ; LDR x16,[x16,#:lo12:slot] ; BR x16
- * and the slot is an 8-byte GOT entry filled by a per-slot R_ABS64
- * reloc against a synthetic resolver-pointer LinkSymbol (whose vaddr
- * is the original SK_ABS target's vaddr — a host pointer for
- * resolver-supplied symbols, 0 for weak-undef).
- *
- * Rationale: without this routing, CALL26 to a resolver-supplied host
- * function (e.g. libc `printf` from `cfree run`) trips link_reloc's
- * ±128 MiB range check, since the JIT mapping is arbitrarily far from
- * the host VA the resolver returned.
- *
- * The stub_map output is a sparse array indexed by LinkSymId
- * (size = LinkSyms_count(&img->syms)+1 at pass entry; the new stub /
- * slot / resolver_rec LinkSymbols are never themselves looked up
- * through this map). emit_reloc_records consults it to redirect
- * CALL26/JUMP26 targets.
- *
- * Runs after resolve_undefs (SK_ABS is set) and before
- * emit_reloc_records (so the redirect takes effect). Only runs on
- * AArch64 JIT (`!emit_static_exe`); the exe path covers the same
- * shape via PLT (ELF) / stubs (Mach-O).
- *
- * Address-taking via GOT_LOAD still resolves to the original
- * resolver-supplied vaddr (the GOT slot's R_ABS64 against the
- * non-redirected symbol). Address-taking via direct PCREL would land
- * on the stub instead, but clang does not emit non-GOT-routed
- * pointer-to-extern on AArch64. */
-static void layout_jit_call_stubs(Linker* l, LinkImage* img, u32 map_size,
- LinkSymId** stub_map_out) {
- Heap* h = img->heap;
- const LinkArchDesc* arch;
- LinkSymId* stub_map;
- LinkSymId* targets = NULL;
- u32 ntarget = 0, tcap = 0;
- u32 ii, k, i;
- u64 page;
- u64 base_vaddr = 0;
- u64 stubs_vaddr, slots_vaddr;
- u64 stubs_size, slots_size;
- u32 stubs_seg_idx, slots_seg_idx;
- u32 seg_base, sec_base;
- LinkSegment* stubs_seg;
- LinkSegment* slots_seg;
- LinkSection* stubs_sec;
- LinkSection* slots_sec;
- u8* stubs_bytes;
-
- *stub_map_out = NULL;
- if (l->emit_static_exe) return;
- if (l->c->target.arch != CFREE_ARCH_ARM_64) return;
-
- arch = link_arch_desc_for(l->c);
- if (!arch) return;
-
- stub_map = (LinkSymId*)h->alloc(h, sizeof(*stub_map) * map_size,
- _Alignof(LinkSymId));
- if (!stub_map) compiler_panic(img->c, no_loc(), "link: oom on stub map");
- memset(stub_map, 0, sizeof(*stub_map) * map_size);
-
- /* Pass A: collect unique SK_ABS targets of CALL26/JUMP26. */
- for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) {
- ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj;
- InputMap* m = &img->input_maps[ii];
- u32 total = obj_reloc_total(ob);
- if (!total) continue;
- for (k = 0; k < total; ++k) {
- const Reloc* r = obj_reloc_at(ob, k);
- const Section* s = obj_section_get(ob, r->section_id);
- LinkSymId target;
- const LinkSymbol* tgt;
- if (!s || !section_kept(s)) continue;
- if (m->section[r->section_id] == LINK_SEC_NONE) continue;
- if (r->kind != R_AARCH64_CALL26 && r->kind != R_AARCH64_JUMP26) continue;
- if (r->sym == OBJ_SYM_NONE || r->sym >= m->nsym) continue;
- target = m->sym[r->sym];
- if (target == LINK_SYM_NONE) continue;
- tgt = LinkSyms_at(&img->syms, target - 1);
- if (!tgt || tgt->kind != SK_ABS) continue;
- if (stub_map[target] != LINK_SYM_NONE) continue;
- if (VEC_GROW(h, targets, tcap, ntarget + 1u))
- compiler_panic(img->c, no_loc(), "link: oom on stub target list");
- targets[ntarget] = target;
- /* Sentinel marker; replaced with the stub's LinkSymId in pass C. */
- stub_map[target] = (LinkSymId)(ntarget + 1u);
- ntarget++;
- }
- }
-
- if (ntarget == 0) {
- if (targets) h->free(h, targets, sizeof(*targets) * tcap);
- h->free(h, stub_map, sizeof(*stub_map) * map_size);
- return;
- }
- /* Reset sentinels — pass C writes real stub LinkSymIds. */
- for (i = 0; i < ntarget; ++i) stub_map[targets[i]] = LINK_SYM_NONE;
-
- /* Pass B: allocate RX stubs segment + RW slots segment. Both land
- * page-aligned after the current image tail; layout_iplt may run
- * before us (IFUNC), and layout_got after — none of those passes
- * shift segments allocated here. */
- page = layout_page_size(l);
- for (i = 0; i < img->nsegments; ++i) {
- u64 end = img->segments[i].vaddr + img->segments[i].mem_size;
- if (end > base_vaddr) base_vaddr = end;
- }
- base_vaddr = ALIGN_UP(base_vaddr, (u64)page);
- stubs_vaddr = base_vaddr;
- stubs_size = (u64)ntarget * (u64)arch->iplt_stub_size;
- slots_vaddr = ALIGN_UP(stubs_vaddr + stubs_size, (u64)page);
- slots_size = (u64)ntarget * 8u;
-
- seg_base = layout_iplt_alloc_segments(img, 2u);
- stubs_seg_idx = seg_base + 0u;
- slots_seg_idx = seg_base + 1u;
-
- stubs_seg = &img->segments[stubs_seg_idx];
- memset(stubs_seg, 0, sizeof(*stubs_seg));
- stubs_seg->id = (LinkSegmentId)(stubs_seg_idx + 1u);
- stubs_seg->flags = SF_ALLOC | SF_EXEC;
- stubs_seg->file_offset = stubs_vaddr;
- stubs_seg->vaddr = stubs_vaddr;
- stubs_seg->file_size = stubs_size;
- stubs_seg->mem_size = stubs_size;
- stubs_seg->align = (u32)page;
- stubs_seg->nsections = 1;
- img->segment_bytes[stubs_seg_idx] = (u8*)h->alloc(h, (size_t)stubs_size, 16);
- img->segment_bytes_cap[stubs_seg_idx] = (size_t)stubs_size;
- if (!img->segment_bytes[stubs_seg_idx])
- compiler_panic(img->c, no_loc(), "link: oom on jit stubs bytes");
- memset(img->segment_bytes[stubs_seg_idx], 0, (size_t)stubs_size);
-
- slots_seg = &img->segments[slots_seg_idx];
- memset(slots_seg, 0, sizeof(*slots_seg));
- slots_seg->id = (LinkSegmentId)(slots_seg_idx + 1u);
- slots_seg->flags = SF_ALLOC | SF_WRITE;
- slots_seg->file_offset = slots_vaddr;
- slots_seg->vaddr = slots_vaddr;
- slots_seg->file_size = slots_size;
- slots_seg->mem_size = slots_size;
- slots_seg->align = (u32)page;
- slots_seg->nsections = 1;
- img->segment_bytes[slots_seg_idx] = (u8*)h->alloc(h, (size_t)slots_size, 16);
- img->segment_bytes_cap[slots_seg_idx] = (size_t)slots_size;
- if (!img->segment_bytes[slots_seg_idx])
- compiler_panic(img->c, no_loc(), "link: oom on jit stub slots bytes");
- memset(img->segment_bytes[slots_seg_idx], 0, (size_t)slots_size);
- img->nsegments += 2u;
-
- sec_base = layout_iplt_alloc_sections(img, 2u);
- stubs_sec = &img->sections[sec_base + 0u];
- memset(stubs_sec, 0, sizeof(*stubs_sec));
- stubs_sec->id = (LinkSectionId)(sec_base + 0u + 1u);
- stubs_sec->input_id = LINK_INPUT_NONE;
- stubs_sec->obj_section_id = OBJ_SEC_NONE;
- stubs_sec->segment_id = stubs_seg->id;
- stubs_sec->input_offset = 0;
- stubs_sec->file_offset = stubs_vaddr;
- stubs_sec->vaddr = stubs_vaddr;
- stubs_sec->size = stubs_size;
- stubs_sec->flags = SF_ALLOC | SF_EXEC;
- stubs_sec->align = 4;
- stubs_sec->name = pool_intern_cstr(l->c->global, ".cfree_jit_call_stubs");
- stubs_sec->sem = SSEM_PROGBITS;
-
- slots_sec = &img->sections[sec_base + 1u];
- memset(slots_sec, 0, sizeof(*slots_sec));
- slots_sec->id = (LinkSectionId)(sec_base + 1u + 1u);
- slots_sec->input_id = LINK_INPUT_NONE;
- slots_sec->obj_section_id = OBJ_SEC_NONE;
- slots_sec->segment_id = slots_seg->id;
- slots_sec->input_offset = 0;
- slots_sec->file_offset = slots_vaddr;
- slots_sec->vaddr = slots_vaddr;
- slots_sec->size = slots_size;
- slots_sec->flags = SF_ALLOC | SF_WRITE;
- slots_sec->align = 8;
- slots_sec->name = pool_intern_cstr(l->c->global, ".cfree_jit_call_slots");
- slots_sec->sem = SSEM_PROGBITS;
- img->nsections += 2u;
-
- /* Pass C: per target, emit stub bytes, synthesize slot + resolver
- * LinkSymbols, and queue the 3 relocs that wire them together. */
- stubs_bytes = img->segment_bytes[stubs_seg_idx];
- for (i = 0; i < ntarget; ++i) {
- LinkSymId orig = targets[i];
- LinkSymbol* orig_sym = LinkSyms_at(&img->syms, orig - 1);
- u64 stub_vaddr = stubs_vaddr + (u64)i * (u64)arch->iplt_stub_size;
- u64 slot_vaddr = slots_vaddr + (u64)i * 8u;
- LinkSymbol slot_rec, resolver_rec, stub_rec;
- LinkSymId slot_id, resolver_id, stub_id;
- LinkArchIPltReloc stub_relocs[2];
- u32 nstub_relocs;
- LinkRelocApply rrec;
- u8* stub_dst = stubs_bytes + (size_t)i * (size_t)arch->iplt_stub_size;
- u32 ri;
-
- nstub_relocs =
- arch->emit_iplt_stub(stub_dst, stub_vaddr, slot_vaddr, stub_relocs);
-
- memset(&slot_rec, 0, sizeof(slot_rec));
- slot_rec.kind = SK_OBJ;
- slot_rec.bind = SB_LOCAL;
- slot_rec.defined = 1;
- slot_rec.section_id = slots_sec->id;
- slot_rec.vaddr = slot_vaddr;
- slot_rec.size = 8;
- slot_id = append_symbol(img, &slot_rec);
-
- /* Preserve the original SK_ABS vaddr (host pointer / NULL) for the
- * slot's R_ABS64. Redirecting the original LinkSymbol would
- * change semantics for non-call references (e.g. data loads). */
- memset(&resolver_rec, 0, sizeof(resolver_rec));
- resolver_rec.kind = SK_ABS;
- resolver_rec.bind = SB_LOCAL;
- resolver_rec.defined = 1;
- resolver_rec.vaddr = orig_sym->vaddr;
- resolver_id = append_symbol(img, &resolver_rec);
-
- memset(&stub_rec, 0, sizeof(stub_rec));
- stub_rec.kind = SK_FUNC;
- stub_rec.bind = SB_LOCAL;
- stub_rec.defined = 1;
- stub_rec.section_id = stubs_sec->id;
- stub_rec.vaddr = stub_vaddr;
- stub_rec.size = arch->iplt_stub_size;
- stub_id = append_symbol(img, &stub_rec);
- stub_map[orig] = stub_id;
-
- /* Stub→slot relocs (ADR_PREL_PG_HI21 + LDST64_ABS_LO12_NC). */
- for (ri = 0; ri < nstub_relocs; ++ri) {
- memset(&rrec, 0, sizeof(rrec));
- rrec.input_id = LINK_INPUT_NONE;
- rrec.section_id = OBJ_SEC_NONE;
- rrec.link_section_id = stubs_sec->id;
- rrec.offset = (u32)(i * arch->iplt_stub_size) +
- stub_relocs[ri].offset_in_stub;
- rrec.width = stub_relocs[ri].width;
- rrec.write_vaddr = stub_vaddr + stub_relocs[ri].offset_in_stub;
- rrec.write_file_offset = rrec.write_vaddr;
- rrec.kind = stub_relocs[ri].kind;
- rrec.target = slot_id;
- rrec.addend = 0;
- *append_reloc_slot(img) = rrec;
- }
-
- /* Slot R_ABS64 against resolver_rec (preserves original vaddr). */
- memset(&rrec, 0, sizeof(rrec));
- rrec.input_id = LINK_INPUT_NONE;
- rrec.section_id = OBJ_SEC_NONE;
- rrec.link_section_id = slots_sec->id;
- rrec.offset = (u32)(i * 8u);
- rrec.width = 8;
- rrec.write_vaddr = slot_vaddr;
- rrec.write_file_offset = slot_vaddr;
- rrec.kind = R_ABS64;
- rrec.target = resolver_id;
- rrec.addend = 0;
- *append_reloc_slot(img) = rrec;
- }
-
- if (targets) h->free(h, targets, sizeof(*targets) * tcap);
- *stub_map_out = stub_map;
-}
-
- /* Turn every surviving input relocation into a LinkRelocApply on `img`.
-  *
-  * For each input object, walks its relocs and skips those whose source
-  * section is missing, not kept, or has no mapped output section, as well
-  * as the RISC-V marker kinds. The referenced symbol is translated through
-  * the per-input map and then optionally redirected:
-  *   - got_map (may be NULL): relocs for which reloc_uses_got() is true are
-  *     retargeted to the symbol's GOT slot symbol; a missing slot panics.
-  *   - stub_map (may be NULL): R_AARCH64_CALL26 / R_AARCH64_JUMP26 relocs
-  *     are retargeted to the stub symbol when the map has an entry.
-  * Panics on an out-of-range or unmapped symbol and on a reloc kind whose
-  * reloc_width() is 0. */
- static void emit_reloc_records(Linker* l, LinkImage* img,
-                                const LinkSymId* got_map,
-                                const LinkSymId* stub_map) {
-   const u32 ninputs = LinkInputs_count(&l->inputs);
-   u32 in_idx;
-
-   for (in_idx = 0; in_idx < ninputs; ++in_idx) {
-     ObjBuilder* obj = LinkInputs_at(&l->inputs, in_idx)->obj;
-     InputMap* map = &img->input_maps[in_idx];
-     const u32 nreloc = obj_reloc_total(obj);
-     u32 ri;
-
-     for (ri = 0; ri < nreloc; ++ri) {
-       const Reloc* rel = obj_reloc_at(obj, ri);
-       const Section* src_sec = obj_section_get(obj, rel->section_id);
-       LinkSection* dst_sec;
-       LinkSymId sym;
-       LinkRelocApply out;
-       int is_marker;
-
-       /* Source section dropped by the object layer or by GC. */
-       if (src_sec == NULL || !section_kept(src_sec)) continue;
-       if (map->section[rel->section_id] == LINK_SEC_NONE) continue;
-
-       /* RELAX / TPREL_ADD / ALIGN carry no symbol; they only annotate a
-        * neighbouring reloc for relaxation, which this linker does not do. */
-       is_marker = rel->kind == R_RV_RELAX || rel->kind == R_RV_TPREL_ADD ||
-                   rel->kind == R_RV_ALIGN;
-       if (is_marker) continue;
-
-       if (rel->sym == OBJ_SYM_NONE || rel->sym >= map->nsym)
-         compiler_panic(l->c, no_loc(), "link: reloc references unknown symbol");
-       sym = map->sym[rel->sym];
-       if (sym == LINK_SYM_NONE)
-         compiler_panic(l->c, no_loc(),
-                        "link: reloc references unmapped symbol");
-
-       /* GOT-style relocs address the synthetic slot rather than the
-        * symbol; the slot itself is filled by its own R_ABS64. */
-       if (got_map != NULL && reloc_uses_got(rel->kind)) {
-         sym = got_map[sym];
-         if (sym == LINK_SYM_NONE)
-           compiler_panic(l->c, no_loc(), "link: GOT slot missing for symbol");
-       }
-
-       /* JIT: branch relocs go through a nearby stub when one was
-        * synthesized for this target. stub_map is sparse, so an empty
-        * entry leaves the target untouched. */
-       if (stub_map != NULL && (rel->kind == R_AARCH64_CALL26 ||
-                                rel->kind == R_AARCH64_JUMP26)) {
-         if (stub_map[sym] != LINK_SYM_NONE) sym = stub_map[sym];
-       }
-
-       dst_sec = &img->sections[map->section[rel->section_id] - 1];
-
-       memset(&out, 0, sizeof(out));
-       out.width = reloc_width((RelocKind)rel->kind);
-       if (out.width == 0)
-         compiler_panic(l->c, no_loc(), "link: unsupported reloc kind %u",
-                        (unsigned)rel->kind);
-       out.input_id = LinkInputs_at(&l->inputs, in_idx)->id;
-       out.section_id = rel->section_id;
-       out.link_section_id = dst_sec->id;
-       out.offset = rel->offset;
-       out.write_vaddr = dst_sec->vaddr + rel->offset;
-       out.write_file_offset = dst_sec->file_offset + rel->offset;
-       out.kind = (RelocKind)rel->kind;
-       out.target = sym;
-       out.addend = rel->addend;
-       *append_reloc_slot(img) = out;
-     }
-   }
- }
-
- /* ---- pass 3c: GOT layout ----
-  *
-  * Builds a static GOT for inputs that carry GOT-using relocations (the
-  * kinds for which reloc_uses_got() is true, e.g. the
-  * R_AARCH64_ADR_GOT_PAGE + R_AARCH64_LD64_GOT_LO12_NC pair clang emits
-  * for weak-extern references).
-  *
-  * When at least one such reloc exists this appends:
-  *   - one page-aligned RW segment placed after every existing segment,
-  *   - one ".got" section holding an 8-byte slot per unique target symbol,
-  *   - one nameless local LinkSymbol per slot, and
-  *   - one R_ABS64 reloc per slot that stores the target's resolved vaddr
-  *     at apply time (a weak-undef target stays 0, so its slot is NULL).
-  *
-  * `map_size` is the caller's pre-pass symbol count + 1 (LinkSymIds are
-  * 1-based). On return `*got_map_out` is either NULL (no GOT needed) or a
-  * heap array of `map_size` entries indexed by LinkSymId giving the slot's
-  * LinkSymId, LINK_SYM_NONE where the symbol has no slot. Symbols appended
-  * here are never looked up through the map, so it does not need to grow.
-  * The caller frees the map. */
- static void layout_got(Linker* l, LinkImage* img, u32 map_size,
-                        LinkSymId** got_map_out) {
-   Heap* h = img->heap;
-   const size_t map_bytes = sizeof(LinkSymId) * map_size;
-   LinkSymId* map;
-   LinkSymId* wanted = NULL; /* slot targets, in discovery order */
-   u32 wanted_cap = 0;
-   u32 nwanted = 0;
-   u32 in_idx, ri, seg_i, slot_i;
-   u32 seg_idx;
-   u64 page;
-   u64 got_vaddr = 0;
-   u64 got_bytes;
-   LinkSegment* seg;
-   LinkSection* sec;
-
-   *got_map_out = NULL;
-
-   map = (LinkSymId*)h->alloc(h, map_bytes, _Alignof(LinkSymId));
-   if (!map) compiler_panic(img->c, no_loc(), "link: oom on got map");
-   memset(map, 0, map_bytes);
-
-   /* Pass A: collect the unique targets of GOT-using relocs. While
-    * collecting, map[target] holds a non-zero marker (discovery index + 1)
-    * purely for dedup; the final pass overwrites every marker with the
-    * real slot symbol id. */
-   for (in_idx = 0; in_idx < LinkInputs_count(&l->inputs); ++in_idx) {
-     ObjBuilder* obj = LinkInputs_at(&l->inputs, in_idx)->obj;
-     InputMap* im = &img->input_maps[in_idx];
-     const u32 nreloc = obj_reloc_total(obj);
-
-     for (ri = 0; ri < nreloc; ++ri) {
-       const Reloc* rel = obj_reloc_at(obj, ri);
-       const Section* src = obj_section_get(obj, rel->section_id);
-       LinkSymId tgt;
-
-       if (src == NULL || !section_kept(src)) continue;
-       if (im->section[rel->section_id] == LINK_SEC_NONE) continue;
-       if (!reloc_uses_got(rel->kind)) continue;
-       if (rel->sym == OBJ_SYM_NONE || rel->sym >= im->nsym) continue;
-
-       tgt = im->sym[rel->sym];
-       if (tgt == LINK_SYM_NONE || map[tgt] != LINK_SYM_NONE) continue;
-
-       if (VEC_GROW(h, wanted, wanted_cap, nwanted + 1u))
-         compiler_panic(img->c, no_loc(), "link: oom on got slot list");
-       wanted[nwanted] = tgt;
-       ++nwanted;
-       map[tgt] = (LinkSymId)nwanted;
-     }
-   }
-
-   /* Nothing uses the GOT: release scratch state and report "no map". */
-   if (nwanted == 0) {
-     if (wanted) h->free(h, wanted, sizeof(*wanted) * wanted_cap);
-     h->free(h, map, map_bytes);
-     return;
-   }
-
-   /* Pass B: place the GOT on the first page boundary past the highest
-    * existing segment end. */
-   page = layout_page_size(l);
-   for (seg_i = 0; seg_i < img->nsegments; ++seg_i) {
-     const LinkSegment* cur = &img->segments[seg_i];
-     const u64 cur_end = cur->vaddr + cur->mem_size;
-     if (got_vaddr < cur_end) got_vaddr = cur_end;
-   }
-   got_vaddr = ALIGN_UP(got_vaddr, (u64)(page));
-   got_bytes = (u64)nwanted * 8u;
-
-   /* Grow the three parallel per-segment arrays by one entry. */
-   {
-     const u32 old_n = img->nsegments;
-     const u32 new_n = old_n + 1u;
-     LinkSegment* segs = (LinkSegment*)h->realloc(
-         h, img->segments, sizeof(*img->segments) * old_n,
-         sizeof(*img->segments) * new_n, _Alignof(LinkSegment));
-     u8** bufs = (u8**)h->realloc(
-         h, img->segment_bytes, sizeof(*img->segment_bytes) * old_n,
-         sizeof(*img->segment_bytes) * new_n, _Alignof(u8*));
-     size_t* caps = (size_t*)h->realloc(
-         h, img->segment_bytes_cap, sizeof(*img->segment_bytes_cap) * old_n,
-         sizeof(*img->segment_bytes_cap) * new_n, _Alignof(size_t));
-     if (!segs || !bufs || !caps)
-       compiler_panic(img->c, no_loc(), "link: oom on got segment");
-     img->segments = segs;
-     img->segment_bytes = bufs;
-     img->segment_bytes_cap = caps;
-   }
-
-   seg_idx = img->nsegments;
-   seg = &img->segments[seg_idx];
-   memset(seg, 0, sizeof(*seg));
-   seg->id = (LinkSegmentId)(seg_idx + 1u);
-   seg->flags = SF_ALLOC | SF_WRITE;
-   seg->vaddr = got_vaddr;
-   seg->file_offset = got_vaddr;
-   seg->mem_size = got_bytes;
-   seg->file_size = got_bytes;
-   seg->align = (u32)page;
-   seg->nsections = 1;
-
-   /* Zero-filled backing bytes; the per-slot relocs fill them later. */
-   img->segment_bytes[seg_idx] = (u8*)h->alloc(h, (size_t)got_bytes, 16);
-   img->segment_bytes_cap[seg_idx] = (size_t)got_bytes;
-   if (!img->segment_bytes[seg_idx])
-     compiler_panic(img->c, no_loc(), "link: oom on got bytes");
-   memset(img->segment_bytes[seg_idx], 0, (size_t)got_bytes);
-   img->nsegments++;
-
-   /* Pass C: append the synthetic .got section covering the segment. */
-   {
-     LinkSection* grown = (LinkSection*)h->realloc(
-         h, img->sections, sizeof(*img->sections) * img->nsections,
-         sizeof(*img->sections) * (img->nsections + 1u),
-         _Alignof(LinkSection));
-     if (!grown) compiler_panic(img->c, no_loc(), "link: oom on got section");
-     img->sections = grown;
-   }
-   sec = &img->sections[img->nsections];
-   memset(sec, 0, sizeof(*sec));
-   sec->id = (LinkSectionId)(img->nsections + 1u);
-   sec->input_id = LINK_INPUT_NONE;
-   sec->obj_section_id = OBJ_SEC_NONE;
-   sec->segment_id = seg->id;
-   sec->input_offset = 0;
-   sec->vaddr = got_vaddr;
-   sec->file_offset = got_vaddr;
-   sec->size = got_bytes;
-   sec->flags = SF_ALLOC | SF_WRITE;
-   sec->align = 8;
-   sec->name = pool_intern_cstr(img->c->global, ".got");
-   sec->sem = SSEM_PROGBITS;
-   img->nsections++;
-
-   /* Pass D: one local symbol plus one R_ABS64 fill reloc per slot. */
-   for (slot_i = 0; slot_i < nwanted; ++slot_i) {
-     const LinkSymId tgt = wanted[slot_i];
-     const u64 slot_addr = got_vaddr + (u64)slot_i * 8u;
-     LinkSymbol slot_sym;
-     LinkRelocApply fill;
-
-     memset(&slot_sym, 0, sizeof(slot_sym));
-     slot_sym.name = 0;
-     slot_sym.kind = SK_OBJ;
-     slot_sym.bind = SB_LOCAL;
-     slot_sym.defined = 1;
-     slot_sym.section_id = sec->id;
-     slot_sym.vaddr = slot_addr;
-     slot_sym.size = 8;
-     map[tgt] = append_symbol(img, &slot_sym);
-
-     memset(&fill, 0, sizeof(fill));
-     fill.input_id = LINK_INPUT_NONE;
-     fill.section_id = OBJ_SEC_NONE;
-     fill.link_section_id = sec->id;
-     fill.offset = (u32)(slot_i * 8u);
-     fill.width = 8;
-     fill.write_vaddr = slot_addr;
-     fill.write_file_offset = slot_addr;
-     fill.kind = R_ABS64;
-     fill.target = tgt;
-     fill.addend = 0;
-     *append_reloc_slot(img) = fill;
-   }
-
-   if (wanted) h->free(h, wanted, sizeof(*wanted) * wanted_cap);
-
-   *got_map_out = map;
- }
-
-/* ---- pass 3d: STT_GNU_IFUNC trampoline (.iplt + .igot.plt + .iplt.pairs) --
- *
- * Per defined SK_IFUNC symbol we synthesize:
- * - An arch->iplt_stub_size-byte stub in a fresh RX segment (.iplt),
- * written by arch->emit_iplt_stub, that loads an 8-byte function
- * pointer and tail-calls it. On AArch64 this is 12 bytes, encoded as
- * ADRP x16 / LDR x16,[x16] / BR x16 with the ADR_PREL_PG_HI21 +
- * LDST64_ABS_LO12_NC immediates left zero — the existing reloc
- * machinery patches them against a synthetic LinkSymbol whose vaddr
- * is the matching slot. Arches that can encode the stub→slot
- * displacement inline (x64, rv64) report no relocs.
- * - An 8-byte slot in a fresh RW segment (.igot.plt), zero-initialized.
- * - A 16-byte (resolver_ptr, slot_ptr) entry in a parallel RW
- * section .iplt.pairs (also page-aligned segment for cleanliness),
- * filled at apply time via two R_ABS64 relocs. The boundary
- * symbols __start_iplt_pairs / __stop_iplt_pairs cover the span
- * so the rt member ifunc_init.c can iterate it.
- *
- * The IFUNC LinkSymbol's vaddr is then redirected to the stub. The
- * legacy in-image img->iplt_pairs[] table is also populated so the
- * JIT path's pre-resolver can call each resolver and store its
- * return value into the slot's runtime address — that path doesn't
- * use the .iplt.pairs data section.
- *
- * When emit_static_exe is set (cfree_link_exe path), an additional
- * 8-byte SSEM_PREINIT_ARRAY section is synthesized that holds one
- * R_ABS64 reloc against __cfree_ifunc_init. Preinit runs strictly
- * before .init_array, so user ctors that call IFUNCs see their
- * .igot.plt slots already filled.
- *
- * Invariant: runs after link_symbols_to_sections so the resolver's
- * vaddr is final; before emit_array_boundaries so the synthetic
- * preinit-array entry (SSEM_PREINIT_ARRAY, see above) contributes to
- * the matching array boundary symbols; before resolve_undefs so
- * cross-TU undef references see the post-redirect (stub) vaddr. */
-
- /* Grow img's three parallel per-segment arrays (segments, segment_bytes,
-  * segment_bytes_cap) by `nseg` entries and return the index of the first
-  * new entry. img->nsegments is NOT changed and the new entries are left
-  * uninitialised: the caller fills [base, base + nseg) and then bumps the
-  * count. Panics if any of the three reallocations fails. */
- static u32 layout_iplt_alloc_segments(LinkImage* img, u32 nseg) {
-   Heap* heap = img->heap;
-   const u32 old_n = img->nsegments;
-   const u32 new_n = old_n + nseg;
-   LinkSegment* segs;
-   u8** bufs;
-   size_t* caps;
-
-   segs = (LinkSegment*)heap->realloc(
-       heap, img->segments, sizeof(*img->segments) * img->nsegments,
-       sizeof(*img->segments) * new_n, _Alignof(LinkSegment));
-   bufs = (u8**)heap->realloc(
-       heap, img->segment_bytes,
-       sizeof(*img->segment_bytes) * img->nsegments,
-       sizeof(*img->segment_bytes) * new_n, _Alignof(u8*));
-   caps = (size_t*)heap->realloc(
-       heap, img->segment_bytes_cap,
-       sizeof(*img->segment_bytes_cap) * img->nsegments,
-       sizeof(*img->segment_bytes_cap) * new_n, _Alignof(size_t));
-
-   if (segs == NULL || bufs == NULL || caps == NULL)
-     compiler_panic(img->c, no_loc(), "link: oom on iplt segments");
-
-   img->segments = segs;
-   img->segment_bytes = bufs;
-   img->segment_bytes_cap = caps;
-   return old_n;
- }
-
- /* Grow img->sections by `nsec` entries and return the index of the first
-  * new entry. img->nsections is NOT changed and the new entries are left
-  * uninitialised for the caller to fill. Panics if the reallocation
-  * fails. */
- static u32 layout_iplt_alloc_sections(LinkImage* img, u32 nsec) {
-   Heap* heap = img->heap;
-   const u32 first_new = img->nsections;
-   const size_t old_bytes = sizeof(*img->sections) * img->nsections;
-   const size_t new_bytes = sizeof(*img->sections) * (first_new + nsec);
-   LinkSection* grown = (LinkSection*)heap->realloc(
-       heap, img->sections, old_bytes, new_bytes, _Alignof(LinkSection));
-
-   if (grown == NULL)
-     compiler_panic(img->c, no_loc(), "link: oom on iplt sections");
-   img->sections = grown;
-   return first_new;
- }
-
- /* True when `s` is the canonical definition of an IFUNC: kind SK_IFUNC,
-  * defined, and — for named symbols — either absent from img->globals or
-  * the very entry img->globals maps the name to. Per-input copies of the
-  * same name are rejected so each IFUNC is laid out once. */
- static int iplt_is_canonical_ifunc(LinkImage* img, const LinkSymbol* s) {
-   LinkSymId canonical;
-   if (s->kind != SK_IFUNC || !s->defined) return 0;
-   if (s->name == 0) return 1;
-   canonical = symhash_get(&img->globals, s->name);
-   return canonical == LINK_SYM_NONE || canonical == s->id;
- }
-
- /* Initialise segment slot `idx` as a single-section segment at `vaddr`
-  * (file offset == vaddr, file size == mem size == `size`, aligned to
-  * `page`) and allocate its zero-filled backing bytes. Returns the backing
-  * buffer, or NULL when the allocation failed so the caller can panic with
-  * its own message. The slot must already exist in img's segment arrays. */
- static u8* iplt_init_segment(LinkImage* img, u32 idx, u32 flags, u64 vaddr,
-                              u64 size, u64 page) {
-   Heap* h = img->heap;
-   LinkSegment* seg = &img->segments[idx];
-   u8* bytes;
-
-   memset(seg, 0, sizeof(*seg));
-   seg->id = (LinkSegmentId)(idx + 1u);
-   seg->flags = flags;
-   seg->file_offset = vaddr;
-   seg->vaddr = vaddr;
-   seg->file_size = size;
-   seg->mem_size = size;
-   seg->align = (u32)page;
-   seg->nsections = 1;
-
-   bytes = (u8*)h->alloc(h, (size_t)size, 16);
-   img->segment_bytes[idx] = bytes;
-   img->segment_bytes_cap[idx] = (size_t)size;
-   if (bytes) memset(bytes, 0, (size_t)size);
-   return bytes;
- }
-
- /* Initialise section slot `idx` as a synthetic (input-less) section that
-  * spans the whole of segment slot `seg_idx`. The caller sets `sem` on the
-  * returned section. The slot must already exist in img->sections. */
- static LinkSection* iplt_init_section(LinkImage* img, u32 idx, u32 seg_idx,
-                                       u64 vaddr, u64 size, u32 flags,
-                                       u32 align, Sym name) {
-   LinkSection* sec = &img->sections[idx];
-
-   memset(sec, 0, sizeof(*sec));
-   sec->id = (LinkSectionId)(idx + 1u);
-   sec->input_id = LINK_INPUT_NONE;
-   sec->obj_section_id = OBJ_SEC_NONE;
-   sec->segment_id = (LinkSegmentId)(seg_idx + 1u);
-   sec->input_offset = 0;
-   sec->file_offset = vaddr;
-   sec->vaddr = vaddr;
-   sec->size = size;
-   sec->flags = flags;
-   sec->align = align;
-   sec->name = name;
-   return sec;
- }
-
- /* Queue one linker-synthesized reloc (no input, no object section, zero
-  * addend) that patches `width` bytes at `offset` inside link section
-  * `sec_id`. File offset and vaddr coincide for the segments built here. */
- static void iplt_push_reloc(LinkImage* img, LinkSectionId sec_id, u32 offset,
-                             u32 width, u64 write_vaddr, RelocKind kind,
-                             LinkSymId target) {
-   LinkRelocApply rrec;
-
-   memset(&rrec, 0, sizeof(rrec));
-   rrec.input_id = LINK_INPUT_NONE;
-   rrec.section_id = OBJ_SEC_NONE;
-   rrec.link_section_id = sec_id;
-   rrec.offset = offset;
-   rrec.width = width;
-   rrec.write_vaddr = write_vaddr;
-   rrec.write_file_offset = write_vaddr;
-   rrec.kind = kind;
-   rrec.target = target;
-   rrec.addend = 0;
-   *append_reloc_slot(img) = rrec;
- }
-
- /* Lay out the IFUNC trampolines: for every canonical defined SK_IFUNC
-  * symbol, emit an arch-specific stub in .iplt, an 8-byte slot in
-  * .igot.plt and a 16-byte (resolver, slot) entry in .iplt.pairs, then
-  * redirect the IFUNC symbol (and every same-named per-input copy) to the
-  * stub. Also fills img->iplt_pairs / img->niplt, and, when
-  * l->emit_static_exe is set, adds a one-entry preinit-array section
-  * pointing at __cfree_ifunc_init. Does nothing when there is no IFUNC.
-  * Panics on OOM, on a missing arch descriptor, or when
-  * __cfree_ifunc_init is required but undefined. */
- static void layout_iplt(Linker* l, LinkImage* img) {
-   Heap* h = img->heap;
-   const LinkArchDesc* arch = link_arch_desc_for(l->c);
-   int emit_init_array = l->emit_static_exe;
-   /* One section per segment: iplt, igot, pairs (+ optional preinit). */
-   u32 nnew = emit_init_array ? 4u : 3u;
-   u32 i;
-   u32 nifunc = 0;
-   u32 slot_idx = 0;
-   u32 seg_base, sec_base;
-   u64 page;
-   u64 base_vaddr = 0;
-   u64 stub_size;
-   u64 iplt_vaddr, igot_vaddr, pairs_vaddr, init_vaddr = 0;
-   u64 iplt_size, igot_size, pairs_size, init_size = 0;
-   LinkSection* sec;
-   LinkSectionId iplt_sec_id, igot_sec_id, pairs_sec_id;
-   LinkSectionId init_sec_id = LINK_SEC_NONE;
-   LinkSymId ifunc_init_sym = LINK_SYM_NONE;
-   Sym ifunc_init_name;
-   Sym pairs_section_name;
-   Sym init_section_name;
-   u8* iplt_bytes;
-
-   if (!arch)
-     compiler_panic(img->c, no_loc(),
-                    "link: layout_iplt: no arch descriptor for arch %u",
-                    (u32)l->c->target.arch);
-   stub_size = (u64)arch->iplt_stub_size;
-
-   /* Pass A: count canonical IFUNC defs. resolve_undefs copies the def's
-    * kind into each cross-TU undef LinkSymbol of the same name, so the
-    * canonical check is needed to avoid over-counting (pass B applies
-    * the same filter). */
-   for (i = 0; i < LinkSyms_count(&img->syms); ++i) {
-     if (iplt_is_canonical_ifunc(img, LinkSyms_at(&img->syms, i))) ++nifunc;
-   }
-   if (nifunc == 0) return;
-
-   page = layout_page_size(l);
-
-   /* Place the new segments after every existing one, each on its own
-    * page boundary. */
-   for (i = 0; i < img->nsegments; ++i) {
-     u64 end = img->segments[i].vaddr + img->segments[i].mem_size;
-     if (end > base_vaddr) base_vaddr = end;
-   }
-   base_vaddr = ALIGN_UP(base_vaddr, (u64)(page));
-   iplt_vaddr = base_vaddr;
-   iplt_size = (u64)nifunc * stub_size;
-   igot_vaddr = ALIGN_UP(iplt_vaddr + iplt_size, (u64)(page));
-   igot_size = (u64)nifunc * 8u;
-   pairs_vaddr = ALIGN_UP(igot_vaddr + igot_size, (u64)(page));
-   pairs_size = (u64)nifunc * 16u;
-
-   /* When emitting a static ET_EXEC, locate (or fail on) the
-    * __cfree_ifunc_init symbol now and reserve a 1-entry preinit-array
-    * section right after .iplt.pairs in its own page-aligned RW segment.
-    * The lookup is expected to succeed: archive pre-seeding in
-    * link_ingest_archives pulls the rt member when any input defines an
-    * IFUNC. */
-   if (emit_init_array) {
-     ifunc_init_name = pool_intern_cstr(l->c->global, "__cfree_ifunc_init");
-     ifunc_init_sym = symhash_get(&img->globals, ifunc_init_name);
-     if (ifunc_init_sym == LINK_SYM_NONE ||
-         !LinkSyms_at(&img->syms, ifunc_init_sym - 1)->defined) {
-       compiler_panic(img->c, no_loc(),
-                      "link: STT_GNU_IFUNC requires '__cfree_ifunc_init' "
-                      "to be defined (link in libcfree_rt.a or provide "
-                      "your own implementation)");
-     }
-     init_vaddr = ALIGN_UP(pairs_vaddr + pairs_size, (u64)(page));
-     init_size = 8u;
-   }
-
-   /* Segments: [iplt RX, igot RW, pairs RW] + optional [init RW]. */
-   seg_base = layout_iplt_alloc_segments(img, nnew);
-   if (!iplt_init_segment(img, seg_base + 0u, SF_ALLOC | SF_EXEC, iplt_vaddr,
-                          iplt_size, page))
-     compiler_panic(img->c, no_loc(), "link: oom on iplt bytes");
-   if (!iplt_init_segment(img, seg_base + 1u, SF_ALLOC | SF_WRITE, igot_vaddr,
-                          igot_size, page))
-     compiler_panic(img->c, no_loc(), "link: oom on igot bytes");
-   if (!iplt_init_segment(img, seg_base + 2u, SF_ALLOC | SF_WRITE, pairs_vaddr,
-                          pairs_size, page))
-     compiler_panic(img->c, no_loc(), "link: oom on iplt.pairs bytes");
-   if (emit_init_array &&
-       !iplt_init_segment(img, seg_base + 3u, SF_ALLOC | SF_WRITE, init_vaddr,
-                          init_size, page))
-     compiler_panic(img->c, no_loc(), "link: oom on iplt init_array bytes");
-   img->nsegments += nnew;
-
-   /* Sections: same shape, one section per segment. Names are interned in
-    * the same order as before so pool contents are unchanged. */
-   sec_base = layout_iplt_alloc_sections(img, nnew);
-   pairs_section_name = pool_intern_cstr(l->c->global, ".iplt.pairs");
-   init_section_name = obj_secname_preinit_array(l->c);
-
-   sec = iplt_init_section(img, sec_base + 0u, seg_base + 0u, iplt_vaddr,
-                           iplt_size, SF_ALLOC | SF_EXEC, 4,
-                           pool_intern_cstr(l->c->global, ".iplt"));
-   sec->sem = SSEM_PROGBITS;
-   iplt_sec_id = sec->id;
-
-   sec = iplt_init_section(img, sec_base + 1u, seg_base + 1u, igot_vaddr,
-                           igot_size, SF_ALLOC | SF_WRITE, 8,
-                           pool_intern_cstr(l->c->global, ".igot.plt"));
-   sec->sem = SSEM_PROGBITS;
-   igot_sec_id = sec->id;
-
-   sec = iplt_init_section(img, sec_base + 2u, seg_base + 2u, pairs_vaddr,
-                           pairs_size, SF_ALLOC | SF_WRITE, 8,
-                           pairs_section_name);
-   sec->sem = SSEM_PROGBITS;
-   pairs_sec_id = sec->id;
-
-   if (emit_init_array) {
-     sec = iplt_init_section(img, sec_base + 3u, seg_base + 3u, init_vaddr,
-                             init_size, SF_ALLOC | SF_WRITE, 8,
-                             init_section_name);
-     sec->sem = SSEM_PREINIT_ARRAY;
-     init_sec_id = sec->id;
-   }
-   img->nsections += nnew;
-
-   /* __start_iplt_pairs / __stop_iplt_pairs span the .iplt.pairs section
-    * (start inclusive, end exclusive). The rt member's
-    * __cfree_ifunc_init iterates this span. */
-   emit_boundary_sym(l, img, "__start_iplt_pairs", pairs_vaddr);
-   emit_boundary_sym(l, img, "__stop_iplt_pairs", pairs_vaddr + pairs_size);
-
-   /* In-image (resolver_vaddr, slot_vaddr) table, in stub order. Used by
-    * the JIT path's pre-resolution; the ELF emit path uses the
-    * .iplt.pairs data section instead. */
-   img->iplt_pairs = (u64*)h->alloc(
-       h, sizeof(*img->iplt_pairs) * 2u * (size_t)nifunc, _Alignof(u64));
-   if (!img->iplt_pairs)
-     compiler_panic(img->c, no_loc(), "link: oom on iplt pairs");
-   img->niplt = nifunc;
-
-   iplt_bytes = img->segment_bytes[seg_base + 0u];
-
-   /* Pass B: per canonical IFUNC def, write the stub bytes, synthesize a
-    * slot symbol and a resolver-address symbol, queue the relocs, and
-    * redirect the IFUNC symbol to the stub. Symbols appended inside the
-    * loop are SK_OBJ / SK_FUNC and a redirected symbol becomes SK_FUNC,
-    * so none of them is picked up again by later iterations. */
-   for (i = 0; i < LinkSyms_count(&img->syms); ++i) {
-     LinkSymbol* s = LinkSyms_at(&img->syms, i);
-     LinkArchIPltReloc iplt_relocs[2];
-     LinkSymbol slot_rec;
-     LinkSymbol resolver_rec;
-     LinkSymId slot_id;
-     LinkSymId resolver_id;
-     u64 stub_off;
-     u64 stub_vaddr;
-     u64 slot_vaddr;
-     u64 pair_vaddr;
-     u32 niplt_relocs;
-     u32 ri;
-
-     if (!iplt_is_canonical_ifunc(img, s)) continue;
-
-     /* Every per-stub offset uses the arch's stub size; a fixed 12-byte
-      * stride is only right for the AArch64 stub. */
-     stub_off = (u64)slot_idx * stub_size;
-     stub_vaddr = iplt_vaddr + stub_off;
-     slot_vaddr = igot_vaddr + (u64)slot_idx * 8u;
-     pair_vaddr = pairs_vaddr + (u64)slot_idx * 16u;
-
-     img->iplt_pairs[2u * slot_idx + 0] = s->vaddr;
-     img->iplt_pairs[2u * slot_idx + 1] = slot_vaddr;
-
-     /* Resolver-address symbol, captured pre-redirect so the
-      * .iplt.pairs R_ABS64 targets something whose vaddr shifts with the
-      * image base alongside the section it lives in. Built before any
-      * append so it only reads `s` while that pointer is known valid. */
-     memset(&resolver_rec, 0, sizeof(resolver_rec));
-     resolver_rec.name = 0;
-     resolver_rec.kind = SK_FUNC;
-     resolver_rec.bind = SB_LOCAL;
-     resolver_rec.defined = 1;
-     resolver_rec.section_id = s->section_id;
-     resolver_rec.value = s->value;
-     resolver_rec.vaddr = s->vaddr;
-     resolver_rec.size = 0;
-
-     /* Stub bytes are arch-specific. emit_iplt_stub returns how many
-      * apply-time relocs (offset / width / kind within the stub) it
-      * wrote to iplt_relocs: 0 for arches that encode the stub→slot
-      * displacement inline (x64, rv64), 2 for aa64. */
-     niplt_relocs = arch->emit_iplt_stub(
-         iplt_bytes + (size_t)slot_idx * (size_t)arch->iplt_stub_size,
-         stub_vaddr, slot_vaddr, iplt_relocs);
-
-     /* Synthetic local symbol for the .igot.plt slot. */
-     memset(&slot_rec, 0, sizeof(slot_rec));
-     slot_rec.name = 0;
-     slot_rec.kind = SK_OBJ;
-     slot_rec.bind = SB_LOCAL;
-     slot_rec.defined = 1;
-     slot_rec.section_id = igot_sec_id;
-     slot_rec.vaddr = slot_vaddr;
-     slot_rec.size = 8;
-     slot_id = append_symbol(img, &slot_rec);
-     resolver_id = append_symbol(img, &resolver_rec);
-
-     /* Stub→slot fixups reported by the arch. */
-     for (ri = 0; ri < niplt_relocs; ++ri) {
-       iplt_push_reloc(img, iplt_sec_id,
-                       (u32)(slot_idx * arch->iplt_stub_size) +
-                           iplt_relocs[ri].offset_in_stub,
-                       iplt_relocs[ri].width,
-                       stub_vaddr + iplt_relocs[ri].offset_in_stub,
-                       iplt_relocs[ri].kind, slot_id);
-     }
-
-     /* .iplt.pairs[i].resolver = &resolver, .iplt.pairs[i].slot = &slot */
-     iplt_push_reloc(img, pairs_sec_id, (u32)(slot_idx * 16u), 8, pair_vaddr,
-                     R_ABS64, resolver_id);
-     iplt_push_reloc(img, pairs_sec_id, (u32)(slot_idx * 16u + 8u), 8,
-                     pair_vaddr + 8u, R_ABS64, slot_id);
-
-     /* Redirect the IFUNC symbol to the stub. Name and binding are kept
-      * so cfree_jit_lookup and external relocs still find it; kind
-      * becomes SK_FUNC since the resolver indirection is hidden behind
-      * the stub. Re-fetch the pointer first in case append_symbol moved
-      * the symbol storage (harmless if storage is stable). */
-     s = LinkSyms_at(&img->syms, i);
-     s->kind = SK_FUNC;
-     s->section_id = iplt_sec_id;
-     s->value = stub_off;
-     s->vaddr = stub_vaddr;
-     s->size = arch->iplt_stub_size;
-
-     ++slot_idx;
-   }
-
-   /* Preinit-array entry: one R_ABS64 filling the 8-byte slot with
-    * __cfree_ifunc_init's resolved address. Preinit runs strictly before
-    * .init_array so user ctors that call IFUNCs see filled .igot.plt
-    * slots. */
-   if (emit_init_array) {
-     iplt_push_reloc(img, init_sec_id, 0, 8, init_vaddr, R_ABS64,
-                     ifunc_init_sym);
-   }
-
-   /* Pass C: propagate the redirect to every per-input LinkSymbol that
-    * shares a redirected IFUNC's name. resolve_undefs copied the
-    * pre-redirect (resolver) fields into each such symbol; without this
-    * fix-up, cross-TU references would resolve to the resolver's address
-    * instead of the stub. A redirected def is recognised by living in
-    * the synthesized .iplt section, which only canonical IFUNC defs land
-    * in (slot symbols are local and nameless). */
-   {
-     u32 n = LinkSyms_count(&img->syms);
-     for (i = 0; i < n; ++i) {
-       LinkSymbol* s = LinkSyms_at(&img->syms, i);
-       LinkSymId canonical;
-       LinkSymbol* def;
-       if (s->name == 0) continue;
-       canonical = symhash_get(&img->globals, s->name);
-       if (canonical == LINK_SYM_NONE || canonical == s->id) continue;
-       def = LinkSyms_at(&img->syms, canonical - 1);
-       if (def->section_id != iplt_sec_id) continue;
-       s->section_id = def->section_id;
-       s->value = def->value;
-       s->vaddr = def->vaddr;
-       s->kind = def->kind;
-       s->size = def->size;
-       s->defined = 1;
-     }
-   }
- }
-
-/* ---- entry symbol ---- */
-
- /* Record the image's entry symbol. Does nothing when the linker has no
-  * entry name configured; otherwise looks the name up in img->globals and
-  * stores its id in img->entry_sym. Panics when the name is not in the
-  * global table at all, or is present but not defined. */
- static void resolve_entry(Linker* l, LinkImage* img) {
-   LinkSymId entry;
-   const char* text;
-   size_t text_len;
-
-   if (l->entry_name == 0) return;
-
-   entry = symhash_get(&img->globals, l->entry_name);
-   if (entry != LINK_SYM_NONE &&
-       LinkSyms_at(&img->syms, entry - 1)->defined) {
-     img->entry_sym = entry;
-     return;
-   }
-
-   /* Failure: fetch the spelling only now, for the diagnostic. */
-   text = pool_str(l->c->global, l->entry_name, &text_len);
-   if (entry == LINK_SYM_NONE) {
-     compiler_panic(l->c, no_loc(), "link: entry symbol '%.*s' not defined",
-                    (int)text_len, text);
-   } else {
-     compiler_panic(l->c, no_loc(), "link: entry symbol '%.*s' is undefined",
-                    (int)text_len, text);
-   }
- }
-
-/* ---- archive ingestion ----
- *
- * Members were parsed up-front by link_add_archive_bytes; this pass
- * decides which ones get pulled into the link. --whole-archive
- * archives include every member; demand archives include any member
- * that defines a global symbol referenced (and not yet defined) by
- * the current input set, iterated to a fixed point so a member that
- * pulls in fresh undefs can drag in further members. */
-
- /* Promote an archive member to a regular link input. Idempotent: a member
-  * already marked `included` is left alone. Otherwise a new input record is
-  * pushed whose id is its 1-based position, the member's ObjBuilder and
-  * name are handed to it, and the member is marked included with its
-  * `obj` cleared. Panics if the input list cannot grow. */
- static void include_archive_member(Linker* l, LinkArchiveMember* mem) {
-   u32 slot = 0;
-   LinkInput* input;
-
-   if (mem->included) return;
-
-   input = LinkInputs_push(&l->inputs, &slot);
-   if (input == NULL)
-     compiler_panic(l->c, no_loc(), "link: oom growing inputs (archive member)");
-
-   input->id = (LinkInputId)(slot + 1u);
-   input->kind = LINK_INPUT_OBJ_BYTES; /* the input owns the ObjBuilder now */
-   input->name = mem->name;
-   input->obj = mem->obj;
-
-   mem->obj = NULL; /* ownership transferred */
-   mem->included = 1;
- }
-
-/* Build presence sets across all currently-included inputs. The values
- * stored in the SymHash are dummies (1) — only key presence matters. */
-static void scan_presence(Linker* l, SymHash* defined, SymHash* undefs) {
- u32 ii;
- ObjSymIter* it;
- ObjSymEntry e;
- for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) {
- ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj;
- it = obj_symiter_new(ob);
- while (obj_symiter_next(it, &e)) {
- const ObjSym* s = e.sym;
- if (s->name == 0) continue;
- if (s->bind == SB_LOCAL) continue;
- if (s->kind == SK_UNDEF)
- symhash_set(undefs, s->name, 1u);
- else
- symhash_set(defined, s->name, 1u);
- }
- obj_symiter_free(it);
- }
-}
-
-/* True if any currently-included input defines at least one
- * STT_GNU_IFUNC symbol. Used to seed __cfree_ifunc_init into the
- * archive demand-load wanted set when emitting a static ET_EXEC: the
- * synthesized .init_array entry pulls the rt member which carries the
- * startup ctor that fills .igot.plt slots. */
-static int inputs_have_defined_ifunc(Linker* l) {
- u32 ii;
- ObjSymIter* it;
- ObjSymEntry e;
- for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) {
- ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj;
- it = obj_symiter_new(ob);
- while (obj_symiter_next(it, &e)) {
- const ObjSym* s = e.sym;
- if (s->kind == SK_IFUNC) {
- obj_symiter_free(it);
- return 1;
- }
- }
- obj_symiter_free(it);
- }
- return 0;
-}
-
-/* True if `mem` defines a non-undef SB_GLOBAL or SB_WEAK symbol that's
- * in `wanted` and not already in `defined`. Both GNU ld and lld pull
- * archive members on weak defs against an unresolved undef — the
- * "weak doesn't drag" rule applies to weak *references*, not weak
- * definitions. (musl's __init_tls is a weak def and must be pulled
- * to satisfy __libc_start_main's hard ref.) */
-static int member_satisfies(LinkArchiveMember* mem, const SymHash* defined,
- const SymHash* wanted) {
- ObjSymIter* it;
- ObjSymEntry e;
- int hit = 0;
- it = obj_symiter_new(mem->obj);
- while (obj_symiter_next(it, &e)) {
- const ObjSym* s = e.sym;
- if (s->name == 0) continue;
- if (s->kind == SK_UNDEF) continue;
- if (s->bind != SB_GLOBAL && s->bind != SB_WEAK) continue;
- if (symhash_get(wanted, s->name) == LINK_SYM_NONE) continue;
- if (symhash_get(defined, s->name) != LINK_SYM_NONE) continue;
- hit = 1;
- break;
- }
- obj_symiter_free(it);
- return hit;
-}
-
-void link_ingest_archives(Linker* l) {
- u32 a, m;
- if (LinkArchives_count(&l->archives) == 0) return;
-
- /* Pass 1: --whole-archive members are pulled unconditionally. */
- for (a = 0; a < LinkArchives_count(&l->archives); ++a) {
- LinkArchive* ar = LinkArchives_at(&l->archives, a);
- if (!ar->whole_archive) continue;
- for (m = 0; m < ar->nmembers; ++m)
- include_archive_member(l, &ar->members[m]);
- }
-
- /* When emitting a static ET_EXEC and any input defines an IFUNC,
- * seed __cfree_ifunc_init into the wanted set so demand-load pulls
- * libcfree_rt's ifunc_init.c. Layout_iplt later synthesizes a
- * .init_array entry referencing this symbol; the rt member's
- * implementation walks .iplt.pairs and fills each slot at startup.
- * Done once before the demand loop — the seed needs to be present
- * on every iteration of the loop's local symhash, so we stash the
- * Sym handle and inject it inside the loop body. */
- Sym want_ifunc_init = 0;
- if (l->emit_static_exe && inputs_have_defined_ifunc(l)) {
- want_ifunc_init = pool_intern_cstr(l->c->global, "__cfree_ifunc_init");
- }
-
- /* Pass 2: demand loop over the remaining archives. Pulling member A
- * may introduce undefs satisfied by member B, so iterate to a
- * fixed point. Bounded by total member count across archives. */
- for (;;) {
- SymHash defined, undefs;
- int changed = 0;
- symhash_init(&defined, l->heap);
- symhash_init(&undefs, l->heap);
- scan_presence(l, &defined, &undefs);
- if (want_ifunc_init != 0 &&
- symhash_get(&defined, want_ifunc_init) == LINK_SYM_NONE)
- symhash_set(&undefs, want_ifunc_init, 1u);
-
- for (a = 0; a < LinkArchives_count(&l->archives); ++a) {
- LinkArchive* ar = LinkArchives_at(&l->archives, a);
- if (ar->whole_archive) continue;
- for (m = 0; m < ar->nmembers; ++m) {
- LinkArchiveMember* mem = &ar->members[m];
- if (mem->included) continue;
- if (!member_satisfies(mem, &defined, &undefs)) continue;
- include_archive_member(l, mem);
- changed = 1;
- }
- }
- symhash_fini(&defined);
- symhash_fini(&undefs);
- if (!changed) break;
- }
-}
-
-/* ---- public ---- */
+/* ---- public orchestration ---- */
LinkImage* link_resolve(Linker* l) {
LinkImage* img;
Heap* h;
- /* Expand archive members into Linker.inputs before any layout
- * machinery runs — once that's done, the rest of the pipeline
- * sees a single flat input list and doesn't care about archives. */
link_ingest_archives(l);
img = link_image_alloc(l->c);
h = img->heap;
img->linker = l;
- /* Per-input map storage. */
img->ninput_maps = LinkInputs_count(&l->inputs);
img->input_maps =
LinkInputs_count(&l->inputs)
@@ -3179,42 +915,21 @@ LinkImage* link_resolve(Linker* l) {
memset(img->input_maps, 0,
sizeof(*img->input_maps) * LinkInputs_count(&l->inputs));
- resolve_symbols(l, img);
+ link_resolve_symbols(l, img);
{
GcLive g = {0};
- gc_live_alloc(&g, l, h);
- gc_compute(l, img, &g);
- layout_sections(l, img, &g);
- layout_commons(l, img);
- emit_segment_bytes(l, img);
- link_symbols_to_sections(l, img);
- emit_array_boundaries(l, img);
- emit_tls_boundaries(l, img);
- emit_encoding_section_boundaries(l, img);
- /* Linker-defined synthetic symbols that may be referenced as
- * undefs (often hidden) by sysroot startfiles / nonshared archives.
- * Pre-defining them here satisfies resolve_undefs' undef sweep so
- * it doesn't panic on hidden-undef references that no object or
- * DSO supplies. vaddr=0 is a placeholder; layout_dyn may refine
- * _DYNAMIC to the actual .dynamic vaddr later. */
- emit_boundary_sym(l, img, "__dso_handle", 0);
- emit_boundary_sym(l, img, "_DYNAMIC", 0);
- /* _GLOBAL_OFFSET_TABLE_ is referenced as a SHN_UNDEF marker by
- * any x86_64 input that uses the GOT (musl/glibc libc.a routinely
- * do). GNU ld auto-defines it at the .got base; cfree-ld doesn't
- * use the symbol for any actual reloc, so a placeholder vaddr=0
- * keeps the undef sweep happy without affecting code that
- * computes GOT addresses through their own GOTPC32 relocs. */
- emit_boundary_sym(l, img, "_GLOBAL_OFFSET_TABLE_", 0);
- /* RISC-V startfiles use `__global_pointer$` to load gp in _start;
- * the RISC-V psABI says it's defined as `.sdata + 0x800` so
- * gp-relative addressing covers [.sdata - 2KiB, .sdata + 2KiB).
- * We don't have .sdata as a distinct section, but any address in
- * the writable data region is functionally adequate when the code
- * doesn't actually use gp-relative addressing (cfree-cc doesn't
- * emit `-mrelax`, and musl's static crt only loads gp without
- * dereferencing through it). Pick the first RW segment base +
- * 0x800. Only relevant for rv64; harmless on other arches. */
+ link_gc_live_alloc(&g, l, h);
+ link_gc_compute(l, img, &g);
+ link_layout_sections(l, img, &g);
+ link_layout_commons(l, img);
+ link_emit_segment_bytes(l, img);
+ link_assign_symbol_vaddrs(l, img);
+ link_emit_array_boundaries(l, img);
+ link_emit_tls_boundaries(l, img);
+ link_emit_encoding_section_boundaries(l, img);
+ link_emit_boundary_sym(l, img, "__dso_handle", 0);
+ link_emit_boundary_sym(l, img, "_DYNAMIC", 0);
+ link_emit_boundary_sym(l, img, "_GLOBAL_OFFSET_TABLE_", 0);
if (l->c->target.arch == CFREE_ARCH_RV64) {
u32 si;
u64 gp_vaddr = 0;
@@ -3224,67 +939,28 @@ LinkImage* link_resolve(Linker* l) {
break;
}
}
- emit_boundary_sym(l, img, "__global_pointer$", gp_vaddr);
+ link_emit_boundary_sym(l, img, "__global_pointer$", gp_vaddr);
}
- resolve_undefs(l, img);
- gc_drop_dead_globals(l, img, &g);
- /* layout_iplt runs last among the symbol-shaping passes: it
- * redirects each defined IFUNC LinkSymbol from the resolver
- * to its iplt stub and (under emit_static_exe) materializes a
- * .init_array entry pointing at __cfree_ifunc_init. We then
- * re-run emit_array_boundaries so __init_array_start/end span
- * the synthetic entry. Cross-TU undefs may retain the
- * pre-redirect (resolver) vaddr — only a concern for
- * GOT-slot fills; not exercised by current tests. */
- layout_iplt(l, img);
- if (img->niplt) emit_array_boundaries(l, img);
+ link_resolve_undefs(l, img);
+ link_gc_drop_dead_globals(l, img, &g);
+ link_layout_iplt(l, img);
+ if (img->niplt) link_emit_array_boundaries(l, img);
{
LinkSymId* got_map = NULL;
LinkSymId* stub_map = NULL;
- /* Both maps are sparse arrays indexed by orig LinkSymId, sized
- * to the symbol count BEFORE either pass appends synthetic
- * entries (stub/slot/resolver_rec from layout_jit_call_stubs;
- * GOT-slot syms from layout_got). Snapshot here so the free
- * size matches the allocation. */
u32 map_size = LinkSyms_count(&img->syms) + 1u;
- /* JIT-only: synthesize per-target stubs for CALL26/JUMP26
- * against resolver-supplied or weak-undef SK_ABS targets so the
- * branch displacement stays within ±128 MiB of .text regardless
- * of where the resolver-returned host pointer lives. Runs
- * before layout_got (the stub's slot reloc is non-GOT) and
- * before emit_reloc_records (which consults stub_map). */
- layout_jit_call_stubs(l, img, map_size, &stub_map);
- /* layout_got synthesizes ELF-shaped .got slots and rewrites
- * GOT-using reloc targets to point at them. Mach-O has its own
- * __DATA_CONST,__got mechanism wired up in link_macho.c for the
- * exe path (driven by collect_imports), so skip the ELF synthesis
- * there. The JIT path has no equivalent — link_jit.c does not
- * run collect_imports — so fall through to layout_got on Mach-O
- * when emit_static_exe is off (cfree_link_jit). Without this,
- * cross-TU GOT_LOAD_PAGE21 / LD64_GOT_LO12_NC relocs would patch
- * with S = symbol value instead of S = slot address (see
- * doc/MACHO.md §3.1). */
+ link_layout_jit_stubs(l, img, map_size, &stub_map);
if (l->c->target.obj != CFREE_OBJ_MACHO || !l->emit_static_exe)
- layout_got(l, img, map_size, &got_map);
- emit_reloc_records(l, img, got_map, stub_map);
+ link_layout_got(l, img, map_size, &got_map);
+ link_emit_relocations(l, img, got_map, stub_map);
if (got_map) h->free(h, got_map, sizeof(*got_map) * map_size);
if (stub_map) h->free(h, stub_map, sizeof(*stub_map) * map_size);
}
- /* Phase 4 dynamic-link tables. Runs after every other layout
- * pass: it depends on import resolution (resolve_undefs), every
- * synthesized section already being on the image (layout_got /
- * layout_iplt), and adds its own segments at the tail. The
- * static-exe path early-outs in layout_dyn (l->emit_pie==0). */
layout_dyn(l, img);
- resolve_entry(l, img);
- gc_live_free(&g, h);
+ link_resolve_entry(l, img);
+ link_gc_live_free(&g, h);
}
- /* Hand the input ObjBuilders to the image so cfree_jit_view can
- * surface .debug_* sections after link_free runs (layout/reloc are
- * complete, so the builders are otherwise idle). Must be the last
- * step before returning — any pass that walks LinkInputs.obj
- * expecting a value would break otherwise. */
link_capture_debug_inputs(l, img);
return img;
diff --git a/src/link/link_reloc_layout.c b/src/link/link_reloc_layout.c
@@ -0,0 +1,1236 @@
+/* link_reloc_layout.c — post-section-placement passes:
+ * link_assign_symbol_vaddrs — symbol→vaddr binding (pass 3)
+ * link_emit_array_boundaries — __init_array_start/end etc.
+ * link_emit_tls_boundaries — __tdata_start/end, __tbss_size
+ * link_emit_encoding_section_boundaries — __start_<X>/__stop_<X>
+ * link_layout_jit_stubs — AArch64 JIT CALL26/JUMP26 stubs
+ * link_layout_got — static-PIC .got
+ * link_layout_iplt — STT_GNU_IFUNC trampoline (.iplt etc.)
+ * link_emit_relocations — emit LinkRelocApply records (pass 4)
+ * link_resolve_entry — entry symbol lookup
+ */
+
+#include <cfree.h>
+#include <string.h>
+
+#include "core/buf.h"
+#include "core/bytes.h"
+#include "core/heap.h"
+#include "core/pool.h"
+#include "core/util.h"
+#include "core/vec.h"
+#include "link/link.h"
+#include "link/link_arch.h"
+#include "link/link_internal.h"
+
+/* Returns an all-zero SrcLoc, used for the link-time diagnostics in this
+ * file that have no source position to report. */
+static SrcLoc no_loc(void) {
+  return (SrcLoc){0, 0, 0};
+}
+
+/* ---- pass 3: assign symbol vaddrs ---- */
+
+/* Pass 3: bind symbols to output sections and compute their final vaddrs.
+ *
+ *  1. For every non-DSO input, walk its symbol table; when the mapped
+ *     LinkSymbol is defined, owned by this input, and lives in a real object
+ *     section, record the output section that object section was mapped to.
+ *  2. For every defined symbol that has an output section, set
+ *     vaddr = section vaddr + value.
+ *  3. For every still-undefined named symbol, look its name up in
+ *     img->globals; if a different, defined symbol is registered there, copy
+ *     its section/value/vaddr/kind and mark this one defined.
+ *
+ * Steps 1 and 2 leave SK_ABS symbols with an already non-zero vaddr alone.
+ * The steps stay separate loops: step 3 reads vaddrs that step 2 wrote. */
+void link_assign_symbol_vaddrs(Linker* l, LinkImage* img) {
+  u32 idx;
+
+  /* Step 1: section binding, driven by each input's own symbol table. */
+  for (idx = 0; idx < LinkInputs_count(&l->inputs); ++idx) {
+    LinkInput* input = LinkInputs_at(&l->inputs, idx);
+    InputMap* map = &img->input_maps[idx];
+    ObjSymIter* iter;
+    ObjSymEntry ent;
+
+    if (input->kind == LINK_INPUT_DSO_BYTES) continue;
+
+    iter = obj_symiter_new(input->obj);
+    while (obj_symiter_next(iter, &ent)) {
+      LinkSymId mapped = map->sym[ent.id];
+      LinkSymbol* sym;
+      if (mapped == LINK_SYM_NONE) continue;
+      sym = LinkSyms_at(&img->syms, mapped - 1);
+      if (sym->defined && !(sym->kind == SK_ABS && sym->vaddr != 0) &&
+          ent.sym->section_id != OBJ_SEC_NONE && sym->input_id == input->id) {
+        sym->section_id = map->section[ent.sym->section_id];
+      }
+    }
+    obj_symiter_free(iter);
+  }
+
+  /* Step 2: section-relative value -> absolute vaddr. */
+  for (idx = 0; idx < LinkSyms_count(&img->syms); ++idx) {
+    LinkSymbol* sym = LinkSyms_at(&img->syms, idx);
+    int pinned_abs = (sym->kind == SK_ABS && sym->vaddr != 0);
+    if (pinned_abs || !sym->defined || sym->section_id == LINK_SEC_NONE)
+      continue;
+    sym->vaddr = img->sections[sym->section_id - 1].vaddr + sym->value;
+  }
+
+  /* Step 3: undefined references mirror the defined global of that name. */
+  for (idx = 0; idx < LinkSyms_count(&img->syms); ++idx) {
+    LinkSymbol* sym = LinkSyms_at(&img->syms, idx);
+    LinkSymId canon;
+    LinkSymbol* def;
+    if (sym->defined || sym->name == 0) continue;
+    canon = symhash_get(&img->globals, sym->name);
+    if (canon == LINK_SYM_NONE || canon == sym->id) continue;
+    def = LinkSyms_at(&img->syms, canon - 1);
+    if (!def->defined) continue;
+    sym->section_id = def->section_id;
+    sym->value = def->value;
+    sym->vaddr = def->vaddr;
+    sym->kind = def->kind;
+    sym->defined = 1;
+  }
+}
+
+/* ---- pass 3b: boundary symbols ---- */
+
+/* Pass 3b: emit the six __{init,fini,preinit}_array_{start,end} boundary
+ * symbols through link_emit_boundary_sym.
+ *
+ * Each start/end pair is the lowest start vaddr and highest end vaddr over
+ * all output sections carrying the matching SSEM_*_ARRAY semantic, taken from
+ *   (a) input sections that were mapped into the image, and
+ *   (b) image sections with no owning input (input_id == LINK_INPUT_NONE).
+ * A family with no contributing section gets start == end == 0. */
+void link_emit_array_boundaries(Linker* l, LinkImage* img) {
+  /* Slot order in both arrays: 0 = init, 1 = fini, 2 = preinit. */
+  u64 lo[3] = {(u64)-1, (u64)-1, (u64)-1};
+  u64 hi[3] = {0, 0, 0};
+  u32 in_idx, sec_idx, k;
+
+  /* (a) sections contributed by inputs. Index 0 is skipped. */
+  for (in_idx = 0; in_idx < LinkInputs_count(&l->inputs); ++in_idx) {
+    ObjBuilder* obj = LinkInputs_at(&l->inputs, in_idx)->obj;
+    InputMap* map = &img->input_maps[in_idx];
+    for (sec_idx = 1; sec_idx < obj_section_count(obj); ++sec_idx) {
+      const Section* src = obj_section_get(obj, sec_idx);
+      const LinkSection* out;
+      u64 first, last;
+      if (!src) continue;
+      if (src->sem == SSEM_INIT_ARRAY) {
+        k = 0;
+      } else if (src->sem == SSEM_FINI_ARRAY) {
+        k = 1;
+      } else if (src->sem == SSEM_PREINIT_ARRAY) {
+        k = 2;
+      } else {
+        continue;
+      }
+      if (map->section[sec_idx] == LINK_SEC_NONE) continue;
+      out = &img->sections[map->section[sec_idx] - 1];
+      first = out->vaddr;
+      last = out->vaddr + out->size;
+      if (first < lo[k]) lo[k] = first;
+      if (last > hi[k]) hi[k] = last;
+    }
+  }
+
+  /* (b) image sections that do not come from any input. */
+  for (sec_idx = 0; sec_idx < img->nsections; ++sec_idx) {
+    const LinkSection* out = &img->sections[sec_idx];
+    u64 first, last;
+    if (out->input_id != LINK_INPUT_NONE) continue;
+    if (out->sem == SSEM_INIT_ARRAY) {
+      k = 0;
+    } else if (out->sem == SSEM_FINI_ARRAY) {
+      k = 1;
+    } else if (out->sem == SSEM_PREINIT_ARRAY) {
+      k = 2;
+    } else {
+      continue;
+    }
+    first = out->vaddr;
+    last = out->vaddr + out->size;
+    if (first < lo[k]) lo[k] = first;
+    if (last > hi[k]) hi[k] = last;
+  }
+
+  /* Families that never saw a section collapse to the empty range [0, 0). */
+  for (k = 0; k < 3; ++k) {
+    if (lo[k] == (u64)-1) {
+      lo[k] = 0;
+      hi[k] = 0;
+    }
+  }
+
+  link_emit_boundary_sym(l, img, "__init_array_start", lo[0]);
+  link_emit_boundary_sym(l, img, "__init_array_end", hi[0]);
+  link_emit_boundary_sym(l, img, "__fini_array_start", lo[1]);
+  link_emit_boundary_sym(l, img, "__fini_array_end", hi[1]);
+  link_emit_boundary_sym(l, img, "__preinit_array_start", lo[2]);
+  link_emit_boundary_sym(l, img, "__preinit_array_end", hi[2]);
+}
+
+/* Emit the TLS template symbols derived from img->tls_*:
+ *   __tdata_start = tls_vaddr
+ *   __tdata_end   = tls_vaddr + tls_filesz
+ *   __tbss_size   = tls_memsz - tls_filesz
+ * The first two go through link_emit_boundary_sym. __tbss_size is a size, not
+ * an address, so it is written here as a defined SK_ABS global whose vaddr
+ * field holds the byte count; an existing global of that name is overwritten
+ * in place (keeping its id), otherwise a new symbol is appended and
+ * registered in img->globals. */
+void link_emit_tls_boundaries(Linker* l, LinkImage* img) {
+  u64 init_lo = img->tls_vaddr;
+  u64 init_hi = img->tls_vaddr + img->tls_filesz;
+  u64 zero_fill = img->tls_memsz - img->tls_filesz;
+  Sym tbss_name = pool_intern_cstr(l->c->global, "__tbss_size");
+  LinkSymId existing;
+  LinkSymbol size_sym;
+
+  link_emit_boundary_sym(l, img, "__tdata_start", init_lo);
+  link_emit_boundary_sym(l, img, "__tdata_end", init_hi);
+
+  existing = symhash_get(&img->globals, tbss_name);
+
+  memset(&size_sym, 0, sizeof(size_sym));
+  size_sym.name = tbss_name;
+  size_sym.kind = SK_ABS;
+  size_sym.bind = SB_GLOBAL;
+  size_sym.defined = 1;
+  size_sym.vaddr = zero_fill;
+
+  if (existing == LINK_SYM_NONE) {
+    LinkSymId fresh = link_append_symbol(img, &size_sym);
+    symhash_insert(&img->globals, tbss_name, fresh, &existing);
+  } else {
+    LinkSymbol* slot = LinkSyms_at(&img->syms, existing - 1);
+    *slot = size_sym;
+    slot->id = existing; /* the memset above cleared the id; put it back */
+  }
+}
+
+/* Resolve undefined __start_<X> / __stop_<X> style references.
+ *
+ * For each undefined, named symbol that link_gc_split_start_stop accepts, the
+ * embedded section name <X> is interned and every input section with that
+ * name that was mapped into the image is scanned. If at least one is found,
+ * the symbol becomes a defined SK_OBJ global whose vaddr is the lowest start
+ * (for a start symbol) or the highest end (for a stop symbol) of those
+ * sections. Symbols with no matching section are left undefined. */
+void link_emit_encoding_section_boundaries(Linker* l, LinkImage* img) {
+  u32 sym_idx;
+  for (sym_idx = 0; sym_idx < LinkSyms_count(&img->syms); ++sym_idx) {
+    LinkSymbol* want = LinkSyms_at(&img->syms, sym_idx);
+    const char* text;
+    size_t text_len, ident_off, ident_len;
+    int wants_start;
+    Sym section_name;
+    u64 span_lo = (u64)-1;
+    u64 span_hi = 0;
+    int matched = 0;
+    u32 in_idx, sec_idx;
+
+    if (want->defined || want->name == 0) continue;
+    text = pool_str(l->c->global, want->name, &text_len);
+    if (!link_gc_split_start_stop(text, text_len, &ident_off, &ident_len,
+                                  &wants_start))
+      continue;
+    section_name = pool_intern(l->c->global, text + ident_off, ident_len);
+
+    for (in_idx = 0; in_idx < LinkInputs_count(&l->inputs); ++in_idx) {
+      ObjBuilder* obj = LinkInputs_at(&l->inputs, in_idx)->obj;
+      InputMap* map = &img->input_maps[in_idx];
+      for (sec_idx = 1; sec_idx < obj_section_count(obj); ++sec_idx) {
+        const Section* src = obj_section_get(obj, sec_idx);
+        const LinkSection* out;
+        u64 first, last;
+        if (!src || src->name != section_name) continue;
+        if (map->section[sec_idx] == LINK_SEC_NONE) continue;
+        out = &img->sections[map->section[sec_idx] - 1];
+        first = out->vaddr;
+        last = out->vaddr + out->size;
+        if (first < span_lo) span_lo = first;
+        if (last > span_hi) span_hi = last;
+        matched = 1;
+      }
+    }
+
+    if (matched) {
+      want->kind = SK_OBJ;
+      want->bind = SB_GLOBAL;
+      want->defined = 1;
+      want->vaddr = wants_start ? span_lo : span_hi;
+    }
+  }
+}
+
+/* ---- pass 4: reloc records ---- */
+
+/* Maps a relocation kind to its byte width (1, 2, 4 or 8). Kinds that are not
+ * listed return 0. Cases are grouped by width, then by architecture. */
+static u8 reloc_width(RelocKind k) {
+  u8 width = 0;
+  switch (k) {
+    /* ---- 1 byte ---- */
+    case R_RV_ADD8:
+    case R_RV_SUB8:
+    case R_RV_SUB6:
+    case R_RV_SET6:
+    case R_RV_SET8:
+      width = 1;
+      break;
+
+    /* ---- 2 bytes ---- */
+    case R_AARCH64_ABS16:
+    case R_AARCH64_PREL16:
+    case R_RV_RVC_BRANCH:
+    case R_RV_RVC_JUMP:
+    case R_RV_ADD16:
+    case R_RV_SUB16:
+    case R_RV_SET16:
+      width = 2;
+      break;
+
+    /* ---- 4 bytes: generic / x86-64 ---- */
+    case R_ABS32:
+    case R_REL32:
+    case R_PC32:
+    case R_GOT32:
+    case R_PLT32:
+    case R_X64_PLT32:
+    case R_X64_32S:
+    case R_X64_TPOFF32:
+    case R_X64_GOTPCREL:
+    case R_X64_GOTPCRELX:
+    case R_X64_REX_GOTPCRELX:
+    case R_X64_GOTPC32:
+    /* ---- 4 bytes: AArch64 ---- */
+    case R_AARCH64_JUMP26:
+    case R_AARCH64_CALL26:
+    case R_AARCH64_CONDBR19:
+    case R_AARCH64_TSTBR14:
+    case R_AARCH64_LD_PREL_LO19:
+    case R_AARCH64_ADR_PREL_LO21:
+    case R_AARCH64_ADR_PREL_PG_HI21:
+    case R_AARCH64_ADR_PREL_PG_HI21_NC:
+    case R_AARCH64_ADD_ABS_LO12_NC:
+    case R_AARCH64_LDST8_ABS_LO12_NC:
+    case R_AARCH64_LDST16_ABS_LO12_NC:
+    case R_AARCH64_LDST32_ABS_LO12_NC:
+    case R_AARCH64_LDST64_ABS_LO12_NC:
+    case R_AARCH64_LDST128_ABS_LO12_NC:
+    case R_AARCH64_ADR_GOT_PAGE:
+    case R_AARCH64_LD64_GOT_LO12_NC:
+    case R_AARCH64_TLSLE_ADD_TPREL_HI12:
+    case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
+    case R_AARCH64_TLVP_LOAD_PAGE21:
+    case R_AARCH64_TLVP_LOAD_PAGEOFF12:
+    /* ---- 4 bytes: RISC-V ---- */
+    case R_RV_HI20:
+    case R_RV_LO12_I:
+    case R_RV_LO12_S:
+    case R_RV_BRANCH:
+    case R_RV_JAL:
+    case R_RV_PCREL_HI20:
+    case R_RV_PCREL_LO12_I:
+    case R_RV_PCREL_LO12_S:
+    case R_RV_GOT_HI20:
+    case R_RV_TPREL_HI20:
+    case R_RV_TPREL_LO12_I:
+    case R_RV_TPREL_LO12_S:
+    case R_RV_RELAX:
+    case R_RV_TPREL_ADD:
+    case R_RV_ADD32:
+    case R_RV_SUB32:
+    case R_RV_SET32:
+      width = 4;
+      break;
+
+    /* ---- 8 bytes ---- */
+    case R_ABS64:
+    case R_REL64:
+    case R_PC64:
+    case R_X64_TPOFF64:
+    case R_RV_CALL:
+    case R_RV_ADD64:
+    case R_RV_SUB64:
+      width = 8;
+      break;
+
+    default:
+      break;
+  }
+  return width;
+}
+
+/* Returns 1 when `kind` is one of the GOT-referencing relocation kinds
+ * (AArch64 ADR_GOT_PAGE / LD64_GOT_LO12_NC, x86-64 GOTPCREL family, RISC-V
+ * GOT_HI20), 0 otherwise. link_layout_got uses this to decide which reloc
+ * targets need a GOT slot. */
+static int reloc_uses_got(u16 kind) {
+  return kind == R_AARCH64_ADR_GOT_PAGE ||
+         kind == R_AARCH64_LD64_GOT_LO12_NC ||
+         kind == R_X64_GOTPCREL ||
+         kind == R_X64_GOTPCRELX ||
+         kind == R_X64_REX_GOTPCRELX ||
+         kind == R_RV_GOT_HI20;
+}
+
+/* ---- iplt alloc helpers (used by link_layout_jit_stubs too) ---- */
+
+/* Grows the three parallel per-segment arrays on `img` (segments,
+ * segment_bytes, segment_bytes_cap) so they can hold `nseg` more entries, and
+ * returns the index of the first new entry. Panics on allocation failure.
+ *
+ * The new entries are NOT initialised and img->nsegments is NOT advanced;
+ * the caller fills the entries and bumps the count itself. */
+u32 link_iplt_alloc_segments(LinkImage* img, u32 nseg) {
+  Heap* heap = img->heap;
+  const u32 first_new = img->nsegments;
+  const u32 grown = first_new + nseg;
+  LinkSegment* segs;
+  u8** bufs;
+  size_t* caps;
+
+  segs = (LinkSegment*)heap->realloc(
+      heap, img->segments, sizeof(*img->segments) * img->nsegments,
+      sizeof(*img->segments) * grown, _Alignof(LinkSegment));
+  bufs = (u8**)heap->realloc(
+      heap, img->segment_bytes, sizeof(*img->segment_bytes) * img->nsegments,
+      sizeof(*img->segment_bytes) * grown, _Alignof(u8*));
+  caps = (size_t*)heap->realloc(
+      heap, img->segment_bytes_cap,
+      sizeof(*img->segment_bytes_cap) * img->nsegments,
+      sizeof(*img->segment_bytes_cap) * grown, _Alignof(size_t));
+
+  if (!(segs && bufs && caps))
+    compiler_panic(img->c, no_loc(), "link: oom on iplt segments");
+
+  img->segments = segs;
+  img->segment_bytes = bufs;
+  img->segment_bytes_cap = caps;
+  return first_new;
+}
+
+/* Grows img->sections so it can hold `nsec` more entries and returns the
+ * index of the first new entry. Panics on allocation failure.
+ *
+ * The new entries are NOT initialised and img->nsections is NOT advanced;
+ * the caller fills the entries and bumps the count itself. */
+u32 link_iplt_alloc_sections(LinkImage* img, u32 nsec) {
+  Heap* heap = img->heap;
+  const u32 first_new = img->nsections;
+  const u32 grown = first_new + nsec;
+  LinkSection* secs = (LinkSection*)heap->realloc(
+      heap, img->sections, sizeof(*img->sections) * img->nsections,
+      sizeof(*img->sections) * grown, _Alignof(LinkSection));
+
+  if (secs == NULL)
+    compiler_panic(img->c, no_loc(), "link: oom on iplt sections");
+
+  img->sections = secs;
+  return first_new;
+}
+
+/* ---- pass: JIT call stubs ---- */
+
+/* JIT-only pass: synthesize one call stub per SK_ABS symbol that is the
+ * target of an AArch64 CALL26/JUMP26 reloc, so the branch displacement stays
+ * within +/-128 MiB of .text regardless of where the absolute target address
+ * (e.g. a resolver-returned host pointer) lives.
+ *
+ * No-op (leaves *stub_map_out == NULL) when emitting a static exe, when the
+ * target arch is not ARM64, when no arch descriptor is available, or when no
+ * qualifying reloc exists.
+ *
+ * Otherwise it appends, page-aligned past the highest existing segment end:
+ *   - an executable segment + ".cfree_jit_call_stubs" section holding one
+ *     arch->iplt_stub_size-byte stub per target, and
+ *   - a writable segment + ".cfree_jit_call_slots" section holding one
+ *     8-byte slot per target.
+ * For each target it appends three local symbols (slot, an SK_ABS copy of
+ * the target address, stub), the relocs reported by arch->emit_iplt_stub
+ * (all against the slot symbol), and an R_ABS64 reloc that fills the slot
+ * with the target address.
+ *
+ * map_size      number of entries in the returned map; must be greater than
+ *               every LinkSymId that exists when this pass starts (the map
+ *               is indexed by the ORIGINAL target id, before the synthetic
+ *               symbols are appended).
+ * stub_map_out  receives a heap array mapping original target id -> stub
+ *               symbol id (LINK_SYM_NONE for symbols without a stub). The
+ *               caller frees it with size sizeof(LinkSymId) * map_size. */
+void link_layout_jit_stubs(Linker* l, LinkImage* img, u32 map_size,
+                           LinkSymId** stub_map_out) {
+  Heap* h = img->heap;
+  const LinkArchDesc* arch;
+  LinkSymId* stub_map;
+  LinkSymId* targets = NULL;
+  u32 ntarget = 0, tcap = 0;
+  u32 ii, k, i;
+  u64 page;
+  u64 base_vaddr = 0;
+  u64 stubs_vaddr, slots_vaddr;
+  u64 stubs_size, slots_size;
+  u32 stubs_seg_idx, slots_seg_idx;
+  u32 seg_base, sec_base;
+  LinkSegment* stubs_seg;
+  LinkSegment* slots_seg;
+  LinkSection* stubs_sec;
+  LinkSection* slots_sec;
+  u8* stubs_bytes;
+
+  *stub_map_out = NULL;
+  if (l->emit_static_exe) return;
+  if (l->c->target.arch != CFREE_ARCH_ARM_64) return;
+
+  arch = link_arch_desc_for(l->c);
+  if (!arch) return;
+
+  stub_map = (LinkSymId*)h->alloc(h, sizeof(*stub_map) * map_size,
+                                  _Alignof(LinkSymId));
+  if (!stub_map) compiler_panic(img->c, no_loc(), "link: oom on stub map");
+  memset(stub_map, 0, sizeof(*stub_map) * map_size);
+
+  /* Discovery: collect each distinct SK_ABS branch target once. During this
+   * scan stub_map doubles as a "seen" set (it holds list position + 1); the
+   * entries are reset below before real stub ids are stored. */
+  for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) {
+    ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj;
+    InputMap* m = &img->input_maps[ii];
+    u32 total = obj_reloc_total(ob);
+    if (!total) continue;
+    for (k = 0; k < total; ++k) {
+      const Reloc* r = obj_reloc_at(ob, k);
+      const Section* s = obj_section_get(ob, r->section_id);
+      LinkSymId target;
+      const LinkSymbol* tgt;
+      if (!s || !link_section_kept(s)) continue;
+      if (m->section[r->section_id] == LINK_SEC_NONE) continue;
+      if (r->kind != R_AARCH64_CALL26 && r->kind != R_AARCH64_JUMP26) continue;
+      if (r->sym == OBJ_SYM_NONE || r->sym >= m->nsym) continue;
+      target = m->sym[r->sym];
+      if (target == LINK_SYM_NONE) continue;
+      tgt = LinkSyms_at(&img->syms, target - 1);
+      if (!tgt || tgt->kind != SK_ABS) continue;
+      if (stub_map[target] != LINK_SYM_NONE) continue;
+      if (VEC_GROW(h, targets, tcap, ntarget + 1u))
+        compiler_panic(img->c, no_loc(), "link: oom on stub target list");
+      targets[ntarget] = target;
+      stub_map[target] = (LinkSymId)(ntarget + 1u);
+      ntarget++;
+    }
+  }
+
+  if (ntarget == 0) {
+    if (targets) h->free(h, targets, sizeof(*targets) * tcap);
+    h->free(h, stub_map, sizeof(*stub_map) * map_size);
+    return;
+  }
+  /* Clear the temporary "seen" markers. */
+  for (i = 0; i < ntarget; ++i) stub_map[targets[i]] = LINK_SYM_NONE;
+
+  /* Place stubs, then slots, on fresh pages after every existing segment. */
+  page = link_layout_page_size(l);
+  for (i = 0; i < img->nsegments; ++i) {
+    u64 end = img->segments[i].vaddr + img->segments[i].mem_size;
+    if (end > base_vaddr) base_vaddr = end;
+  }
+  base_vaddr = ALIGN_UP(base_vaddr, (u64)page);
+  stubs_vaddr = base_vaddr;
+  stubs_size = (u64)ntarget * (u64)arch->iplt_stub_size;
+  slots_vaddr = ALIGN_UP(stubs_vaddr + stubs_size, (u64)page);
+  slots_size = (u64)ntarget * 8u;
+
+  /* The alloc helper only grows the arrays; entries are filled in here and
+   * img->nsegments is bumped once both are ready. */
+  seg_base = link_iplt_alloc_segments(img, 2u);
+  stubs_seg_idx = seg_base + 0u;
+  slots_seg_idx = seg_base + 1u;
+
+  stubs_seg = &img->segments[stubs_seg_idx];
+  memset(stubs_seg, 0, sizeof(*stubs_seg));
+  stubs_seg->id = (LinkSegmentId)(stubs_seg_idx + 1u);
+  stubs_seg->flags = SF_ALLOC | SF_EXEC;
+  stubs_seg->file_offset = stubs_vaddr;
+  stubs_seg->vaddr = stubs_vaddr;
+  stubs_seg->file_size = stubs_size;
+  stubs_seg->mem_size = stubs_size;
+  stubs_seg->align = (u32)page;
+  stubs_seg->nsections = 1;
+  img->segment_bytes[stubs_seg_idx] = (u8*)h->alloc(h, (size_t)stubs_size, 16);
+  img->segment_bytes_cap[stubs_seg_idx] = (size_t)stubs_size;
+  if (!img->segment_bytes[stubs_seg_idx])
+    compiler_panic(img->c, no_loc(), "link: oom on jit stubs bytes");
+  memset(img->segment_bytes[stubs_seg_idx], 0, (size_t)stubs_size);
+
+  slots_seg = &img->segments[slots_seg_idx];
+  memset(slots_seg, 0, sizeof(*slots_seg));
+  slots_seg->id = (LinkSegmentId)(slots_seg_idx + 1u);
+  slots_seg->flags = SF_ALLOC | SF_WRITE;
+  slots_seg->file_offset = slots_vaddr;
+  slots_seg->vaddr = slots_vaddr;
+  slots_seg->file_size = slots_size;
+  slots_seg->mem_size = slots_size;
+  slots_seg->align = (u32)page;
+  slots_seg->nsections = 1;
+  img->segment_bytes[slots_seg_idx] = (u8*)h->alloc(h, (size_t)slots_size, 16);
+  img->segment_bytes_cap[slots_seg_idx] = (size_t)slots_size;
+  if (!img->segment_bytes[slots_seg_idx])
+    compiler_panic(img->c, no_loc(), "link: oom on jit stub slots bytes");
+  memset(img->segment_bytes[slots_seg_idx], 0, (size_t)slots_size);
+  img->nsegments += 2u;
+
+  /* One synthetic section per new segment (input_id == LINK_INPUT_NONE). */
+  sec_base = link_iplt_alloc_sections(img, 2u);
+  stubs_sec = &img->sections[sec_base + 0u];
+  memset(stubs_sec, 0, sizeof(*stubs_sec));
+  stubs_sec->id = (LinkSectionId)(sec_base + 0u + 1u);
+  stubs_sec->input_id = LINK_INPUT_NONE;
+  stubs_sec->obj_section_id = OBJ_SEC_NONE;
+  stubs_sec->segment_id = stubs_seg->id;
+  stubs_sec->input_offset = 0;
+  stubs_sec->file_offset = stubs_vaddr;
+  stubs_sec->vaddr = stubs_vaddr;
+  stubs_sec->size = stubs_size;
+  stubs_sec->flags = SF_ALLOC | SF_EXEC;
+  stubs_sec->align = 4;
+  stubs_sec->name = pool_intern_cstr(l->c->global, ".cfree_jit_call_stubs");
+  stubs_sec->sem = SSEM_PROGBITS;
+
+  slots_sec = &img->sections[sec_base + 1u];
+  memset(slots_sec, 0, sizeof(*slots_sec));
+  slots_sec->id = (LinkSectionId)(sec_base + 1u + 1u);
+  slots_sec->input_id = LINK_INPUT_NONE;
+  slots_sec->obj_section_id = OBJ_SEC_NONE;
+  slots_sec->segment_id = slots_seg->id;
+  slots_sec->input_offset = 0;
+  slots_sec->file_offset = slots_vaddr;
+  slots_sec->vaddr = slots_vaddr;
+  slots_sec->size = slots_size;
+  slots_sec->flags = SF_ALLOC | SF_WRITE;
+  slots_sec->align = 8;
+  slots_sec->name = pool_intern_cstr(l->c->global, ".cfree_jit_call_slots");
+  slots_sec->sem = SSEM_PROGBITS;
+  img->nsections += 2u;
+
+  stubs_bytes = img->segment_bytes[stubs_seg_idx];
+  for (i = 0; i < ntarget; ++i) {
+    LinkSymId orig = targets[i];
+    /* Read the target address BEFORE appending any symbol: the appends
+     * below add to img->syms, so a LinkSymbol pointer taken here must not
+     * be dereferenced after them (the container may relocate its storage). */
+    u64 target_vaddr = LinkSyms_at(&img->syms, orig - 1)->vaddr;
+    u64 stub_vaddr = stubs_vaddr + (u64)i * (u64)arch->iplt_stub_size;
+    u64 slot_vaddr = slots_vaddr + (u64)i * 8u;
+    LinkSymbol slot_rec, resolver_rec, stub_rec;
+    LinkSymId slot_id, resolver_id, stub_id;
+    LinkArchIPltReloc stub_relocs[2];
+    u32 nstub_relocs;
+    LinkRelocApply rrec;
+    u8* stub_dst = stubs_bytes + (size_t)i * (size_t)arch->iplt_stub_size;
+    u32 ri;
+
+    /* Arch hook writes the stub code and reports the relocs it needs. */
+    nstub_relocs =
+        arch->emit_iplt_stub(stub_dst, stub_vaddr, slot_vaddr, stub_relocs);
+
+    /* Local symbol naming the 8-byte slot the stub refers to. */
+    memset(&slot_rec, 0, sizeof(slot_rec));
+    slot_rec.kind = SK_OBJ;
+    slot_rec.bind = SB_LOCAL;
+    slot_rec.defined = 1;
+    slot_rec.section_id = slots_sec->id;
+    slot_rec.vaddr = slot_vaddr;
+    slot_rec.size = 8;
+    slot_id = link_append_symbol(img, &slot_rec);
+
+    /* Local absolute symbol carrying the original target address; it is
+     * the target of the slot's R_ABS64 reloc below. */
+    memset(&resolver_rec, 0, sizeof(resolver_rec));
+    resolver_rec.kind = SK_ABS;
+    resolver_rec.bind = SB_LOCAL;
+    resolver_rec.defined = 1;
+    resolver_rec.vaddr = target_vaddr;
+    resolver_id = link_append_symbol(img, &resolver_rec);
+
+    /* Local function symbol for the stub itself; recorded in stub_map under
+     * the original target id. */
+    memset(&stub_rec, 0, sizeof(stub_rec));
+    stub_rec.kind = SK_FUNC;
+    stub_rec.bind = SB_LOCAL;
+    stub_rec.defined = 1;
+    stub_rec.section_id = stubs_sec->id;
+    stub_rec.vaddr = stub_vaddr;
+    stub_rec.size = arch->iplt_stub_size;
+    stub_id = link_append_symbol(img, &stub_rec);
+    stub_map[orig] = stub_id;
+
+    /* Relocs inside the stub, all against the slot symbol. */
+    for (ri = 0; ri < nstub_relocs; ++ri) {
+      memset(&rrec, 0, sizeof(rrec));
+      rrec.input_id = LINK_INPUT_NONE;
+      rrec.section_id = OBJ_SEC_NONE;
+      rrec.link_section_id = stubs_sec->id;
+      rrec.offset = (u32)(i * arch->iplt_stub_size) +
+                    stub_relocs[ri].offset_in_stub;
+      rrec.width = stub_relocs[ri].width;
+      rrec.write_vaddr = stub_vaddr + stub_relocs[ri].offset_in_stub;
+      rrec.write_file_offset = rrec.write_vaddr;
+      rrec.kind = stub_relocs[ri].kind;
+      rrec.target = slot_id;
+      rrec.addend = 0;
+      *link_append_reloc_slot(img) = rrec;
+    }
+
+    /* Fill the slot with the absolute target address. */
+    memset(&rrec, 0, sizeof(rrec));
+    rrec.input_id = LINK_INPUT_NONE;
+    rrec.section_id = OBJ_SEC_NONE;
+    rrec.link_section_id = slots_sec->id;
+    rrec.offset = (u32)(i * 8u);
+    rrec.width = 8;
+    rrec.write_vaddr = slot_vaddr;
+    rrec.write_file_offset = slot_vaddr;
+    rrec.kind = R_ABS64;
+    rrec.target = resolver_id;
+    rrec.addend = 0;
+    *link_append_reloc_slot(img) = rrec;
+  }
+
+  if (targets) h->free(h, targets, sizeof(*targets) * tcap);
+  *stub_map_out = stub_map;
+}
+
+/* ---- pass 3c: GOT layout ---- */
+
+/* Build the GOT for this image.
+ *
+ * Scans the relocations of every input, and for each distinct target symbol
+ * referenced through a reloc kind accepted by reloc_uses_got() reserves one
+ * 8-byte slot. If at least one slot is needed, a new page-aligned writable
+ * segment and a ".got" section are appended past the highest existing
+ * segment end, and for every slot an anonymous local SK_OBJ symbol plus an
+ * R_ABS64 reloc (targeting the original symbol) are appended to the image.
+ *
+ * l             linker state (inputs, page size)
+ * img           image being laid out; segments/sections/symbols/relocs grow
+ * map_size      number of entries in the returned map; targets index it
+ *               directly, so it presumably covers every LinkSymId in use
+ *               (TODO confirm against caller)
+ * got_map_out   receives a heap array mapping target LinkSymId -> LinkSymId
+ *               of its GOT slot symbol (LINK_SYM_NONE when it has no slot),
+ *               or NULL when no reloc needs the GOT. Nothing in this
+ *               function frees the returned array.
+ */
+void link_layout_got(Linker* l, LinkImage* img, u32 map_size,
+ LinkSymId** got_map_out) {
+ Heap* h = img->heap;
+ LinkSymId* got_map;
+ LinkSymId* slot_targets = NULL;
+ u32 slot_cap = 0;
+ u32 nslot = 0;
+ u32 ii, j, k;
+ u64 page;
+ u64 base_vaddr = 0;
+ u64 got_size;
+ LinkSegment* gotseg;
+ LinkSection* gotsec;
+ u32 gotseg_idx;
+ u32 si;
+
+ *got_map_out = NULL;
+
+ got_map = (LinkSymId*)h->alloc(h, sizeof(*got_map) * map_size,
+ _Alignof(LinkSymId));
+ if (!got_map) compiler_panic(img->c, no_loc(), "link: oom on got map");
+ memset(got_map, 0, sizeof(*got_map) * map_size);
+
+ /* Discovery: collect each GOT-using target once, in first-seen order.
+  * During this phase got_map[target] holds a 1-based slot number purely as
+  * an "already seen" marker; it is reset below before real ids go in. */
+ for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) {
+ ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj;
+ InputMap* m = &img->input_maps[ii];
+ u32 total = obj_reloc_total(ob);
+ if (!total) continue;
+ for (k = 0; k < total; ++k) {
+ const Reloc* r = obj_reloc_at(ob, k);
+ const Section* s = obj_section_get(ob, r->section_id);
+ LinkSymId target;
+ /* Skip relocs in sections that were not kept or not mapped. */
+ if (!s || !link_section_kept(s)) continue;
+ if (m->section[r->section_id] == LINK_SEC_NONE) continue;
+ if (!reloc_uses_got(r->kind)) continue;
+ if (r->sym == OBJ_SYM_NONE || r->sym >= m->nsym) continue;
+ target = m->sym[r->sym];
+ if (target == LINK_SYM_NONE) continue;
+ if (got_map[target] != LINK_SYM_NONE) continue;
+ if (VEC_GROW(h, slot_targets, slot_cap, nslot + 1u))
+ compiler_panic(img->c, no_loc(), "link: oom on got slot list");
+ slot_targets[nslot] = target;
+ got_map[target] = (LinkSymId)(nslot + 1u);
+ nslot++;
+ }
+ }
+
+ /* No GOT-using relocs: release scratch and leave *got_map_out as NULL. */
+ if (nslot == 0) {
+ if (slot_targets)
+ h->free(h, slot_targets, sizeof(*slot_targets) * slot_cap);
+ h->free(h, got_map, sizeof(*got_map) * map_size);
+ return;
+ }
+
+ /* Clear the temporary "seen" markers. */
+ for (si = 0; si < nslot; ++si) got_map[slot_targets[si]] = LINK_SYM_NONE;
+
+ /* Place the GOT on the first page boundary past every existing segment. */
+ page = link_layout_page_size(l);
+ for (j = 0; j < img->nsegments; ++j) {
+ u64 end = img->segments[j].vaddr + img->segments[j].mem_size;
+ if (end > base_vaddr) base_vaddr = end;
+ }
+ base_vaddr = ALIGN_UP(base_vaddr, (u64)(page));
+ got_size = (u64)nslot * 8u;
+
+ /* Grow the three parallel per-segment arrays by one entry. */
+ {
+ u32 new_nseg = img->nsegments + 1u;
+ LinkSegment* nsegs = (LinkSegment*)h->realloc(
+ h, img->segments, sizeof(*img->segments) * img->nsegments,
+ sizeof(*img->segments) * new_nseg, _Alignof(LinkSegment));
+ u8** nsbufs = (u8**)h->realloc(
+ h, img->segment_bytes, sizeof(*img->segment_bytes) * img->nsegments,
+ sizeof(*img->segment_bytes) * new_nseg, _Alignof(u8*));
+ size_t* nscaps = (size_t*)h->realloc(
+ h, img->segment_bytes_cap,
+ sizeof(*img->segment_bytes_cap) * img->nsegments,
+ sizeof(*img->segment_bytes_cap) * new_nseg, _Alignof(size_t));
+ if (!nsegs || !nsbufs || !nscaps)
+ compiler_panic(img->c, no_loc(), "link: oom on got segment");
+ img->segments = nsegs;
+ img->segment_bytes = nsbufs;
+ img->segment_bytes_cap = nscaps;
+ }
+
+ /* Describe the new segment. Ids are index + 1; the file offset is set to
+  * the same value as the vaddr. gotseg is taken only after the realloc
+  * above and stays valid below because img->segments is not resized again
+  * in this function. */
+ gotseg_idx = img->nsegments;
+ gotseg = &img->segments[gotseg_idx];
+ memset(gotseg, 0, sizeof(*gotseg));
+ gotseg->id = (LinkSegmentId)(gotseg_idx + 1u);
+ gotseg->flags = SF_ALLOC | SF_WRITE;
+ gotseg->file_offset = base_vaddr;
+ gotseg->vaddr = base_vaddr;
+ gotseg->file_size = got_size;
+ gotseg->mem_size = got_size;
+ gotseg->align = (u32)page;
+ gotseg->nsections = 1;
+
+ /* Zero-filled backing bytes; the R_ABS64 relocs emitted below describe
+  * what each slot should finally contain. */
+ img->segment_bytes[gotseg_idx] = (u8*)h->alloc(h, (size_t)got_size, 16);
+ img->segment_bytes_cap[gotseg_idx] = (size_t)got_size;
+ if (!img->segment_bytes[gotseg_idx])
+ compiler_panic(img->c, no_loc(), "link: oom on got bytes");
+ memset(img->segment_bytes[gotseg_idx], 0, (size_t)got_size);
+ img->nsegments++;
+
+ /* Append the synthetic ".got" section (no originating input/object). */
+ {
+ u32 new_nsec = img->nsections + 1u;
+ LinkSection* nsections = (LinkSection*)h->realloc(
+ h, img->sections, sizeof(*img->sections) * img->nsections,
+ sizeof(*img->sections) * new_nsec, _Alignof(LinkSection));
+ if (!nsections)
+ compiler_panic(img->c, no_loc(), "link: oom on got section");
+ img->sections = nsections;
+ }
+ gotsec = &img->sections[img->nsections];
+ memset(gotsec, 0, sizeof(*gotsec));
+ gotsec->id = (LinkSectionId)(img->nsections + 1u);
+ gotsec->input_id = LINK_INPUT_NONE;
+ gotsec->obj_section_id = OBJ_SEC_NONE;
+ gotsec->segment_id = gotseg->id;
+ gotsec->input_offset = 0;
+ gotsec->file_offset = base_vaddr;
+ gotsec->vaddr = base_vaddr;
+ gotsec->size = got_size;
+ gotsec->flags = SF_ALLOC | SF_WRITE;
+ gotsec->align = 8;
+ gotsec->name = pool_intern_cstr(img->c->global, ".got");
+ gotsec->sem = SSEM_PROGBITS;
+ img->nsections++;
+
+ /* One anonymous local symbol and one R_ABS64 reloc per slot. */
+ for (si = 0; si < nslot; ++si) {
+ LinkSymId orig = slot_targets[si];
+ u64 slot_vaddr = base_vaddr + (u64)si * 8u;
+ LinkSymbol sym_rec;
+ LinkRelocApply rrec;
+ LinkSymId slot_id;
+
+ memset(&sym_rec, 0, sizeof(sym_rec));
+ sym_rec.name = 0;
+ sym_rec.kind = SK_OBJ;
+ sym_rec.bind = SB_LOCAL;
+ sym_rec.defined = 1;
+ sym_rec.section_id = gotsec->id;
+ sym_rec.vaddr = slot_vaddr;
+ sym_rec.size = 8;
+ slot_id = link_append_symbol(img, &sym_rec);
+ /* Final mapping: original target -> symbol naming its GOT slot. */
+ got_map[orig] = slot_id;
+
+ memset(&rrec, 0, sizeof(rrec));
+ rrec.input_id = LINK_INPUT_NONE;
+ rrec.section_id = OBJ_SEC_NONE;
+ rrec.link_section_id = gotsec->id;
+ rrec.offset = (u32)(si * 8u);
+ rrec.width = 8;
+ rrec.write_vaddr = slot_vaddr;
+ rrec.write_file_offset = base_vaddr + (u64)si * 8u;
+ rrec.kind = R_ABS64;
+ rrec.target = orig;
+ rrec.addend = 0;
+ *link_append_reloc_slot(img) = rrec;
+ }
+
+ if (slot_targets) h->free(h, slot_targets, sizeof(*slot_targets) * slot_cap);
+
+ *got_map_out = got_map;
+}
+
+/* ---- pass 3d: STT_GNU_IFUNC trampoline ---- */
+
+/* Lay out the STT_GNU_IFUNC trampoline machinery.
+ *
+ * For every defined SK_IFUNC symbol that is canonical for its name (either
+ * anonymous, or the entry img->globals holds for that name) this pass:
+ *   - emits one arch-specific stub of arch->iplt_stub_size bytes in ".iplt",
+ *   - reserves one 8-byte slot in ".igot.plt",
+ *   - reserves one 16-byte {resolver, slot} pair in ".iplt.pairs" and
+ *     records the same pair of vaddrs in img->iplt_pairs,
+ *   - appends anonymous local symbols for the slot and for the resolver,
+ *     plus the relocs that patch the stub and fill the pair,
+ *   - retargets the ifunc symbol itself to its stub as an SK_FUNC.
+ * Each of the three sections lives in its own page-aligned segment placed
+ * after the highest existing segment end; file offsets are set equal to the
+ * vaddrs. When l->emit_static_exe is set, a fourth one-entry section with
+ * sem SSEM_PREINIT_ARRAY is added whose 8 bytes are relocated to
+ * '__cfree_ifunc_init' (which must be defined, else the link panics).
+ * "__start_iplt_pairs" / "__stop_iplt_pairs" boundary symbols bracket the
+ * pairs section. Finally, every named non-canonical symbol whose canonical
+ * definition now lives in ".iplt" is updated to mirror that definition.
+ *
+ * Returns without touching the image when there are no ifuncs.
+ */
+void link_layout_iplt(Linker* l, LinkImage* img) {
+  Heap* h = img->heap;
+  u32 i;
+  u32 nifunc = 0;
+  u64 page;
+  u64 base_vaddr = 0;
+  u64 iplt_vaddr, igot_vaddr, pairs_vaddr;
+  u64 iplt_size, igot_size, pairs_size;
+  u64 init_vaddr = 0, init_size = 0;
+  u32 iplt_seg_idx, igot_seg_idx, pairs_seg_idx;
+  u32 init_seg_idx = 0;
+  u32 seg_base, sec_base;
+  LinkSegment* iplt_seg;
+  LinkSegment* igot_seg;
+  LinkSegment* pairs_seg;
+  LinkSegment* init_seg = NULL;
+  LinkSection* iplt_sec;
+  LinkSection* igot_sec;
+  LinkSection* pairs_sec;
+  LinkSection* init_sec = NULL;
+  u8* iplt_bytes;
+  u32 slot_idx;
+  int emit_init_array = l->emit_static_exe;
+  LinkSymId ifunc_init_sym = LINK_SYM_NONE;
+  Sym ifunc_init_name = 0;
+  Sym pairs_section_name;
+  Sym init_section_name;
+  const LinkArchDesc* arch = link_arch_desc_for(l->c);
+  if (!arch)
+    compiler_panic(img->c, no_loc(),
+                   "link: layout_iplt: no arch descriptor for arch %u",
+                   (u32)l->c->target.arch);
+
+  /* Count canonical, defined ifuncs. The emission loop below applies the
+   * same filter, so it produces exactly nifunc slots. */
+  for (i = 0; i < LinkSyms_count(&img->syms); ++i) {
+    const LinkSymbol* s = LinkSyms_at(&img->syms, i);
+    if (s->kind != SK_IFUNC || !s->defined) continue;
+    if (s->name != 0) {
+      LinkSymId canonical = symhash_get(&img->globals, s->name);
+      if (canonical != LINK_SYM_NONE && canonical != s->id) continue;
+    }
+    ++nifunc;
+  }
+  if (nifunc == 0) return;
+
+  page = link_layout_page_size(l);
+
+  /* First free address: the highest end of any existing segment. */
+  for (i = 0; i < img->nsegments; ++i) {
+    u64 end = img->segments[i].vaddr + img->segments[i].mem_size;
+    if (end > base_vaddr) base_vaddr = end;
+  }
+
+  /* Each region starts on its own page boundary. */
+  base_vaddr = ALIGN_UP(base_vaddr, (u64)(page));
+  iplt_vaddr = base_vaddr;
+  iplt_size = (u64)nifunc * (u64)arch->iplt_stub_size;
+  igot_vaddr = ALIGN_UP(iplt_vaddr + iplt_size, (u64)(page));
+  igot_size = (u64)nifunc * 8u;
+  pairs_vaddr = ALIGN_UP(igot_vaddr + igot_size, (u64)(page));
+  pairs_size = (u64)nifunc * 16u;
+
+  if (emit_init_array) {
+    ifunc_init_name = pool_intern_cstr(l->c->global, "__cfree_ifunc_init");
+    ifunc_init_sym = symhash_get(&img->globals, ifunc_init_name);
+    if (ifunc_init_sym == LINK_SYM_NONE ||
+        !LinkSyms_at(&img->syms, ifunc_init_sym - 1)->defined) {
+      compiler_panic(img->c, no_loc(),
+                     "link: STT_GNU_IFUNC requires '__cfree_ifunc_init' "
+                     "to be defined (link in libcfree_rt.a or provide "
+                     "your own implementation)");
+    }
+    init_vaddr = ALIGN_UP(pairs_vaddr + pairs_size, (u64)(page));
+    init_size = 8u;
+  }
+
+  /* Reserve segment table entries; img->nsegments is bumped only after all
+   * of them have been filled in. */
+  {
+    u32 nseg = emit_init_array ? 4u : 3u;
+    seg_base = link_iplt_alloc_segments(img, nseg);
+  }
+  iplt_seg_idx = seg_base + 0u;
+  igot_seg_idx = seg_base + 1u;
+  pairs_seg_idx = seg_base + 2u;
+  if (emit_init_array) init_seg_idx = seg_base + 3u;
+
+  /* .iplt segment: executable stubs. */
+  iplt_seg = &img->segments[iplt_seg_idx];
+  memset(iplt_seg, 0, sizeof(*iplt_seg));
+  iplt_seg->id = (LinkSegmentId)(iplt_seg_idx + 1u);
+  iplt_seg->flags = SF_ALLOC | SF_EXEC;
+  iplt_seg->file_offset = iplt_vaddr;
+  iplt_seg->vaddr = iplt_vaddr;
+  iplt_seg->file_size = iplt_size;
+  iplt_seg->mem_size = iplt_size;
+  iplt_seg->align = (u32)page;
+  iplt_seg->nsections = 1;
+  img->segment_bytes[iplt_seg_idx] = (u8*)h->alloc(h, (size_t)iplt_size, 16);
+  img->segment_bytes_cap[iplt_seg_idx] = (size_t)iplt_size;
+  if (!img->segment_bytes[iplt_seg_idx])
+    compiler_panic(img->c, no_loc(), "link: oom on iplt bytes");
+  memset(img->segment_bytes[iplt_seg_idx], 0, (size_t)iplt_size);
+
+  /* .igot.plt segment: one writable 8-byte slot per ifunc. */
+  igot_seg = &img->segments[igot_seg_idx];
+  memset(igot_seg, 0, sizeof(*igot_seg));
+  igot_seg->id = (LinkSegmentId)(igot_seg_idx + 1u);
+  igot_seg->flags = SF_ALLOC | SF_WRITE;
+  igot_seg->file_offset = igot_vaddr;
+  igot_seg->vaddr = igot_vaddr;
+  igot_seg->file_size = igot_size;
+  igot_seg->mem_size = igot_size;
+  igot_seg->align = (u32)page;
+  igot_seg->nsections = 1;
+  img->segment_bytes[igot_seg_idx] = (u8*)h->alloc(h, (size_t)igot_size, 16);
+  img->segment_bytes_cap[igot_seg_idx] = (size_t)igot_size;
+  if (!img->segment_bytes[igot_seg_idx])
+    compiler_panic(img->c, no_loc(), "link: oom on igot bytes");
+  memset(img->segment_bytes[igot_seg_idx], 0, (size_t)igot_size);
+
+  /* .iplt.pairs segment: {resolver, slot} address pairs. */
+  pairs_seg = &img->segments[pairs_seg_idx];
+  memset(pairs_seg, 0, sizeof(*pairs_seg));
+  pairs_seg->id = (LinkSegmentId)(pairs_seg_idx + 1u);
+  pairs_seg->flags = SF_ALLOC | SF_WRITE;
+  pairs_seg->file_offset = pairs_vaddr;
+  pairs_seg->vaddr = pairs_vaddr;
+  pairs_seg->file_size = pairs_size;
+  pairs_seg->mem_size = pairs_size;
+  pairs_seg->align = (u32)page;
+  pairs_seg->nsections = 1;
+  img->segment_bytes[pairs_seg_idx] = (u8*)h->alloc(h, (size_t)pairs_size, 16);
+  img->segment_bytes_cap[pairs_seg_idx] = (size_t)pairs_size;
+  if (!img->segment_bytes[pairs_seg_idx])
+    compiler_panic(img->c, no_loc(), "link: oom on iplt.pairs bytes");
+  memset(img->segment_bytes[pairs_seg_idx], 0, (size_t)pairs_size);
+
+  /* Optional one-entry init segment (static executables only). */
+  if (emit_init_array) {
+    init_seg = &img->segments[init_seg_idx];
+    memset(init_seg, 0, sizeof(*init_seg));
+    init_seg->id = (LinkSegmentId)(init_seg_idx + 1u);
+    init_seg->flags = SF_ALLOC | SF_WRITE;
+    init_seg->file_offset = init_vaddr;
+    init_seg->vaddr = init_vaddr;
+    init_seg->file_size = init_size;
+    init_seg->mem_size = init_size;
+    init_seg->align = (u32)page;
+    init_seg->nsections = 1;
+    img->segment_bytes[init_seg_idx] = (u8*)h->alloc(h, (size_t)init_size, 16);
+    img->segment_bytes_cap[init_seg_idx] = (size_t)init_size;
+    if (!img->segment_bytes[init_seg_idx])
+      compiler_panic(img->c, no_loc(), "link: oom on iplt init_array bytes");
+    memset(img->segment_bytes[init_seg_idx], 0, (size_t)init_size);
+  }
+  img->nsegments += emit_init_array ? 4u : 3u;
+
+  /* Matching section table entries; img->nsections is bumped once filled. */
+  {
+    u32 nsec = emit_init_array ? 4u : 3u;
+    sec_base = link_iplt_alloc_sections(img, nsec);
+  }
+
+  pairs_section_name = pool_intern_cstr(l->c->global, ".iplt.pairs");
+  init_section_name = obj_secname_preinit_array(l->c);
+
+  iplt_sec = &img->sections[sec_base + 0u];
+  memset(iplt_sec, 0, sizeof(*iplt_sec));
+  iplt_sec->id = (LinkSectionId)(sec_base + 0u + 1u);
+  iplt_sec->input_id = LINK_INPUT_NONE;
+  iplt_sec->obj_section_id = OBJ_SEC_NONE;
+  iplt_sec->segment_id = iplt_seg->id;
+  iplt_sec->input_offset = 0;
+  iplt_sec->file_offset = iplt_vaddr;
+  iplt_sec->vaddr = iplt_vaddr;
+  iplt_sec->size = iplt_size;
+  iplt_sec->flags = SF_ALLOC | SF_EXEC;
+  iplt_sec->align = 4;
+  iplt_sec->name = pool_intern_cstr(l->c->global, ".iplt");
+  iplt_sec->sem = SSEM_PROGBITS;
+
+  igot_sec = &img->sections[sec_base + 1u];
+  memset(igot_sec, 0, sizeof(*igot_sec));
+  igot_sec->id = (LinkSectionId)(sec_base + 1u + 1u);
+  igot_sec->input_id = LINK_INPUT_NONE;
+  igot_sec->obj_section_id = OBJ_SEC_NONE;
+  igot_sec->segment_id = igot_seg->id;
+  igot_sec->input_offset = 0;
+  igot_sec->file_offset = igot_vaddr;
+  igot_sec->vaddr = igot_vaddr;
+  igot_sec->size = igot_size;
+  igot_sec->flags = SF_ALLOC | SF_WRITE;
+  igot_sec->align = 8;
+  igot_sec->name = pool_intern_cstr(l->c->global, ".igot.plt");
+  igot_sec->sem = SSEM_PROGBITS;
+
+  pairs_sec = &img->sections[sec_base + 2u];
+  memset(pairs_sec, 0, sizeof(*pairs_sec));
+  pairs_sec->id = (LinkSectionId)(sec_base + 2u + 1u);
+  pairs_sec->input_id = LINK_INPUT_NONE;
+  pairs_sec->obj_section_id = OBJ_SEC_NONE;
+  pairs_sec->segment_id = pairs_seg->id;
+  pairs_sec->input_offset = 0;
+  pairs_sec->file_offset = pairs_vaddr;
+  pairs_sec->vaddr = pairs_vaddr;
+  pairs_sec->size = pairs_size;
+  pairs_sec->flags = SF_ALLOC | SF_WRITE;
+  pairs_sec->align = 8;
+  pairs_sec->name = pairs_section_name;
+  pairs_sec->sem = SSEM_PROGBITS;
+
+  if (emit_init_array) {
+    init_sec = &img->sections[sec_base + 3u];
+    memset(init_sec, 0, sizeof(*init_sec));
+    init_sec->id = (LinkSectionId)(sec_base + 3u + 1u);
+    init_sec->input_id = LINK_INPUT_NONE;
+    init_sec->obj_section_id = OBJ_SEC_NONE;
+    init_sec->segment_id = init_seg->id;
+    init_sec->input_offset = 0;
+    init_sec->file_offset = init_vaddr;
+    init_sec->vaddr = init_vaddr;
+    init_sec->size = init_size;
+    init_sec->flags = SF_ALLOC | SF_WRITE;
+    init_sec->align = 8;
+    init_sec->name = init_section_name;
+    init_sec->sem = SSEM_PREINIT_ARRAY;
+  }
+  img->nsections += emit_init_array ? 4u : 3u;
+
+  link_emit_boundary_sym(l, img, "__start_iplt_pairs", pairs_vaddr);
+  link_emit_boundary_sym(l, img, "__stop_iplt_pairs", pairs_vaddr + pairs_size);
+
+  /* Image-side copy of the pairs: [2k] = resolver vaddr, [2k+1] = slot. */
+  img->iplt_pairs = (u64*)h->alloc(
+      h, sizeof(*img->iplt_pairs) * 2u * (size_t)nifunc, _Alignof(u64));
+  if (!img->iplt_pairs)
+    compiler_panic(img->c, no_loc(), "link: oom on iplt pairs");
+  img->niplt = nifunc;
+
+  iplt_bytes = img->segment_bytes[iplt_seg_idx];
+  slot_idx = 0;
+
+  /* Emission. The loop bound is re-read every iteration, so the symbols
+   * appended here are visited too; they are SK_OBJ/SK_FUNC and fall through
+   * the SK_IFUNC filter. */
+  for (i = 0; i < LinkSyms_count(&img->syms); ++i) {
+    LinkSymbol* s = LinkSyms_at(&img->syms, i);
+    u64 stub_vaddr;
+    u64 slot_vaddr;
+    u64 pair_vaddr;
+    u64 resolver_vaddr;
+    LinkSectionId resolver_section;
+    u64 resolver_value;
+    LinkSymbol slot_rec;
+    LinkSymbol resolver_rec;
+    LinkSymId slot_id;
+    LinkSymId resolver_id;
+    LinkRelocApply rrec;
+    u8* stub_dst;
+
+    if (s->kind != SK_IFUNC || !s->defined) continue;
+    if (s->name != 0) {
+      LinkSymId canonical = symhash_get(&img->globals, s->name);
+      if (canonical != LINK_SYM_NONE && canonical != s->id) continue;
+    }
+
+    /* The stub stride must be the arch's stub size, matching iplt_size, the
+     * buffer offset, the reloc offsets and s->value below (this used to be
+     * a hard-coded 12). */
+    stub_vaddr = iplt_vaddr + (u64)slot_idx * (u64)arch->iplt_stub_size;
+    slot_vaddr = igot_vaddr + (u64)slot_idx * 8u;
+    pair_vaddr = pairs_vaddr + (u64)slot_idx * 16u;
+    /* Capture the resolver's location before the symbol is retargeted. */
+    resolver_vaddr = s->vaddr;
+    resolver_section = s->section_id;
+    resolver_value = s->value;
+
+    img->iplt_pairs[2u * slot_idx + 0] = resolver_vaddr;
+    img->iplt_pairs[2u * slot_idx + 1] = slot_vaddr;
+
+    stub_dst = iplt_bytes + (size_t)slot_idx * (size_t)arch->iplt_stub_size;
+    /* The arch hook writes the stub bytes and reports the relocs to apply
+     * against the slot; iplt_relocs has room for two. */
+    LinkArchIPltReloc iplt_relocs[2];
+    u32 niplt_relocs =
+        arch->emit_iplt_stub(stub_dst, stub_vaddr, slot_vaddr, iplt_relocs);
+
+    /* Anonymous local symbol naming the .igot.plt slot. */
+    memset(&slot_rec, 0, sizeof(slot_rec));
+    slot_rec.name = 0;
+    slot_rec.kind = SK_OBJ;
+    slot_rec.bind = SB_LOCAL;
+    slot_rec.defined = 1;
+    slot_rec.section_id = igot_sec->id;
+    slot_rec.vaddr = slot_vaddr;
+    slot_rec.size = 8;
+    slot_id = link_append_symbol(img, &slot_rec);
+
+    /* Anonymous local symbol preserving the resolver's original address. */
+    memset(&resolver_rec, 0, sizeof(resolver_rec));
+    resolver_rec.name = 0;
+    resolver_rec.kind = SK_FUNC;
+    resolver_rec.bind = SB_LOCAL;
+    resolver_rec.defined = 1;
+    resolver_rec.section_id = resolver_section;
+    resolver_rec.value = resolver_value;
+    resolver_rec.vaddr = resolver_vaddr;
+    resolver_rec.size = 0;
+    resolver_id = link_append_symbol(img, &resolver_rec);
+
+    /* Stub-internal relocs, all targeting the slot. */
+    {
+      u32 ri;
+      for (ri = 0; ri < niplt_relocs; ++ri) {
+        memset(&rrec, 0, sizeof(rrec));
+        rrec.input_id = LINK_INPUT_NONE;
+        rrec.section_id = OBJ_SEC_NONE;
+        rrec.link_section_id = iplt_sec->id;
+        rrec.offset = (u32)(slot_idx * arch->iplt_stub_size) +
+                      iplt_relocs[ri].offset_in_stub;
+        rrec.width = iplt_relocs[ri].width;
+        rrec.write_vaddr = stub_vaddr + iplt_relocs[ri].offset_in_stub;
+        rrec.write_file_offset = rrec.write_vaddr;
+        rrec.kind = iplt_relocs[ri].kind;
+        rrec.target = slot_id;
+        rrec.addend = 0;
+        *link_append_reloc_slot(img) = rrec;
+      }
+    }
+
+    /* pairs[k].resolver */
+    memset(&rrec, 0, sizeof(rrec));
+    rrec.input_id = LINK_INPUT_NONE;
+    rrec.section_id = OBJ_SEC_NONE;
+    rrec.link_section_id = pairs_sec->id;
+    rrec.offset = (u32)(slot_idx * 16u);
+    rrec.width = 8;
+    rrec.write_vaddr = pair_vaddr;
+    rrec.write_file_offset = pair_vaddr;
+    rrec.kind = R_ABS64;
+    rrec.target = resolver_id;
+    rrec.addend = 0;
+    *link_append_reloc_slot(img) = rrec;
+
+    /* pairs[k].slot */
+    memset(&rrec, 0, sizeof(rrec));
+    rrec.input_id = LINK_INPUT_NONE;
+    rrec.section_id = OBJ_SEC_NONE;
+    rrec.link_section_id = pairs_sec->id;
+    rrec.offset = (u32)(slot_idx * 16u + 8u);
+    rrec.width = 8;
+    rrec.write_vaddr = pair_vaddr + 8u;
+    rrec.write_file_offset = pair_vaddr + 8u;
+    rrec.kind = R_ABS64;
+    rrec.target = slot_id;
+    rrec.addend = 0;
+    *link_append_reloc_slot(img) = rrec;
+
+    /* Two symbols were appended since `s` was fetched. Re-fetch it before
+     * writing in case the symbol table's storage moved; this is a no-op if
+     * LinkSyms hands out stable pointers. */
+    s = LinkSyms_at(&img->syms, i);
+
+    /* From now on the ifunc symbol is an ordinary function at its stub. */
+    s->kind = SK_FUNC;
+    s->section_id = iplt_sec->id;
+    s->value = (u64)slot_idx * (u64)arch->iplt_stub_size;
+    s->vaddr = stub_vaddr;
+    s->size = arch->iplt_stub_size;
+
+    ++slot_idx;
+  }
+
+  /* Single init entry pointing at __cfree_ifunc_init. */
+  if (emit_init_array) {
+    LinkRelocApply rrec;
+    memset(&rrec, 0, sizeof(rrec));
+    rrec.input_id = LINK_INPUT_NONE;
+    rrec.section_id = OBJ_SEC_NONE;
+    rrec.link_section_id = init_sec->id;
+    rrec.offset = 0;
+    rrec.width = 8;
+    rrec.write_vaddr = init_vaddr;
+    rrec.write_file_offset = init_vaddr;
+    rrec.kind = R_ABS64;
+    rrec.target = ifunc_init_sym;
+    rrec.addend = 0;
+    *link_append_reloc_slot(img) = rrec;
+  }
+
+  /* Propagate the retargeting to every other named record whose canonical
+   * definition has just been moved into .iplt. */
+  {
+    u32 n = LinkSyms_count(&img->syms);
+    for (i = 0; i < n; ++i) {
+      LinkSymbol* s = LinkSyms_at(&img->syms, i);
+      LinkSymId canonical;
+      LinkSymbol* def;
+      if (s->name == 0) continue;
+      canonical = symhash_get(&img->globals, s->name);
+      if (canonical == LINK_SYM_NONE || canonical == s->id) continue;
+      def = LinkSyms_at(&img->syms, canonical - 1);
+      if (def->section_id != iplt_sec->id) continue;
+      s->section_id = def->section_id;
+      s->value = def->value;
+      s->vaddr = def->vaddr;
+      s->kind = def->kind;
+      s->size = def->size;
+      s->defined = 1;
+    }
+  }
+}
+
+/* ---- entry symbol ---- */
+
+/* Resolve the configured entry point.
+ *
+ * Does nothing when no entry name is set. Otherwise looks the name up in
+ * img->globals and stores the resulting id in img->entry_sym; panics if the
+ * name is absent from the table or maps to a record that is not defined. */
+void link_resolve_entry(Linker* l, LinkImage* img) {
+  LinkSymId entry;
+  const char* name;
+  size_t name_len;
+
+  if (l->entry_name == 0) return;
+
+  entry = symhash_get(&img->globals, l->entry_name);
+  if (entry == LINK_SYM_NONE) {
+    name = pool_str(l->c->global, l->entry_name, &name_len);
+    compiler_panic(l->c, no_loc(), "link: entry symbol '%.*s' not defined",
+                   (int)name_len, name);
+  }
+
+  /* Ids are 1-based indices into the symbol table. */
+  if (!LinkSyms_at(&img->syms, entry - 1)->defined) {
+    name = pool_str(l->c->global, l->entry_name, &name_len);
+    compiler_panic(l->c, no_loc(), "link: entry symbol '%.*s' is undefined",
+                   (int)name_len, name);
+  }
+
+  img->entry_sym = entry;
+}
+
+/* ---- pass 4: emit reloc records ---- */
+
+/* Translate every input relocation that lands in a kept, mapped section into
+ * a LinkRelocApply record appended to the image.
+ *
+ * got_map   optional; when non-NULL, relocs whose kind satisfies
+ *           reloc_uses_got() are redirected to got_map[target] and a missing
+ *           entry is fatal.
+ * stub_map  optional; when non-NULL, R_AARCH64_CALL26 / R_AARCH64_JUMP26
+ *           relocs are redirected to stub_map[target] if one is present.
+ *
+ * R_RV_RELAX, R_RV_TPREL_ADD and R_RV_ALIGN relocs are dropped. A reloc with
+ * no usable symbol, an unmapped symbol, or a kind for which reloc_width()
+ * returns 0 makes the link panic. */
+void link_emit_relocations(Linker* l, LinkImage* img,
+                           const LinkSymId* got_map,
+                           const LinkSymId* stub_map) {
+  u32 input_idx;
+
+  for (input_idx = 0; input_idx < LinkInputs_count(&l->inputs); ++input_idx) {
+    LinkInput* in = LinkInputs_at(&l->inputs, input_idx);
+    ObjBuilder* ob = in->obj;
+    InputMap* map = &img->input_maps[input_idx];
+    u32 nreloc = obj_reloc_total(ob);
+    u32 ri;
+
+    for (ri = 0; ri < nreloc; ++ri) {
+      const Reloc* r = obj_reloc_at(ob, ri);
+      const Section* osec = obj_section_get(ob, r->section_id);
+      LinkSection* dst;
+      LinkSymId target;
+      LinkRelocApply rec;
+      int skipped_kind;
+
+      /* Only relocs inside sections that made it into the image. */
+      if (!osec || !link_section_kept(osec)) continue;
+      if (map->section[r->section_id] == LINK_SEC_NONE) continue;
+
+      skipped_kind = r->kind == R_RV_RELAX || r->kind == R_RV_TPREL_ADD ||
+                     r->kind == R_RV_ALIGN;
+      if (skipped_kind) continue;
+
+      if (r->sym == OBJ_SYM_NONE || r->sym >= map->nsym)
+        compiler_panic(l->c, no_loc(), "link: reloc references unknown symbol");
+      target = map->sym[r->sym];
+      if (target == LINK_SYM_NONE)
+        compiler_panic(l->c, no_loc(),
+                       "link: reloc references unmapped symbol");
+
+      /* GOT indirection first, then the optional call/jump stub. */
+      if (got_map && reloc_uses_got(r->kind)) {
+        LinkSymId got_slot = got_map[target];
+        if (got_slot == LINK_SYM_NONE)
+          compiler_panic(l->c, no_loc(), "link: GOT slot missing for symbol");
+        target = got_slot;
+      }
+      if (stub_map &&
+          (r->kind == R_AARCH64_CALL26 || r->kind == R_AARCH64_JUMP26)) {
+        LinkSymId via_stub = stub_map[target];
+        if (via_stub != LINK_SYM_NONE) target = via_stub;
+      }
+
+      dst = &img->sections[map->section[r->section_id] - 1];
+
+      memset(&rec, 0, sizeof(rec));
+      rec.input_id = in->id;
+      rec.section_id = r->section_id;
+      rec.link_section_id = dst->id;
+      rec.offset = r->offset;
+      rec.width = reloc_width((RelocKind)r->kind);
+      rec.write_vaddr = dst->vaddr + r->offset;
+      rec.write_file_offset = dst->file_offset + r->offset;
+      rec.kind = (RelocKind)r->kind;
+      rec.target = target;
+      rec.addend = r->addend;
+      if (rec.width == 0)
+        compiler_panic(l->c, no_loc(), "link: unsupported reloc kind %u",
+                       (unsigned)r->kind);
+      *link_append_reloc_slot(img) = rec;
+    }
+  }
+}
diff --git a/src/link/link_resolve.c b/src/link/link_resolve.c
@@ -0,0 +1,597 @@
+/* link_resolve.c — archive ingest, symbol resolution, --gc-sections liveness.
+ *
+ * Phase 1 of the link pipeline:
+ * link_ingest_archives — pull archive members into l->inputs
+ * link_resolve_symbols — register every ObjSym, build img->globals
+ * link_resolve_undefs — satisfy remaining undefs (globals/DSOs/resolver)
+ * link_gc_compute — mark live sections (or mark all live if disabled)
+ * link_gc_drop_dead_globals — clear `defined` on syms in dropped sections
+ */
+
+#include <cfree.h>
+#include <string.h>
+
+#include "core/buf.h"
+#include "core/bytes.h"
+#include "core/heap.h"
+#include "core/pool.h"
+#include "core/util.h"
+#include "core/vec.h"
+#include "link/link.h"
+#include "link/link_arch.h"
+#include "link/link_internal.h"
+
+/* Returns an all-zero SrcLoc, passed to compiler_panic() by link-time
+ * diagnostics in this file. */
+static SrcLoc no_loc(void) {
+  SrcLoc none = {0, 0, 0};
+  return none;
+}
+
+/* ---- per-input symbol/section maps ---- */
+
+/* Allocate and zero the two lookup tables of an InputMap: `nsym` entries of
+ * LinkSymId and `nsection` entries of LinkSectionId. Records both counts in
+ * the map and panics on allocation failure. */
+static void map_alloc(LinkImage* img, InputMap* m, u32 nsym, u32 nsection) {
+  Heap* heap = img->heap;
+  size_t sym_bytes = sizeof(*m->sym) * nsym;
+  size_t sec_bytes = sizeof(*m->section) * nsection;
+
+  m->nsym = nsym;
+  m->sym = (LinkSymId*)heap->alloc(heap, sym_bytes, _Alignof(LinkSymId));
+  if (!m->sym)
+    compiler_panic(img->c, no_loc(), "link: oom on input symbol map");
+  memset(m->sym, 0, sym_bytes);
+
+  m->nsection = nsection;
+  m->section =
+      (LinkSectionId*)heap->alloc(heap, sec_bytes, _Alignof(LinkSectionId));
+  if (!m->section)
+    compiler_panic(img->c, no_loc(), "link: oom on input section map");
+  memset(m->section, 0, sec_bytes);
+}
+
+/* ---- pass 1: collect symbols ---- */
+
+/* Rank a symbol binding for the defined-symbol replacement policy: a higher
+ * rank wins. Global = 3, weak = 2, local = 1, anything else = 0. */
+static int bind_strength(u8 bind) {
+  if (bind == SB_GLOBAL) return 3;
+  if (bind == SB_WEAK) return 2;
+  if (bind == SB_LOCAL) return 1;
+  return 0;
+}
+
+/* Pass 1: register the symbols of every non-DSO input.
+ *
+ * For each input an InputMap is allocated (one extra symbol entry so ids can
+ * index it directly) and every ObjSym is turned into a LinkSymbol, with
+ * m->sym[obj id] recording the LinkSymId it resolved to. Global or weak
+ * symbols that are logically undefined and never referenced are skipped and
+ * keep a zero map entry.
+ *
+ * Named, defined global/weak symbols go through img->globals. On a name
+ * clash the existing record is either kept or overwritten in place:
+ *   - COMMON vs COMMON: the larger size wins; the merged symbol always
+ *     carries the stricter (larger) of the two alignments.
+ *   - incoming COMMON vs a non-COMMON definition: the definition stays.
+ *   - existing COMMON vs an incoming non-COMMON definition: replaced.
+ *   - otherwise the stronger binding wins (see bind_strength); two globals
+ *     are a fatal duplicate definition; on any other tie the first stays.
+ * Everything else (locals, undefined references, anonymous symbols) gets a
+ * fresh record of its own. */
+void link_resolve_symbols(Linker* l, LinkImage* img) {
+  u32 ii;
+  for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) {
+    LinkInput* in = LinkInputs_at(&l->inputs, ii);
+    ObjBuilder* ob = in->obj;
+    InputMap* m = &img->input_maps[ii];
+    ObjSymIter* it;
+    ObjSymEntry e;
+    u32 nsyms_in_input = 0;
+
+    /* DSOs are consulted by name later; they get no per-input map here. */
+    if (in->kind == LINK_INPUT_DSO_BYTES) continue;
+
+    /* First walk: count symbols so the map can be sized. */
+    it = obj_symiter_new(ob);
+    while (obj_symiter_next(it, &e)) ++nsyms_in_input;
+    obj_symiter_free(it);
+
+    map_alloc(img, m, nsyms_in_input + 1u /* +1 for id-0 slot */,
+              obj_section_count(ob));
+
+    /* Second walk: create or merge link symbols. */
+    it = obj_symiter_new(ob);
+    while (obj_symiter_next(it, &e)) {
+      const ObjSym* s = e.sym;
+      LinkSymbol rec;
+      LinkSymId existing;
+      int is_logical_undef = (s->section_id == OBJ_SEC_NONE) &&
+                             (s->kind != SK_ABS) && (s->kind != SK_COMMON);
+      int is_def;
+
+      /* Declared-but-unused externals never need resolving. */
+      if (is_logical_undef && !s->referenced &&
+          (s->bind == SB_GLOBAL || s->bind == SB_WEAK)) {
+        continue;
+      }
+
+      is_def = (s->kind != SK_UNDEF) &&
+               (s->kind == SK_ABS || s->kind == SK_COMMON ||
+                s->kind == SK_FILE || s->section_id != OBJ_SEC_NONE);
+
+      memset(&rec, 0, sizeof(rec));
+      rec.name = s->name;
+      rec.input_id = in->id;
+      rec.obj_sym = e.id;
+      rec.section_id = LINK_SEC_NONE;
+      rec.value = s->value;
+      rec.size = s->size;
+      rec.common_align = (s->kind == SK_COMMON) ? (u32)s->common_align : 0u;
+      rec.bind = (u8)s->bind;
+      rec.kind = (u8)s->kind;
+      rec.defined = (u8)is_def;
+      rec.vaddr = 0;
+
+      if (is_def && (s->bind == SB_GLOBAL || s->bind == SB_WEAK) &&
+          s->name != 0) {
+        /* The id the record will receive if the name is new. */
+        LinkSymId fresh = (LinkSymId)(LinkSyms_count(&img->syms) + 1u);
+        if (symhash_insert(&img->globals, s->name, fresh, &existing)) {
+          m->sym[e.id] = link_append_symbol(img, &rec);
+        } else {
+          LinkSymbol* prev = LinkSyms_at(&img->syms, existing - 1);
+          int new_strength = bind_strength((u8)s->bind);
+          int old_strength = bind_strength(prev->bind);
+          if (prev->kind == SK_COMMON && rec.kind == SK_COMMON) {
+            u32 max_align = (rec.common_align > prev->common_align)
+                                ? rec.common_align
+                                : prev->common_align;
+            if (rec.size > prev->size) {
+              rec.id = existing;
+              rec.common_align = max_align;
+              *prev = rec;
+            } else {
+              /* Keep the existing (at least as large) definition, but do
+               * not lose a stricter alignment requested by this one. */
+              prev->common_align = max_align;
+            }
+            m->sym[e.id] = existing;
+          } else if (rec.kind == SK_COMMON) {
+            m->sym[e.id] = existing;
+          } else if (prev->kind == SK_COMMON) {
+            rec.id = existing;
+            *prev = rec;
+            m->sym[e.id] = existing;
+          } else if (new_strength > old_strength) {
+            rec.id = existing;
+            *prev = rec;
+            m->sym[e.id] = existing;
+          } else if (new_strength == old_strength &&
+                     new_strength == bind_strength(SB_GLOBAL)) {
+            size_t namelen;
+            const char* nm = pool_str(l->c->global, s->name, &namelen);
+            compiler_panic(l->c, no_loc(),
+                           "link: duplicate definition of "
+                           "global symbol '%.*s'",
+                           (int)namelen, nm);
+          } else {
+            m->sym[e.id] = existing;
+          }
+        }
+      } else {
+        m->sym[e.id] = link_append_symbol(img, &rec);
+      }
+    }
+    obj_symiter_free(it);
+  }
+}
+
+/* Look through the DSO inputs, in input order, for a symbol called `name`
+ * that is neither SK_UNDEF nor local. Returns the id of the first DSO that
+ * has one, or LINK_INPUT_NONE (also for the anonymous name 0). */
+static LinkInputId find_dso_export(Linker* l, Sym name) {
+  u32 idx;
+
+  if (name == 0) return LINK_INPUT_NONE;
+
+  for (idx = 0; idx < LinkInputs_count(&l->inputs); ++idx) {
+    LinkInput* in = LinkInputs_at(&l->inputs, idx);
+    ObjSymIter* iter;
+    ObjSymEntry entry;
+
+    if (in->kind != LINK_INPUT_DSO_BYTES) continue;
+
+    iter = obj_symiter_new(in->obj);
+    while (obj_symiter_next(iter, &entry)) {
+      const ObjSym* sym = entry.sym;
+      if (sym->name == name && sym->kind != SK_UNDEF &&
+          sym->bind != SB_LOCAL) {
+        obj_symiter_free(iter);
+        return in->id;
+      }
+    }
+    obj_symiter_free(iter);
+  }
+  return LINK_INPUT_NONE;
+}
+
+/* Satisfy every symbol record that is still undefined. Sources are tried in
+ * this order and the first hit wins:
+ *   1. a defined record registered under the same name in img->globals
+ *      (section, value, vaddr, kind and bind are copied from it);
+ *   2. a DSO input exporting the name (the record is flagged as imported);
+ *   3. the user-supplied resolver callback, if any (the record becomes an
+ *      SK_ABS symbol at the returned pointer);
+ *   4. weak binding (the record becomes an SK_ABS symbol at address 0).
+ * Anything left is a fatal "undefined reference". */
+void link_resolve_undefs(Linker* l, LinkImage* img) {
+  u32 idx;
+
+  for (idx = 0; idx < LinkSyms_count(&img->syms); ++idx) {
+    LinkSymbol* s = LinkSyms_at(&img->syms, idx);
+
+    if (s->defined) continue;
+
+    /* The first three sources all need a name. */
+    if (s->name != 0) {
+      LinkSymId hit = symhash_get(&img->globals, s->name);
+      LinkInputId dso;
+
+      if (hit != LINK_SYM_NONE && hit != s->id) {
+        const LinkSymbol* def = LinkSyms_at(&img->syms, hit - 1);
+        if (def->defined) {
+          s->section_id = def->section_id;
+          s->value = def->value;
+          s->vaddr = def->vaddr;
+          s->kind = def->kind;
+          s->bind = def->bind;
+          s->defined = 1;
+          continue;
+        }
+      }
+
+      dso = find_dso_export(l, s->name);
+      if (dso != LINK_INPUT_NONE) {
+        s->imported = 1;
+        s->dso_input_id = dso;
+        continue;
+      }
+
+      if (l->resolver) {
+        size_t name_len;
+        /* NOTE(review): the name is handed over without its length, so this
+         * presumably relies on pool strings being NUL-terminated. */
+        const char* name = pool_str(l->c->global, s->name, &name_len);
+        void* addr;
+        (void)name_len;
+        addr = l->resolver(l->resolver_user, name);
+        if (addr) {
+          s->kind = SK_ABS;
+          s->vaddr = (u64)(uintptr_t)addr;
+          s->defined = 1;
+          continue;
+        }
+      }
+    }
+
+    if (s->bind == SB_WEAK) {
+      s->kind = SK_ABS;
+      s->vaddr = 0;
+      s->defined = 1;
+      continue;
+    }
+
+    {
+      size_t name_len = 0;
+      const char* name = "";
+      if (s->name) name = pool_str(l->c->global, s->name, &name_len);
+      obj_format_demangle_c(l->c, &name, &name_len);
+      compiler_panic(l->c, no_loc(), "link: undefined reference to '%.*s'",
+                     (int)name_len, name);
+    }
+  }
+}
+
+/* ---- pass 1b: --gc-sections liveness ---- */
+
+/* Work-queue entries are a single u64: the input index in the upper 32 bits
+ * and the object section id in the lower 32 bits. GC_II / GC_J unpack them. */
+#define GC_PACK(ii, j) (((u64)(u32)(ii) << 32) | (u32)(j))
+#define GC_II(p) ((u32)((p) >> 32))
+#define GC_J(p) ((ObjSecId)((p) & 0xffffffffu))
+
+/* Append the packed (input index, section id) pair to the work queue,
+ * growing it as needed.
+ *
+ * NOTE(review): when VEC_GROW reports failure the entry is dropped without
+ * any diagnostic. gc_mark() sets the live mark before calling this, so such
+ * a section stays marked but its relocations are never scanned. Other
+ * allocation failures in this file panic; this function has no compiler
+ * handle to do the same. */
+static void gc_queue_push(GcQueue* q, Heap* h, u32 ii, ObjSecId j) {
+  int grow_failed = VEC_GROW(h, q->items, q->cap, q->n + 1u) ? 1 : 0;
+  if (!grow_failed) {
+    q->items[q->n] = GC_PACK(ii, j);
+    q->n++;
+  }
+}
+
+/* Set up the liveness bitmap: one zeroed byte per section of every input.
+ *
+ * g->marks[ii] is a byte array with g->nsec[ii] entries (at least one byte
+ * is allocated even for an input with no sections). With no inputs both
+ * tables are NULL. Panics on allocation failure. */
+void link_gc_live_alloc(GcLive* g, Linker* l, Heap* h) {
+  u32 ninputs = LinkInputs_count(&l->inputs);
+  u32 idx;
+
+  g->ninputs = ninputs;
+  g->marks = NULL;
+  g->nsec = NULL;
+
+  if (ninputs) {
+    g->marks = (u8**)h->alloc(h, sizeof(*g->marks) * ninputs, _Alignof(u8*));
+    g->nsec = (u32*)h->alloc(h, sizeof(*g->nsec) * ninputs, _Alignof(u32));
+    if (!g->marks || !g->nsec)
+      compiler_panic(l->c, no_loc(), "link: oom on gc live map");
+  }
+
+  for (idx = 0; idx < ninputs; ++idx) {
+    u32 count = obj_section_count(LinkInputs_at(&l->inputs, idx)->obj);
+    u8* row;
+
+    g->nsec[idx] = count;
+    row = (u8*)h->alloc(h, count ? count : 1u, 1);
+    g->marks[idx] = row;
+    if (!row) compiler_panic(l->c, no_loc(), "link: oom on gc marks");
+    memset(row, 0, count);
+  }
+}
+
+/* Release everything link_gc_live_alloc() allocated. Row sizes mirror the
+ * allocation (one byte minimum per input). */
+void link_gc_live_free(GcLive* g, Heap* h) {
+  if (g->marks) {
+    u32 idx;
+    for (idx = 0; idx < g->ninputs; ++idx) {
+      u8* row = g->marks[idx];
+      if (!row) continue;
+      h->free(h, row, g->nsec[idx] ? g->nsec[idx] : 1u);
+    }
+    h->free(h, g->marks, sizeof(*g->marks) * g->ninputs);
+  }
+  if (g->nsec) {
+    h->free(h, g->nsec, sizeof(*g->nsec) * g->ninputs);
+  }
+}
+
+/* Return the live mark of section `j` in input `ii`, or 0 when either index
+ * is out of range or `j` is OBJ_SEC_NONE. */
+int link_gc_live_get(const GcLive* g, u32 ii, ObjSecId j) {
+  int in_range = ii < g->ninputs && j != OBJ_SEC_NONE && j < g->nsec[ii];
+  return in_range ? g->marks[ii][j] : 0;
+}
+
+/* Mark section `j` of input `ii` live and queue it for scanning. Out-of-range
+ * indices, OBJ_SEC_NONE and already-marked sections are ignored. */
+static void gc_mark(GcLive* g, GcQueue* q, Heap* h, u32 ii, ObjSecId j) {
+  u8* row;
+
+  if (ii >= g->ninputs) return;
+  if (j == OBJ_SEC_NONE || j >= g->nsec[ii]) return;
+
+  row = g->marks[ii];
+  if (!row[j]) {
+    row[j] = 1;
+    gc_queue_push(q, h, ii, j);
+  }
+}
+
+/* Locate the section that defines symbol `id`.
+ *
+ * An undefined record is followed to the record registered under the same
+ * name in img->globals (if that is a different record). On success writes
+ * the zero-based input index and the object section id to *out_ii /
+ * *out_sid and returns 1. Returns 0, leaving the outputs untouched, for
+ * invalid ids, unresolved names, SK_ABS / SK_COMMON symbols, and symbols
+ * with no originating input or no section. */
+static int gc_def_site(LinkImage* img, Linker* l, LinkSymId id, u32* out_ii,
+                       ObjSecId* out_sid) {
+  const LinkSymbol* sym;
+  const ObjSym* osym;
+  ObjBuilder* ob;
+
+  /* Chase undefined records through the globals table. */
+  for (;;) {
+    LinkSymId canonical;
+
+    if (id == LINK_SYM_NONE || id > LinkSyms_count(&img->syms)) return 0;
+    sym = LinkSyms_at(&img->syms, id - 1);
+    if (sym->defined) break;
+
+    if (sym->name == 0) return 0;
+    canonical = symhash_get(&img->globals, sym->name);
+    if (canonical == LINK_SYM_NONE || canonical == sym->id) return 0;
+    id = canonical;
+  }
+
+  if (sym->kind == SK_ABS || sym->kind == SK_COMMON) return 0;
+  if (sym->input_id == LINK_INPUT_NONE) return 0;
+
+  ob = LinkInputs_at(&l->inputs, sym->input_id - 1)->obj;
+  osym = obj_symbol_get(ob, sym->obj_sym);
+  if (!osym || osym->section_id == OBJ_SEC_NONE) return 0;
+
+  *out_ii = (u32)(sym->input_id - 1u);
+  *out_sid = osym->section_id;
+  return 1;
+}
+
+/* Recognise "__start_<X>" / "__stop_<X>" where <X> is a non-empty C
+ * identifier (letter or '_' first, then letters, digits or '_').
+ *
+ * s, n          the candidate name and its length (need not be terminated)
+ * out_off       receives the offset of <X> within s
+ * out_len       receives the length of <X>
+ * out_is_start  optional; receives 1 for "__start_", 0 for "__stop_"
+ *
+ * Returns 1 on a match; otherwise returns 0 and writes nothing. */
+int link_gc_split_start_stop(const char* s, size_t n, size_t* out_off,
+                             size_t* out_len, int* out_is_start) {
+  static const char kStartPrefix[] = "__start_";
+  static const char kStopPrefix[] = "__stop_";
+  const size_t start_len = sizeof(kStartPrefix) - 1u;
+  const size_t stop_len = sizeof(kStopPrefix) - 1u;
+  size_t prefix_len;
+  size_t ident_len;
+  size_t pos;
+  int starts;
+
+  /* The strict '>' guarantees at least one character after the prefix. */
+  if (n > start_len && memcmp(s, kStartPrefix, start_len) == 0) {
+    prefix_len = start_len;
+    starts = 1;
+  } else if (n > stop_len && memcmp(s, kStopPrefix, stop_len) == 0) {
+    prefix_len = stop_len;
+    starts = 0;
+  } else {
+    return 0;
+  }
+
+  ident_len = n - prefix_len;
+  for (pos = 0; pos < ident_len; ++pos) {
+    char c = s[prefix_len + pos];
+    int is_alpha = c == '_' || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+    int is_digit = c >= '0' && c <= '9';
+    /* Digits are allowed everywhere except the first position. */
+    if (!is_alpha && !(is_digit && pos > 0)) return 0;
+  }
+
+  *out_off = prefix_len;
+  *out_len = ident_len;
+  if (out_is_start) *out_is_start = starts;
+  return 1;
+}
+
+/* Mark live every kept section, in any input, whose name is `section_name`.
+ * Section ids start at 1. */
+static void gc_promote_by_section_name(Linker* l, GcLive* g, GcQueue* q,
+                                       Heap* h, Sym section_name) {
+  u32 input_idx;
+
+  for (input_idx = 0; input_idx < LinkInputs_count(&l->inputs); ++input_idx) {
+    ObjBuilder* ob = LinkInputs_at(&l->inputs, input_idx)->obj;
+    u32 count = obj_section_count(ob);
+    u32 sec;
+
+    for (sec = 1; sec < count; ++sec) {
+      const Section* cand = obj_section_get(ob, sec);
+      if (cand && link_section_kept(cand) && cand->name == section_name)
+        gc_mark(g, q, h, input_idx, sec);
+    }
+  }
+}
+
+/* Compute section liveness into `g` (already allocated and zeroed).
+ *
+ * With l->gc_sections off, every kept section is simply marked live.
+ * Otherwise a worklist traversal runs from these roots:
+ *   - kept sections flagged SF_RETAIN or with init/fini/preinit-array
+ *     semantics,
+ *   - the section defining the entry symbol, if an entry name is set.
+ * For each live section, every relocation located in it marks the section
+ * defining its target symbol. A target named __start_<X> / __stop_<X>
+ * additionally marks every kept section named <X>. */
+void link_gc_compute(Linker* l, LinkImage* img, GcLive* g) {
+  Heap* h = img->heap;
+  GcQueue work;
+  u32 ii, sec;
+
+  if (!l->gc_sections) {
+    for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) {
+      ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj;
+      u32 count = obj_section_count(ob);
+      for (sec = 1; sec < count; ++sec) {
+        const Section* s = obj_section_get(ob, sec);
+        if (s && link_section_kept(s)) g->marks[ii][sec] = 1;
+      }
+    }
+    return;
+  }
+
+  memset(&work, 0, sizeof(work));
+
+  /* Roots: sections that must survive regardless of references. */
+  for (ii = 0; ii < LinkInputs_count(&l->inputs); ++ii) {
+    ObjBuilder* ob = LinkInputs_at(&l->inputs, ii)->obj;
+    u32 count = obj_section_count(ob);
+    for (sec = 1; sec < count; ++sec) {
+      const Section* s = obj_section_get(ob, sec);
+      if (!s || !link_section_kept(s)) continue;
+      if ((s->flags & SF_RETAIN) || s->sem == SSEM_INIT_ARRAY ||
+          s->sem == SSEM_FINI_ARRAY || s->sem == SSEM_PREINIT_ARRAY)
+        gc_mark(g, &work, h, ii, sec);
+    }
+  }
+
+  /* Root: the entry point's section. */
+  if (l->entry_name != 0) {
+    LinkSymId entry = symhash_get(&img->globals, l->entry_name);
+    u32 def_input;
+    ObjSecId def_sec;
+    if (gc_def_site(img, l, entry, &def_input, &def_sec))
+      gc_mark(g, &work, h, def_input, def_sec);
+  }
+
+  /* Drain the worklist (last in, first out). */
+  while (work.n > 0) {
+    u64 packed = work.items[--work.n];
+    u32 cur_input = GC_II(packed);
+    ObjSecId cur_sec = GC_J(packed);
+    ObjBuilder* ob = LinkInputs_at(&l->inputs, cur_input)->obj;
+    InputMap* map = &img->input_maps[cur_input];
+    u32 nreloc = obj_reloc_total(ob);
+    u32 k;
+
+    /* Relocs are not indexed by section here, so each pop walks all of the
+     * object's relocs and filters on the section id. */
+    for (k = 0; k < nreloc; ++k) {
+      const Reloc* r = obj_reloc_at(ob, k);
+      const LinkSymbol* tsym;
+      LinkSymId target;
+      u32 def_input;
+      ObjSecId def_sec;
+
+      if (r->section_id != cur_sec) continue;
+      if (r->sym == OBJ_SYM_NONE || r->sym >= map->nsym) continue;
+      target = map->sym[r->sym];
+      if (target == LINK_SYM_NONE) continue;
+      tsym = LinkSyms_at(&img->syms, target - 1);
+
+      /* __start_<X> / __stop_<X> keep every section named <X> alive. */
+      if (tsym->name != 0) {
+        size_t name_len, ident_off, ident_len;
+        const char* name = pool_str(l->c->global, tsym->name, &name_len);
+        if (link_gc_split_start_stop(name, name_len, &ident_off, &ident_len,
+                                     NULL)) {
+          Sym secname = pool_intern(l->c->global, name + ident_off, ident_len);
+          gc_promote_by_section_name(l, g, &work, h, secname);
+        }
+      }
+
+      if (gc_def_site(img, l, target, &def_input, &def_sec))
+        gc_mark(g, &work, h, def_input, def_sec);
+    }
+  }
+
+  if (work.items) h->free(h, work.items, sizeof(*work.items) * work.cap);
+}
+
+/* After GC, undefine every symbol whose defining input section is not live.
+ *
+ * Does nothing unless l->gc_sections is set. Symbols that are undefined,
+ * absolute (SK_ABS), common (SK_COMMON), not tied to an input, or not tied
+ * to an object section are left untouched. A dropped symbol has `defined`
+ * and `vaddr` cleared and `section_id` reset to LINK_SEC_NONE. */
+void link_gc_drop_dead_globals(Linker* l, LinkImage* img, const GcLive* g) {
+  u32 idx;
+
+  if (!l->gc_sections) return;
+
+  for (idx = 0; idx < LinkSyms_count(&img->syms); ++idx) {
+    LinkSymbol* sym = LinkSyms_at(&img->syms, idx);
+    const ObjSym* def;
+    ObjBuilder* owner;
+
+    /* Only section-backed definitions coming from an input can be dead. */
+    if (!sym->defined || sym->kind == SK_ABS || sym->kind == SK_COMMON ||
+        sym->input_id == LINK_INPUT_NONE)
+      continue;
+
+    /* input_id is 1-based; the inputs table is 0-based. */
+    owner = LinkInputs_at(&l->inputs, sym->input_id - 1)->obj;
+    def = obj_symbol_get(owner, sym->obj_sym);
+    if (!def || def->section_id == OBJ_SEC_NONE) continue;
+
+    if (!link_gc_live_get(g, (u32)(sym->input_id - 1u), def->section_id)) {
+      sym->defined = 0;
+      sym->vaddr = 0;
+      sym->section_id = LINK_SEC_NONE;
+    }
+  }
+}
+
+/* ---- archive ingestion ---- */
+
+/* Append archive member `mem` to the linker's input list, at most once.
+ *
+ * The member's object pointer is moved into the new LinkInput (mem->obj is
+ * cleared afterwards) and the member is flagged as included, so repeated
+ * calls are no-ops. Input ids are the 0-based table index plus one. */
+static void include_archive_member(Linker* l, LinkArchiveMember* mem) {
+  u32 slot;
+  LinkInput* input;
+
+  if (mem->included) return;
+
+  input = LinkInputs_push(&l->inputs, &slot);
+  if (input == NULL) {
+    compiler_panic(l->c, no_loc(), "link: oom growing inputs (archive member)");
+  }
+
+  input->id = (LinkInputId)(slot + 1u);
+  input->kind = LINK_INPUT_OBJ_BYTES;
+  input->obj = mem->obj;
+  input->name = mem->name;
+
+  mem->obj = NULL;
+  mem->included = 1;
+}
+
+/* Classify the named, non-local symbols of every current input.
+ *
+ * SK_UNDEF symbols are recorded in `undefs`; every other kind is recorded
+ * in `defined`. Both tables are used as sets (the stored value is always 1).
+ * A name can appear in both when one input references it and another
+ * defines it. */
+static void scan_presence(Linker* l, SymHash* defined, SymHash* undefs) {
+  u32 input_idx;
+
+  for (input_idx = 0; input_idx < LinkInputs_count(&l->inputs); ++input_idx) {
+    ObjSymEntry entry;
+    ObjSymIter* iter =
+        obj_symiter_new(LinkInputs_at(&l->inputs, input_idx)->obj);
+
+    while (obj_symiter_next(iter, &entry)) {
+      const ObjSym* sym = entry.sym;
+      SymHash* table;
+
+      /* Anonymous and file-local symbols take no part in archive pulls. */
+      if (sym->name == 0 || sym->bind == SB_LOCAL) continue;
+
+      table = (sym->kind == SK_UNDEF) ? undefs : defined;
+      symhash_set(table, sym->name, 1u);
+    }
+
+    obj_symiter_free(iter);
+  }
+}
+
+/* Return 1 if any symbol of any current input has kind SK_IFUNC, else 0.
+ * The scan stops at the first match; binding and name are not examined. */
+static int inputs_have_defined_ifunc(Linker* l) {
+  int found = 0;
+  u32 input_idx;
+
+  for (input_idx = 0; !found && input_idx < LinkInputs_count(&l->inputs);
+       ++input_idx) {
+    ObjSymEntry entry;
+    ObjSymIter* iter =
+        obj_symiter_new(LinkInputs_at(&l->inputs, input_idx)->obj);
+
+    while (!found && obj_symiter_next(iter, &entry)) {
+      if (entry.sym->kind == SK_IFUNC) found = 1;
+    }
+
+    obj_symiter_free(iter);
+  }
+
+  return found;
+}
+
+/* Decide whether archive member `mem` should be pulled into the link.
+ *
+ * Returns 1 when the member has at least one named SB_GLOBAL or SB_WEAK
+ * symbol that is not SK_UNDEF, whose name is present in `wanted` and absent
+ * from `defined`; returns 0 otherwise. Reads mem->obj, so the member must
+ * not have been included yet. */
+static int member_satisfies(LinkArchiveMember* mem, const SymHash* defined,
+                            const SymHash* wanted) {
+  ObjSymEntry entry;
+  ObjSymIter* iter = obj_symiter_new(mem->obj);
+  int satisfied = 0;
+
+  while (!satisfied && obj_symiter_next(iter, &entry)) {
+    const ObjSym* sym = entry.sym;
+
+    if (sym->name == 0 || sym->kind == SK_UNDEF) continue;
+    if (sym->bind != SB_GLOBAL && sym->bind != SB_WEAK) continue;
+
+    /* Needed by someone, and nobody provides it yet. */
+    satisfied = symhash_get(wanted, sym->name) != LINK_SYM_NONE &&
+                symhash_get(defined, sym->name) == LINK_SYM_NONE;
+  }
+
+  obj_symiter_free(iter);
+  return satisfied;
+}
+
+/* Record every named, non-local definition of `ob` in `defined`, so members
+ * examined later in the same pass see those names as already satisfied. */
+static void note_member_definitions(ObjBuilder* ob, SymHash* defined) {
+  ObjSymEntry e;
+  ObjSymIter* it = obj_symiter_new(ob);
+  while (obj_symiter_next(it, &e)) {
+    const ObjSym* s = e.sym;
+    if (s->name == 0 || s->bind == SB_LOCAL || s->kind == SK_UNDEF) continue;
+    symhash_set(defined, s->name, 1u);
+  }
+  obj_symiter_free(it);
+}
+
+/* Turn archive members into link inputs.
+ *
+ * Phase 1: every member of an archive flagged whole_archive is included
+ * unconditionally.
+ *
+ * Phase 2: for the remaining archives, iterate to a fixed point. Each pass
+ * rebuilds the defined/undefined name sets from the current inputs, then
+ * includes any not-yet-included member that defines (global or weak) a name
+ * that is undefined and not yet defined. A member's definitions are recorded
+ * as soon as it is pulled, so a later member defining the same name is not
+ * pulled in the same pass. The loop ends when a pass adds no member.
+ *
+ * When emitting a static executable and some input carries an SK_IFUNC
+ * symbol, `__cfree_ifunc_init` is treated as undefined (unless an input
+ * already defines it) so that a member providing it gets pulled. */
+void link_ingest_archives(Linker* l) {
+  u32 a, m;
+  Sym want_ifunc_init = 0;
+
+  if (LinkArchives_count(&l->archives) == 0) return;
+
+  for (a = 0; a < LinkArchives_count(&l->archives); ++a) {
+    LinkArchive* ar = LinkArchives_at(&l->archives, a);
+    if (!ar->whole_archive) continue;
+    for (m = 0; m < ar->nmembers; ++m)
+      include_archive_member(l, &ar->members[m]);
+  }
+
+  if (l->emit_static_exe && inputs_have_defined_ifunc(l)) {
+    want_ifunc_init = pool_intern_cstr(l->c->global, "__cfree_ifunc_init");
+  }
+
+  for (;;) {
+    SymHash defined, undefs;
+    int changed = 0;
+    symhash_init(&defined, l->heap);
+    symhash_init(&undefs, l->heap);
+    scan_presence(l, &defined, &undefs);
+    if (want_ifunc_init != 0 &&
+        symhash_get(&defined, want_ifunc_init) == LINK_SYM_NONE)
+      symhash_set(&undefs, want_ifunc_init, 1u);
+
+    for (a = 0; a < LinkArchives_count(&l->archives); ++a) {
+      LinkArchive* ar = LinkArchives_at(&l->archives, a);
+      if (ar->whole_archive) continue;
+      for (m = 0; m < ar->nmembers; ++m) {
+        LinkArchiveMember* mem = &ar->members[m];
+        ObjBuilder* ob;
+        if (mem->included) continue;
+        if (!member_satisfies(mem, &defined, &undefs)) continue;
+        ob = mem->obj; /* include_archive_member() clears mem->obj */
+        include_archive_member(l, mem);
+        /* Keep `defined` current so a second provider of the same name is
+         * not pulled in this pass. */
+        note_member_definitions(ob, &defined);
+        changed = 1;
+      }
+    }
+    symhash_fini(&defined);
+    symhash_fini(&undefs);
+    if (!changed) break;
+  }
+}
diff --git a/src/parse/parse.c b/src/parse/parse.c
@@ -1,119 +1,28 @@
-/* C11 recursive-descent parser. No AST; the parser drives DeclTable for C
- * declaration semantics and CG for executable code in a single pass.
+/* parse.c — residual C11 parser core.
*
- * Module shape (DESIGN §5):
- * - lex / pp produce a token stream; we keep one token of lookahead.
- * - The parser maintains its own scope stack (block/file scope) for
- * identifier resolution. DeclTable owns DeclId/ObjSymId allocation.
- * - Statements drive CG: cg_func_begin/end, cg_local, cg_set_loc,
- * cg_label_*, cg_branch_*, cg_jump, cg_ret. Expressions drive CG's
- * value stack: cg_push_*, cg_load, cg_store, cg_binop, cg_cmp.
- * - One Tok of lookahead is enough for C11; at decision points we use
- * the keyword/punctuator directly.
+ * Contains:
+ * - kw_names[] table (used by parse_c to intern keywords)
+ * - Diagnostics/token helpers (perr, advance, peek1, fetch_tok, ...)
+ * - Scope/tag operations
+ * - Type helpers (ty_int, ty_size_t)
+ * - Local-variable slot allocation (make_local, make_local_aligned)
+ * - Static-local symbol naming (mint_static_local_sym)
+ * - Declaration driver (parse_init_declarator, parse_local_decl)
+ * - TU-level driver (parse_param_list, declare_function,
+ * parse_function_body, parse_external_decl, parse_translation_unit,
+ * parse_c)
*
- * v1 slice: single-TU; functions returning int; int locals (with comma-
- * separated initializers); compound, if/else, while, for, return,
- * expression statements; expressions covering the §6.5 spine
- * (additive/multiplicative/relational/equality, unary, parens, post/pre
- * inc-dec, simple assignment + compound assignment). The grammar is
- * organized so each higher-level production gets its own function — the
- * full C grammar slots in the same shape, one production at a time. */
+ * All expression, type, initializer, and statement code lives in
+ * parse_expr.c, parse_type.c, parse_init.c, and parse_stmt.c. */
-#include "parse/parse.h"
+#include "parse/parse_priv.h"
#include <stdarg.h>
#include <string.h>
-#include "abi/abi.h"
-#include "arch/arch.h"
-#include "cg/cg.h"
-#include "core/arena.h"
-#include "core/core.h"
-#include "core/heap.h"
-#include "core/pool.h"
-#include "debug/debug.h"
-#include "decl/decl.h"
-#include "decl/decl_attrs.h"
-#include "lex/lex.h"
-#include "obj/obj.h"
-#include "parse/attr.h"
-#include "pp/pp.h"
-#include "type/type.h"
-
-/* Type-aware push for locals — exposed by cg.c, not in cg.h. */
-extern void cg_push_local_typed(CG*, FrameSlot, const Type*);
-/* Pop pointer rvalue, push INDIRECT lvalue of given pointee. */
-extern void cg_deref(CG*, const Type* pointee);
-/* Read SValue.type at top of stack without popping. */
-extern const Type* cg_top_type(CG*);
-/* Read SValue.type at second-from-top; used for pointer-arith dispatch when
- * both operands are already on the stack. */
-extern const Type* cg_top2_type(CG*);
-/* Replace the type tag on the top SValue without emitting code (used for
- * pointer-to-pointer casts which are no-ops at the value level). */
-extern void cg_retag_top(CG*, const Type*);
-/* Recycle the backend's scratch-register pool when no value-stack entry
- * holds a live register. Called at statement boundaries to avoid
- * exhausting the fixed scratch window over the course of a function. */
-
/* ============================================================
* Keywords
- * ============================================================
- * Lex emits TOK_IDENT; the parser bucketizes idents into keywords by
- * comparing the interned Sym against a fixed table populated at parser
- * init. The table covers C11 plus a handful of common GCC-style
- * extensions the runtime headers use. Adding a new keyword is one entry
- * here plus one parser branch; the lexer never changes. */
-typedef enum CKw {
- KW_NONE = 0,
- KW_AUTO,
- KW_BREAK,
- KW_CASE,
- KW_CHAR,
- KW_CONST,
- KW_CONTINUE,
- KW_DEFAULT,
- KW_DO,
- KW_DOUBLE,
- KW_ELSE,
- KW_ENUM,
- KW_EXTERN,
- KW_FLOAT,
- KW_FOR,
- KW_GOTO,
- KW_IF,
- KW_INLINE,
- KW_INT,
- KW_LONG,
- KW_REGISTER,
- KW_RESTRICT,
- KW_RETURN,
- KW_SHORT,
- KW_SIGNED,
- KW_SIZEOF,
- KW_STATIC,
- KW_STRUCT,
- KW_SWITCH,
- KW_TYPEDEF,
- KW_UNION,
- KW_UNSIGNED,
- KW_VOID,
- KW_VOLATILE,
- KW_WHILE,
- KW_BOOL, /* _Bool */
- KW_COMPLEX, /* _Complex */
- KW_IMAGINARY, /* _Imaginary */
- KW_ALIGNAS, /* _Alignas */
- KW_ALIGNOF, /* _Alignof */
- KW_ATOMIC, /* _Atomic */
- KW_GENERIC, /* _Generic */
- KW_NORETURN, /* _Noreturn */
- KW_STATIC_ASSERT, /* _Static_assert */
- KW_THREAD_LOCAL, /* _Thread_local */
- KW_ASM, /* GNU `asm` */
- KW_BUILTIN_ASM, /* GNU `__asm__` */
- KW_COUNT
-} CKw;
+ * ============================================================ */
static const char* const kw_names[KW_COUNT] = {
NULL, "auto", "break", "case", "char",
@@ -129,250 +38,12 @@ static const char* const kw_names[KW_COUNT] = {
};
/* ============================================================
- * Scope stack
- * ============================================================
- * One ScopeEntry per declared identifier; chained in declaration order
- * within a Scope. Block scopes are pushed/popped around every compound
- * statement, parameter list, and `for`-init. Lookup walks parent chains. */
-
-typedef enum SymEntryKind {
- SEK_LOCAL, /* local variable, OPK_LOCAL via FrameSlot */
- SEK_GLOBAL, /* global var, OPK_GLOBAL via ObjSymId */
- SEK_FUNC, /* function decl, OPK_GLOBAL via ObjSymId */
- SEK_TYPEDEF, /* typedef name */
- SEK_ENUM_CST, /* enumeration constant */
-} SymEntryKind;
-
-typedef struct SymEntry SymEntry;
-struct SymEntry {
- Sym name;
- u8 kind; /* SymEntryKind */
- u8 pad[3];
- const Type* type;
- union {
- FrameSlot slot;
- ObjSymId sym;
- i64 enum_value;
- } v;
- /* For VLAs (SEK_LOCAL or SEK_TYPEDEF): a frame slot holding the array's
- * byte size, captured at declaration / typedef site. FRAME_SLOT_NONE
- * for non-VLA entries. Used by sizeof on VLA-bound IDENTs and by
- * VLA-typedef variable declarations. */
- FrameSlot vla_byte_slot;
- /* Phase 1: parsed __attribute__((...)) list attached to this entry.
- * Populated for SEK_GLOBAL / SEK_FUNC declarators (used, section,
- * noreturn, alias, weak, visibility, aligned). NULL otherwise.
- * Phase 2 reads this; nothing in Phase 1 does. */
- struct Attr* attrs;
- SymEntry* next;
-};
-
-/* Tag namespace (struct/union/enum). Lives parallel to the ordinary
- * identifier scope on the same Scope chain — the spec puts them in
- * separate namespaces (§6.2.3). The `type` field is a Type* (mutable so
- * forward declarations can be completed in place); for enums it is the
- * complete TY_ENUM type. `complete` mirrors `type->rec.incomplete` for
- * struct/union and is set immediately for enums. */
-typedef struct TagEntry TagEntry;
-struct TagEntry {
- Sym name;
- u8 kind; /* TagDeclKind */
- u8 complete;
- u16 pad;
- Type* type;
- /* Phase 1: record-level __attribute__((...)) list (packed, aligned).
- * Both leading-position (between keyword and tag/body) and trailing
- * (after `}`) attrs are chained here. Phase 2 reads this; Phase 1
- * does not. */
- struct Attr* attrs;
- TagEntry* next;
-};
-
-typedef struct Scope Scope;
-struct Scope {
- SymEntry* entries; /* LIFO */
- TagEntry* tags; /* LIFO */
- Scope* parent;
-};
-
-/* ============================================================
- * Parser context
- * ============================================================ */
-
-/* Switch dispatch: each `case K:` records (value, label) into the innermost
- * switch context. After the body, the dispatch chain at L_dispatch loads the
- * saved switch value, compares against each entry, and branches to its label. */
-typedef struct CaseEntry CaseEntry;
-struct CaseEntry {
- i64 value;
- CGLabel label;
- CaseEntry* next; /* LIFO; reverse-walked at dispatch emit time */
-};
-
-typedef struct SwitchCtx SwitchCtx;
-struct SwitchCtx {
- CaseEntry* cases; /* LIFO, nodes arena-allocated */
- CGLabel default_label; /* 0 if no `default:` seen */
- FrameSlot value_slot; /* holds the switch expression value */
- const Type* value_type; /* type of the switch expression */
- SwitchCtx* parent;
-};
-
-/* Labels live in a per-function namespace separate from ordinary identifiers
- * (§6.2.3 ¶1). One entry per unique label name; CGLabel is allocated lazily
- * on first reference (whether goto-forward or label-place comes first). */
-typedef struct GotoLabel GotoLabel;
-struct GotoLabel {
- Sym name;
- CGLabel label;
- u8 placed; /* the matching `name:` was seen */
- u8 pad[3];
- SrcLoc first_use;
- GotoLabel* next;
-};
-
-typedef struct Parser {
- Compiler* c;
- Pp* pp;
- DeclTable* decls;
- CG* cg;
- Debug* debug;
- TargetABI* abi;
- Pool* pool;
-
- Tok cur; /* one token of lookahead */
- Tok next; /* second slot, populated lazily by peek1() */
- int has_next;
-
- /* String-literal fusion (C11 §6.4.5 ¶5) is performed at the pp-pull
- * boundary: a run of adjacent TOK_STR tokens collapses into one before
- * landing in `cur`/`next`. To peek past the run we have to read the
- * first non-TOK_STR from pp; `pending` parks it for the next pull. */
- Tok pending;
- int has_pending;
-
- Sym kw_sym[KW_COUNT];
-
- /* Interned spellings for the __builtin_* / __atomic_* family routed through
- * try_parse_builtin_call (Phase 9). __builtin_va_list is recognized as a
- * type-name in parse_decl_specs / starts_type_name. */
- Sym sym_b_alloca;
- Sym sym_b_ctz;
- Sym sym_b_expect;
- Sym sym_b_offsetof;
- Sym sym_b_va_list;
- Sym sym_b_va_start;
- Sym sym_b_va_arg;
- Sym sym_b_va_end;
- Sym sym_b_va_copy;
- /* GNU `__attribute__` keyword spelling (Phase 1). Not a real C keyword,
- * so it lives outside kw_names[] — matched by IDENT comparison just like
- * the __builtin_* family. */
- Sym sym_attribute;
- /* GNU `__volatile__` alias for `volatile` inside asm() qualifiers.
- * `volatile`/`KW_VOLATILE` already lives in kw_names[]; the doubled-
- * underscore spelling is sym-compared in parse_asm_stmt. */
- Sym sym_volatile_alias;
- /* GNU `__alignof__` alias for `_Alignof`. Routed through `ident_kw` so
- * every `KW_ALIGNOF` consumer accepts both spellings without per-site
- * checks. */
- Sym sym_alignof_alias;
- Sym sym_a_load_n;
- Sym sym_a_store_n;
- Sym sym_a_exchange_n;
- Sym sym_a_fetch_add;
- Sym sym_a_fetch_sub;
- Sym sym_a_fetch_and;
- Sym sym_a_fetch_or;
- Sym sym_a_fetch_xor;
- Sym sym_a_cas_n;
- Sym sym_a_thread_fence;
- Sym sym_a_signal_fence;
-
- Scope* scope; /* top of stack; file scope is the root */
-
- ObjSecId text_sec;
-
- /* Loop/switch context for break/continue. CGLabel 0 means none. */
- CGLabel cur_break;
- CGLabel cur_continue;
-
- /* Innermost switch (`case`/`default` bind here). NULL outside any switch.
- * `break` still goes through `cur_break`, which the switch sets. */
- SwitchCtx* cur_switch;
-
- /* Per-function label chain. Reset across each function definition. */
- GotoLabel* goto_labels;
-
- /* VLA bookkeeping. parse_decl_suffix emits the size-expression code at
- * suffix-parse time (because the tokens are about to vanish) and stashes
- * the i64 count in `vla_pending_count_slot`; parse_init_declarator picks
- * it up to drive cg_alloca. v1 supports only one VLA dimension per
- * declarator; nested cases panic in apply_decl_suffix. */
- u8 vla_pending;
- FrameSlot vla_pending_count_slot;
-
- /* Tracks the most recent IDENT-lvalue push that resolved to a VLA-bound
- * SEK_LOCAL. Lets `sizeof IDENT` and `sizeof(IDENT)` swap the constant
- * `abi_sizeof(ptr)` (8) for a runtime load of the array's byte-size
- * slot. Cleared by sizeof before parse_unary; the IDENT handler sets
- * it. FRAME_SLOT_NONE when the last push was not a VLA-bound IDENT. */
- FrameSlot last_pushed_vla_slot;
-
- /* Counter raised while parsing a function-prototype parameter declarator.
- * Per §6.7.6.3 ¶7, an array parameter `T x[expr]` is adjusted to `T *x`
- * regardless of `expr`, so the size expression's value is irrelevant —
- * including `[*]` (§6.7.6.2 ¶4) and identifier-bearing forms like
- * `int a[n]`. While >0, parse_decl_suffix consumes the bracket contents
- * without evaluating them. Counter (not bool) so nested function-typed
- * parameters re-enter cleanly. */
- u8 in_param_decl;
-
- /* Counter used to mint unique linker-visible names for static locals so
- * that two functions can each have their own `static int s = ...`. */
- u32 static_local_counter;
-
- /* Counter used to mint anonymous local names for compound literals
- * (`(T){...}`). Each compound literal becomes a hidden frame slot whose
- * name is reserved here purely for diagnostics; the symbol is never
- * visible to user code. */
- u32 compound_literal_counter;
-
- /* Replay buffer for two-pass scans of brace-enclosed initializers.
- * Used when a compound literal or initializer needs to size an
- * incomplete array (`(int[]){10, 32}`): we record tokens through the
- * matching `}`, count items, then rewind to re-parse. While
- * `replay_active`, advance()/peek1() pull from `replay` instead of pp;
- * once exhausted, they fall back to the regular pp source so the
- * post-brace token is fetched fresh. The buffer lives in arena
- * storage. */
- Tok* replay;
- u32 replay_cap;
- u32 replay_len;
- u32 replay_pos;
- u8 replay_active;
-
- /* Pending relocations collected while parsing a static-storage
- * initializer (pointer constants like `&g` or `arr + 3`). The
- * caller (`define_static_object`) flushes these via obj_reloc after
- * the section has been pinned. Reset before each top-level init. */
- struct {
- u32 offset;
- u32 size; /* 4 or 8 bytes */
- ObjSymId target;
- i64 addend;
- } *static_relocs;
- u32 static_relocs_len;
- u32 static_relocs_cap;
-} Parser;
-
-/* ============================================================
* Diagnostics
* ============================================================ */
static SrcLoc tok_loc(const Tok* t) { return t->loc; }
-static _Noreturn void perr(Parser* p, const char* fmt, ...) {
+_Noreturn void perr(Parser* p, const char* fmt, ...) {
va_list ap;
SrcLoc loc = tok_loc(&p->cur);
va_start(ap, fmt);
@@ -384,8 +55,7 @@ static _Noreturn void perr(Parser* p, const char* fmt, ...) {
* ============================================================ */
/* Width of an encoding prefix on a string-literal spelling: 0 for ordinary,
- * 1 for L/u/U, 2 for u8. Driven by the TF_STR_* flag bits set by the lexer
- * so we don't re-scan the spelling. */
+ * 1 for L/u/U, 2 for u8. */
static size_t str_prefix_len(u16 flags) {
if (flags & TF_STR_U8) return 2;
if (flags & (TF_STR_WIDE | TF_STR_U16 | TF_STR_U32)) return 1;
@@ -395,13 +65,7 @@ static size_t str_prefix_len(u16 flags) {
#define STR_ENC_MASK \
(TF_STR_WIDE | TF_STR_U8 | TF_STR_U16 | TF_STR_U32)
-/* Fuse two adjacent TOK_STR tokens into one per C11 §6.4.5 ¶5:
- * - same encoding prefix (or both ordinary): keep that encoding;
- * - one ordinary + one prefixed: the prefixed encoding wins;
- * - two different non-ordinary prefixes: ill-formed.
- * The combined spelling is `<prefix>"<content-of-a><content-of-b>"`,
- * interned into the global pool; `loc` stays at the first token's loc
- * so diagnostics still point at the start of the run. */
+/* Fuse two adjacent TOK_STR tokens into one per C11 §6.4.5 ¶5. */
static Tok fuse_string_lits(Parser* p, Tok a, Tok b) {
u16 ae = (u16)(a.flags & STR_ENC_MASK);
u16 be = (u16)(b.flags & STR_ENC_MASK);
@@ -425,16 +89,12 @@ static Tok fuse_string_lits(Parser* p, Tok a, Tok b) {
fused_enc = ae ? ae : be;
apfx = str_prefix_len(a.flags);
bpfx = str_prefix_len(b.flags);
- /* Each spelling is `<prefix>"...content..."`; strip prefix and the two
- * delimiting quotes. lexer guarantees at least the prefix + 2 quotes. */
if (alen < apfx + 2 || as[apfx] != '"' || as[alen - 1] != '"' ||
blen < bpfx + 2 || bs[bpfx] != '"' || bs[blen - 1] != '"') {
perr(p, "malformed string literal in concatenation");
}
a_content_len = alen - apfx - 2;
b_content_len = blen - bpfx - 2;
- /* Output prefix: pick from whichever token contributed the surviving
- * encoding (a if a was prefixed, else b — also covers both-ordinary). */
out_pfx_len = ae ? apfx : bpfx;
out_len = out_pfx_len + 1 + a_content_len + b_content_len + 1;
buf = (char*)h->alloc(h, out_len, 1);
@@ -457,17 +117,12 @@ static Tok fuse_string_lits(Parser* p, Tok a, Tok b) {
out = a;
out.spelling = pool_intern(p->pool, buf, k);
out.flags = (u16)((a.flags & ~STR_ENC_MASK) | fused_enc);
- /* The fused token is freshly minted from the pool; LitId from the lexer
- * pertained only to the first piece. Clear it so any future LitInfo
- * lookups don't return stale per-token data. */
out.lit = LIT_NONE;
h->free(h, buf, 0);
return out;
}
-/* Pull one logical token from pp, collapsing adjacent TOK_STR runs into a
- * single fused TOK_STR. The first non-TOK_STR token that terminates a run
- * is parked in `pending` for the next call. */
+/* Pull one logical token from pp, collapsing adjacent TOK_STR runs. */
static Tok fetch_tok(Parser* p) {
Tok t;
if (p->has_pending) {
@@ -488,15 +143,12 @@ static Tok fetch_tok(Parser* p) {
}
}
-static void advance(Parser* p) {
+void advance(Parser* p) {
if (p->replay_active) {
if (p->replay_pos < p->replay_len) {
p->cur = p->replay[p->replay_pos++];
return;
}
- /* Replay exhausted; fall back to the underlying source. The pp stream
- * sits exactly past the recorded `}` (record_braced_block left it
- * there), so fetching the next token resumes parsing after the brace. */
p->replay_active = 0;
}
if (p->has_next) {
@@ -507,8 +159,7 @@ static void advance(Parser* p) {
}
}
-/* One-token lookahead beyond p->cur. Lazily populated. */
-static Tok peek1(Parser* p) {
+Tok peek1(Parser* p) {
if (p->replay_active && p->replay_pos < p->replay_len) {
return p->replay[p->replay_pos];
}
@@ -519,30 +170,13 @@ static Tok peek1(Parser* p) {
return p->next;
}
-static int is_punct(const Tok* t, u32 punct) {
- return t->kind == TOK_PUNCT && t->v.punct == punct;
-}
-
-static int is_pp_hash(const Tok* t) { return t->kind == TOK_PP_HASH; }
-
-static int is_kw(const Parser* p, const Tok* t, CKw k) {
- if (t->kind != TOK_IDENT) return 0;
- if (t->v.ident == p->kw_sym[k]) return 1;
- if (k == KW_ALIGNOF && t->v.ident == p->sym_alignof_alias) return 1;
- return 0;
-}
-
-static CKw ident_kw(const Parser* p, Sym name) {
- /* Linear scan; KW_COUNT is small. */
- CKw i;
- for (i = (CKw)1; i < KW_COUNT; ++i) {
- if (p->kw_sym[i] == name) return i;
+void expect_punct(Parser* p, u32 punct, const char* what) {
+ if (!accept_punct(p, punct)) {
+ perr(p, "expected %s", what);
}
- if (name == p->sym_alignof_alias) return KW_ALIGNOF;
- return KW_NONE;
}
-static int accept_punct(Parser* p, u32 punct) {
+int accept_punct(Parser* p, u32 punct) {
if (is_punct(&p->cur, punct)) {
advance(p);
return 1;
@@ -550,26 +184,9 @@ static int accept_punct(Parser* p, u32 punct) {
return 0;
}
-static int accept_kw(Parser* p, CKw k) {
- if (is_kw(p, &p->cur, k)) {
- advance(p);
- return 1;
- }
- return 0;
-}
-
-static void expect_punct(Parser* p, u32 punct, const char* what) {
- if (!accept_punct(p, punct)) {
- perr(p, "expected %s", what);
- }
-}
-
/* Record tokens from the current `{` through the matching `}` into the
- * parser's replay buffer. Pre: p->cur is `{`. Post: p->cur is the closing
- * `}` (not yet advanced past); replay buffer holds [`{`, ..., `}`]. The
- * caller must subsequently call replay_rewind() to re-scan, or simply
- * advance() to skip past the brace. */
-static void record_braced_block(Parser* p) {
+ * parser's replay buffer. */
+void record_braced_block(Parser* p) {
int depth = 0;
if (!is_punct(&p->cur, '{')) perr(p, "internal: record on non-'{'");
p->replay_len = 0;
@@ -595,15 +212,10 @@ static void record_braced_block(Parser* p) {
}
advance(p);
}
- /* cur is the recorded closing `}`. Caller decides what to do next. */
}
-/* After record_braced_block, rewind so subsequent advance()/peek1() pull
- * tokens from the replay buffer starting at index 0. Discards any
- * lazily-buffered `next` since tokens within the recorded range are now
- * served from the buffer. The post-`}` token will be fetched via pp_next
- * once the replay finishes draining. */
-static void replay_rewind(Parser* p) {
+/* After record_braced_block, rewind to replay from the start. */
+void replay_rewind(Parser* p) {
if (p->replay_len == 0) perr(p, "internal: replay_rewind with empty buffer");
p->cur = p->replay[0];
p->replay_pos = 1;
@@ -611,12 +223,8 @@ static void replay_rewind(Parser* p) {
p->has_next = 0;
}
-/* Count top-level items in a recorded brace list (positional or designator-
- * led). The recording starts with `{` at index 0 and ends with the matching
- * `}` at len-1. Top-level commas separate items; a trailing comma before
- * the closing `}` does not introduce an extra item. Used to size incomplete
- * arrays initialized with `{...}`. */
-static u32 count_recorded_top_level_items(const Tok* vec, u32 len) {
+/* Count top-level items in a recorded brace list. */
+u32 count_recorded_top_level_items(const Tok* vec, u32 len) {
u32 count;
u32 i;
int depth = 0;
@@ -629,22 +237,15 @@ static u32 count_recorded_top_level_items(const Tok* vec, u32 len) {
else if (is_punct(t, '}') || is_punct(t, ')') || is_punct(t, ']')) --depth;
else if (depth == 0 && is_punct(t, ',')) ++count;
}
- /* If the last interior token is `,` it was a trailing separator; back off. */
if (is_punct(&vec[len - 2], ',')) --count;
return count;
}
-/* expect_kw is wired up but unused at this slice — `void` consumption
- * goes through accept_kw already. Kept commented as a documentation hook
- * for the next slice that needs it (e.g. `_Static_assert`).
- *
- * static void expect_kw(Parser*, CKw, const char* what); */
-
/* ============================================================
* Scopes
* ============================================================ */
-static Scope* scope_new(Parser* p, Scope* parent) {
+Scope* scope_new(Parser* p, Scope* parent) {
Scope* s = arena_new(p->c->tu, Scope);
if (!s) perr(p, "out of memory in scope_new");
s->entries = NULL;
@@ -653,14 +254,14 @@ static Scope* scope_new(Parser* p, Scope* parent) {
return s;
}
-static void scope_push(Parser* p) { p->scope = scope_new(p, p->scope); }
+void scope_push(Parser* p) { p->scope = scope_new(p, p->scope); }
-static void scope_pop(Parser* p) {
+void scope_pop(Parser* p) {
if (p->scope) p->scope = p->scope->parent;
}
-static SymEntry* scope_define(Parser* p, Sym name, SymEntryKind kind,
- const Type* type) {
+SymEntry* scope_define(Parser* p, Sym name, SymEntryKind kind,
+ const Type* type) {
SymEntry* e = arena_new(p->c->tu, SymEntry);
if (!e) perr(p, "out of memory in scope_define");
memset(e, 0, sizeof *e);
@@ -672,7 +273,7 @@ static SymEntry* scope_define(Parser* p, Sym name, SymEntryKind kind,
return e;
}
-static SymEntry* scope_lookup(Parser* p, Sym name) {
+SymEntry* scope_lookup(Parser* p, Sym name) {
Scope* s;
for (s = p->scope; s; s = s->parent) {
SymEntry* e;
@@ -683,12 +284,8 @@ static SymEntry* scope_lookup(Parser* p, Sym name) {
return NULL;
}
-/* Tag scope ops. Tag lookup walks parent chains; tag definition lives in the
- * current scope so an inner scope can shadow an outer tag of the same name
- * (§6.2.1 ¶4). `tag_lookup_local` is used to detect redeclaration in the
- * same scope and to complete a forward-declared tag in place. */
-static TagEntry* tag_define(Parser* p, Sym name, TagDeclKind kind, Type* type,
- int complete) {
+TagEntry* tag_define(Parser* p, Sym name, TagDeclKind kind, Type* type,
+ int complete) {
TagEntry* e = arena_new(p->c->tu, TagEntry);
if (!e) perr(p, "out of memory in tag_define");
memset(e, 0, sizeof *e);
@@ -701,7 +298,7 @@ static TagEntry* tag_define(Parser* p, Sym name, TagDeclKind kind, Type* type,
return e;
}
-static TagEntry* tag_lookup(Parser* p, Sym name) {
+TagEntry* tag_lookup(Parser* p, Sym name) {
Scope* s;
for (s = p->scope; s; s = s->parent) {
TagEntry* e;
@@ -712,7 +309,7 @@ static TagEntry* tag_lookup(Parser* p, Sym name) {
return NULL;
}
-static TagEntry* tag_lookup_local(Parser* p, Sym name) {
+TagEntry* tag_lookup_local(Parser* p, Sym name) {
TagEntry* e;
for (e = p->scope->tags; e; e = e->next) {
if (e->name == name) return e;
@@ -724,4460 +321,44 @@ static TagEntry* tag_lookup_local(Parser* p, Sym name) {
* Type helpers
* ============================================================ */
-static const Type* ty_int(Parser* p) { return type_prim(p->pool, TY_INT); }
static const Type* ty_size_t(Parser* p) {
return abi_size_type(p->abi, p->pool);
}
/* ============================================================
- * GNU __attribute__ (Phase 1 — parse + carry; no semantic wire-up)
- * ============================================================
- * Surface grammar:
- * __attribute__ '(' '(' attr-list ')' ')'
- * attr-list := attr (',' attr)* | <empty>
- * attr := attr-name | attr-name '(' balanced-tokens ')'
- *
- * Both `name` and `__name__` map to the same attribute. Phase 1 stores
- * the parsed list on DeclSpecs.attrs; other carrier sites consume tokens
- * cleanly via parse_and_discard_attributes until Phase 2 wires them up. */
-/* AttrKind / AttrArgShape / Attr live in src/parse/attr.h so the Phase 2
- * decl consumers (src/decl/decl_attrs.c) can decode the same nodes. */
-
-static const struct {
- const char* name;
- AttrKind kind;
- AttrArgShape shape;
-} kAttrTable[] = {
- {"packed", ATTR_PACKED, AS_NONE},
- {"aligned", ATTR_ALIGNED, AS_INT_OPT},
- {"section", ATTR_SECTION, AS_STRING},
- {"used", ATTR_USED, AS_NONE},
- {"noreturn", ATTR_NORETURN, AS_NONE},
- {"alias", ATTR_ALIAS, AS_STRING},
- {"weak", ATTR_WEAK, AS_NONE},
- {"visibility", ATTR_VISIBILITY, AS_STRING},
- {"always_inline", ATTR_ALWAYS_INLINE, AS_NONE},
- {"noinline", ATTR_NOINLINE, AS_NONE},
- {"unused", ATTR_UNUSED, AS_NONE},
- {"deprecated", ATTR_DEPRECATED, AS_OPAQUE},
- {"warn_unused_result", ATTR_WARN_UNUSED_RESULT, AS_NONE},
- {"format", ATTR_FORMAT, AS_FORMAT},
- {"nonnull", ATTR_NONNULL, AS_OPAQUE},
- {"returns_nonnull", ATTR_RETURNS_NONNULL, AS_NONE},
- {"pure", ATTR_PURE, AS_NONE},
- {"const", ATTR_CONST, AS_NONE},
- {"malloc", ATTR_MALLOC, AS_OPAQUE},
- {"nothrow", ATTR_NOTHROW, AS_NONE},
- {"leaf", ATTR_LEAF, AS_NONE},
- {"cold", ATTR_COLD, AS_NONE},
- {"hot", ATTR_HOT, AS_NONE},
- {"constructor", ATTR_CONSTRUCTOR, AS_INT_OPT},
- {"destructor", ATTR_DESTRUCTOR, AS_INT_OPT},
- {"cleanup", ATTR_CLEANUP, AS_IDENT},
- {"mode", ATTR_MODE, AS_IDENT},
- {"vector_size", ATTR_VECTOR_SIZE, AS_INT},
- {"transparent_union", ATTR_TRANSPARENT_UNION, AS_NONE},
- {"gnu_inline", ATTR_GNU_INLINE, AS_NONE},
- {"fallthrough", ATTR_FALLTHROUGH, AS_NONE},
- {"sentinel", ATTR_SENTINEL, AS_OPAQUE},
- {"no_instrument_function", ATTR_NO_INSTRUMENT_FUNCTION, AS_NONE},
- {"no_sanitize", ATTR_NO_SANITIZE, AS_OPAQUE},
-};
-
-static int starts_attr(const Parser* p);
-static Attr* parse_attribute_spec_list(Parser* p);
-static void parse_and_discard_attributes(Parser* p);
-static u8* decode_string_literal(Parser* p, const Tok* t, size_t* nlen_out);
-/* Append `add` to the end of `*head` (linked via Attr.next). Both args
- * are in source order; result preserves source order. */
-static void attr_list_append(Attr** head, Attr* add) {
- if (!add) return;
- if (!*head) { *head = add; return; }
- Attr* tail = *head;
- while (tail->next) tail = tail->next;
- tail->next = add;
-}
-/* If `starts_attr`, parse and append to `*sink`. No-op otherwise. */
-static void parse_attrs_into(Parser* p, Attr** sink) {
- if (starts_attr(p)) attr_list_append(sink, parse_attribute_spec_list(p));
-}
-
-/* DeclSpecs and the matching parser landed up in the declaration section
- * historically; we hoist it before expression parsing because
- * sizeof / _Alignof / cast need to consume a type-name from inside
- * parse_unary. */
-typedef struct DeclSpecs {
- const Type* type;
- DeclStorage storage;
- u32 flags; /* DeclFlag */
- u16 quals; /* TypeQual bits seen in the decl-spec list */
- u32 align; /* explicit alignment from `_Alignas`; 0 if none */
- /* When `type` came from a VLA typedef-name, propagates the typedef's
- * captured byte-size slot so init_declarator can alloca the right
- * runtime size. FRAME_SLOT_NONE otherwise. */
- FrameSlot vla_byte_slot;
- /* Phase 1: parsed __attribute__((...)) list attached to this decl. */
- Attr* attrs;
-} DeclSpecs;
-
-static int parse_decl_specs(Parser* p, DeclSpecs* out);
-/* `*anon_attrs_out` receives any record-level attrs when the record is
- * anonymous (no TagEntry to attach to). For tagged records the attrs go
- * on the TagEntry directly. May be NULL if caller doesn't care. */
-static const Type* parse_struct_or_union(Parser* p, TypeKind kind,
- Attr** anon_attrs_out);
-static const Type* parse_enum(Parser* p, Attr** anon_attrs_out);
-static void parse_assign_expr(Parser* p);
-static i64 eval_const_int(Parser* p, SrcLoc loc); /* tiny constant evaluator */
-static const Type* parse_pointer_layer(Parser* p, const Type* base);
-static const Type* parse_declarator_full(Parser* p, const Type* base,
- int allow_abstract, Sym* name_out,
- SrcLoc* loc_out);
-/* Variant that also returns the attributes seen at the post-declarator-id
- * position (after the IDENT, between/after suffixes). Callers that care
- * about per-declarator attrs (struct members; ordinary declarators in
- * decl-listings) pass an Attr** sink; pass NULL to drop them. */
-static const Type* parse_declarator_full_ex(Parser* p, const Type* base,
- int allow_abstract, Sym* name_out,
- SrcLoc* loc_out,
- Attr** attrs_out);
-static int starts_type_name(const Parser* p, const Tok* t);
-static const Type* parse_type_name(Parser* p);
-static i64 parse_int_literal(Parser* p, const Tok* t);
-static i64 decode_char_literal(Parser* p, const Tok* t);
-
-/* Resolve the type implied by a multiset of type-specifier tokens
- * (unsigned, signed, short, long, char, int, ...). C allows most orders
- * (`unsigned long int` ≡ `int unsigned long`), so we collect everything
- * first and pick the canonical TY_* tag at the end. Phase 1 covers the
- * combinations the corpus needs; the float family (`long double`) is
- * Phase 7's job and falls through to a "conflicting" diagnostic if
- * combined with the integer keywords here. */
-typedef struct TypeSpecAccum {
- u8 saw_void;
- u8 saw_char;
- u8 saw_int;
- u8 saw_short;
- u8 long_count; /* 0/1/2 */
- u8 saw_signed;
- u8 saw_unsigned;
- u8 saw_bool;
- u8 saw_float;
- u8 saw_double;
- u8 saw_explicit_type; /* any of the above? */
-} TypeSpecAccum;
-
-static const Type* resolve_type_specs(Parser* p, const TypeSpecAccum* a,
- SrcLoc loc) {
- if (!a->saw_explicit_type) return NULL;
- if (a->saw_void) {
- if (a->saw_char || a->saw_int || a->saw_short || a->long_count ||
- a->saw_signed || a->saw_unsigned || a->saw_bool || a->saw_float ||
- a->saw_double) {
- compiler_panic(p->c, loc, "conflicting type specifiers (void mixed)");
- }
- return type_void(p->pool);
- }
- if (a->saw_bool) {
- return type_prim(p->pool, TY_BOOL);
- }
- if (a->saw_char) {
- if (a->saw_unsigned) return type_prim(p->pool, TY_UCHAR);
- if (a->saw_signed) return type_prim(p->pool, TY_SCHAR);
- return type_prim(p->pool, TY_CHAR);
- }
- if (a->saw_float) return type_prim(p->pool, TY_FLOAT);
- if (a->saw_double) {
- return type_prim(p->pool, a->long_count ? TY_LDOUBLE : TY_DOUBLE);
- }
- if (a->saw_short) {
- return type_prim(p->pool, a->saw_unsigned ? TY_USHORT : TY_SHORT);
- }
- if (a->long_count == 2) {
- return type_prim(p->pool, a->saw_unsigned ? TY_ULLONG : TY_LLONG);
- }
- if (a->long_count == 1) {
- return type_prim(p->pool, a->saw_unsigned ? TY_ULONG : TY_LONG);
- }
- if (a->saw_unsigned) return type_prim(p->pool, TY_UINT);
- if (a->saw_signed || a->saw_int) return type_prim(p->pool, TY_INT);
- return type_prim(p->pool, TY_INT);
-}
-
-static int parse_decl_specs(Parser* p, DeclSpecs* out) {
- /* Tracks integer/void/char type specifiers in any order, plus the
- * storage-class and qualifier keywords. Returns 0 if no specifier was
- * consumed (caller treats that as "not a declaration"). */
- TypeSpecAccum acc;
- SrcLoc loc;
- int seen = 0;
- const Type* tagged_ty = NULL; /* set when struct/union/enum consumed */
- memset(&acc, 0, sizeof acc);
- out->type = NULL;
- out->storage = DS_AUTO;
- out->flags = DF_NONE;
- out->quals = 0;
- out->align = 0;
- out->vla_byte_slot = FRAME_SLOT_NONE;
- out->attrs = NULL;
- loc = tok_loc(&p->cur);
- for (;;) {
- Tok t = p->cur;
- /* GNU __attribute__((...)) may appear anywhere among decl-specifiers
- * and may repeat. Chain onto out->attrs (in source order). */
- if (starts_attr(p)) {
- Attr* a = parse_attribute_spec_list(p);
- if (a) {
- Attr* tail = a;
- while (tail->next) tail = tail->next;
- tail->next = out->attrs;
- out->attrs = a;
- }
- seen = 1;
- continue;
- }
- if (is_kw(p, &t, KW_STRUCT) || is_kw(p, &t, KW_UNION)) {
- TypeKind kind = is_kw(p, &t, KW_STRUCT) ? TY_STRUCT : TY_UNION;
- Attr* anon_attrs = NULL;
- if (tagged_ty || acc.saw_explicit_type) {
- perr(p, "conflicting type specifiers (struct/union mixed)");
- }
- advance(p);
- tagged_ty = parse_struct_or_union(p, kind, &anon_attrs);
- attr_list_append(&out->attrs, anon_attrs);
- acc.saw_explicit_type = 1;
- seen = 1;
- continue;
- }
- if (is_kw(p, &t, KW_ENUM)) {
- Attr* anon_attrs = NULL;
- if (tagged_ty || acc.saw_explicit_type) {
- perr(p, "conflicting type specifiers (enum mixed)");
- }
- advance(p);
- tagged_ty = parse_enum(p, &anon_attrs);
- attr_list_append(&out->attrs, anon_attrs);
- acc.saw_explicit_type = 1;
- seen = 1;
- continue;
- }
- if (is_kw(p, &t, KW_VOID)) {
- acc.saw_void = 1; acc.saw_explicit_type = 1; advance(p); seen = 1;
- } else if (is_kw(p, &t, KW_CHAR)) {
- acc.saw_char = 1; acc.saw_explicit_type = 1; advance(p); seen = 1;
- } else if (is_kw(p, &t, KW_INT)) {
- acc.saw_int = 1; acc.saw_explicit_type = 1; advance(p); seen = 1;
- } else if (is_kw(p, &t, KW_SHORT)) {
- acc.saw_short = 1; acc.saw_explicit_type = 1; advance(p); seen = 1;
- } else if (is_kw(p, &t, KW_LONG)) {
- acc.long_count++; acc.saw_explicit_type = 1; advance(p); seen = 1;
- } else if (is_kw(p, &t, KW_SIGNED)) {
- acc.saw_signed = 1; acc.saw_explicit_type = 1; advance(p); seen = 1;
- } else if (is_kw(p, &t, KW_UNSIGNED)) {
- acc.saw_unsigned = 1; acc.saw_explicit_type = 1; advance(p); seen = 1;
- } else if (is_kw(p, &t, KW_BOOL)) {
- acc.saw_bool = 1; acc.saw_explicit_type = 1; advance(p); seen = 1;
- } else if (is_kw(p, &t, KW_FLOAT)) {
- acc.saw_float = 1; acc.saw_explicit_type = 1; advance(p); seen = 1;
- } else if (is_kw(p, &t, KW_DOUBLE)) {
- acc.saw_double = 1; acc.saw_explicit_type = 1; advance(p); seen = 1;
- } else if (is_kw(p, &t, KW_STATIC)) {
- out->storage = DS_STATIC; advance(p); seen = 1;
- } else if (is_kw(p, &t, KW_EXTERN)) {
- out->storage = DS_EXTERN; advance(p); seen = 1;
- } else if (is_kw(p, &t, KW_CONST)) {
- out->quals |= Q_CONST; advance(p); seen = 1;
- } else if (is_kw(p, &t, KW_VOLATILE)) {
- out->quals |= Q_VOLATILE; advance(p); seen = 1;
- } else if (is_kw(p, &t, KW_RESTRICT)) {
- out->quals |= Q_RESTRICT; advance(p); seen = 1;
- } else if (is_kw(p, &t, KW_ATOMIC)) {
- /* `_Atomic(type-name)` is a type specifier; bare `_Atomic` is a
- * qualifier (§6.7.2.4). Disambiguate on the next token. */
- Tok n = peek1(p);
- if (is_punct(&n, '(')) {
- const Type* inner;
- if (tagged_ty || acc.saw_explicit_type) {
- perr(p, "conflicting type specifiers (_Atomic(T) mixed)");
- }
- advance(p); /* `_Atomic` */
- advance(p); /* `(` */
- inner = parse_type_name(p);
- expect_punct(p, ')', "')' after _Atomic type");
- tagged_ty = type_qualified(p->pool, inner, Q_ATOMIC);
- acc.saw_explicit_type = 1;
- seen = 1;
- continue;
- }
- out->quals |= Q_ATOMIC; advance(p); seen = 1;
- } else if (is_kw(p, &t, KW_TYPEDEF)) {
- out->storage = DS_TYPEDEF; advance(p); seen = 1;
- } else if (is_kw(p, &t, KW_ALIGNAS)) {
- /* `_Alignas(N)` or `_Alignas(type-name)`. Either form yields a
- * byte alignment that overrides the natural alignment of the
- * declared object. Multiple specifiers take the strictest. */
- u32 a = 0;
- advance(p); /* `_Alignas` */
- expect_punct(p, '(', "'(' after _Alignas");
- if (starts_type_name(p, &p->cur)) {
- const Type* tn = parse_type_name(p);
- a = abi_alignof(p->abi, tn);
- } else {
- i64 v = eval_const_int(p, tok_loc(&p->cur));
- if (v < 0) perr(p, "_Alignas requires a non-negative alignment");
- /* §6.7.5 ¶6: `_Alignas(0)` is a no-op (use the natural
- * alignment), so leave `a` at 0 and skip the bump below. */
- a = (u32)v;
- }
- expect_punct(p, ')', "')' after _Alignas argument");
- if (a > out->align) out->align = a;
- seen = 1;
- } else if (is_kw(p, &t, KW_INLINE)) {
- out->flags |= DF_INLINE; advance(p); seen = 1;
- } else if (is_kw(p, &t, KW_THREAD_LOCAL)) {
- out->flags |= DF_THREAD; advance(p); seen = 1;
- } else if (is_kw(p, &t, KW_NORETURN) || is_kw(p, &t, KW_REGISTER) ||
- is_kw(p, &t, KW_AUTO)) {
- /* Recognized but currently no-op at this slice. */
- advance(p); seen = 1;
- } else if (!acc.saw_explicit_type && !tagged_ty &&
- t.kind == TOK_IDENT && ident_kw(p, t.v.ident) == KW_NONE) {
- /* `__builtin_va_list` resolves to the per-ABI va_list type. */
- if (t.v.ident == p->sym_b_va_list) {
- tagged_ty = abi_va_list_type(p->abi, p->pool);
- acc.saw_explicit_type = 1;
- advance(p);
- seen = 1;
- continue;
- }
- /* Typedef-name as a type specifier. Only consumed when no other
- * type specifier has been seen — otherwise this IDENT is the
- * declarator name. */
- SymEntry* e = scope_lookup(p, t.v.ident);
- if (e && e->kind == SEK_TYPEDEF) {
- tagged_ty = e->type;
- if (e->vla_byte_slot != FRAME_SLOT_NONE) {
- out->vla_byte_slot = e->vla_byte_slot;
- }
- acc.saw_explicit_type = 1;
- advance(p);
- seen = 1;
- continue;
- }
- break;
- } else {
- break;
- }
- }
- if (seen) {
- if (tagged_ty) {
- out->type = tagged_ty;
- } else {
- out->type = resolve_type_specs(p, &acc, loc);
- if (!out->type) {
- /* Storage class without a type — default to int per pre-C99. */
- out->type = ty_int(p);
- }
- }
- }
- return seen;
-}
-
-/* ============================================================
- * struct / union / enum
- * ============================================================
- * Recognized inside parse_decl_specs. The shapes are:
- * struct-or-union-specifier =
- * (`struct`|`union`) IDENT? (`{` member-decl+ `}`)?
- * enum-specifier =
- * `enum` IDENT? (`{` enumerator (`,` enumerator)* `,`? `}`)?
- *
- * Tag scope: an IDENT after `struct`/`union`/`enum` names a tag. Tags share
- * the same scope chain as ordinary identifiers but live in a separate slot
- * (TagEntry vs SymEntry). A `struct S` reference without `{...}` looks up
- * `S` in the tag scope chain; if not found and the use is permissible
- * (currently always — we don't yet distinguish struct-specifier-as-decl
- * from struct-tag-only) we install an incomplete tag in the current scope.
- *
- * Forward decl + completion: `struct S;` followed later by `struct S { ... }`
- * are joined by reusing the same Type* node (mutated in place by
- * type_record_install). Self-referential pointers `struct N { struct N
- * *next; }` work because the pointer type is constructed from the same
- * (still-incomplete) Type* during member parsing — completion only changes
- * the fields/incomplete bits, never the pointer's target identity. */
-
-/* Tiny constant evaluator. Used by enum values and array sizes that may grow
- * beyond bare TOK_NUM. Phase 3 only handles signed-int forms because the
- * corpus never references float or pointer constants from these positions.
- * Recognizes:
- * integer literal | character literal | enumerator
- * '+' cexpr | '-' cexpr | '~' cexpr | '!' cexpr
- * '(' cexpr ')'
- * plus '+ - * / % & | ^ << >>' between integer constants
- * Returns the evaluated value; on parse-fail or non-constant operand it
- * panics with `loc` as the diagnostic site. */
-static i64 cexpr_unary(Parser* p, SrcLoc loc);
-static void parse_unary(Parser* p);
-static i64 cexpr_mul(Parser* p, SrcLoc loc) {
- i64 v = cexpr_unary(p, loc);
- for (;;) {
- if (accept_punct(p, '*')) v = v * cexpr_unary(p, loc);
- else if (accept_punct(p, '/')) {
- i64 r = cexpr_unary(p, loc);
- if (r == 0) compiler_panic(p->c, loc, "division by zero in constant");
- v = v / r;
- } else if (accept_punct(p, '%')) {
- i64 r = cexpr_unary(p, loc);
- if (r == 0) compiler_panic(p->c, loc, "modulo by zero in constant");
- v = v % r;
- } else break;
- }
- return v;
-}
-static i64 cexpr_add(Parser* p, SrcLoc loc) {
- i64 v = cexpr_mul(p, loc);
- for (;;) {
- if (accept_punct(p, '+')) v = v + cexpr_mul(p, loc);
- else if (accept_punct(p, '-')) v = v - cexpr_mul(p, loc);
- else break;
- }
- return v;
-}
-static i64 cexpr_shift(Parser* p, SrcLoc loc) {
- i64 v = cexpr_add(p, loc);
- for (;;) {
- if (accept_punct(p, P_SHL)) v = v << cexpr_add(p, loc);
- else if (accept_punct(p, P_SHR)) v = v >> cexpr_add(p, loc);
- else break;
- }
- return v;
-}
-static i64 cexpr_rel(Parser* p, SrcLoc loc) {
- i64 v = cexpr_shift(p, loc);
- for (;;) {
- if (accept_punct(p, P_LE)) v = v <= cexpr_shift(p, loc);
- else if (accept_punct(p, P_GE)) v = v >= cexpr_shift(p, loc);
- else if (is_punct(&p->cur, '<')) {
- advance(p); v = v < cexpr_shift(p, loc);
- } else if (is_punct(&p->cur, '>')) {
- advance(p); v = v > cexpr_shift(p, loc);
- } else break;
- }
- return v;
-}
-static i64 cexpr_eq(Parser* p, SrcLoc loc) {
- i64 v = cexpr_rel(p, loc);
- for (;;) {
- if (accept_punct(p, P_EQ)) v = (v == cexpr_rel(p, loc));
- else if (accept_punct(p, P_NE)) v = (v != cexpr_rel(p, loc));
- else break;
- }
- return v;
-}
-static i64 cexpr_band(Parser* p, SrcLoc loc) {
- i64 v = cexpr_eq(p, loc);
- while (is_punct(&p->cur, '&') && !is_punct(&p->cur, P_AND)) {
- advance(p);
- v = v & cexpr_eq(p, loc);
- }
- return v;
-}
-static i64 cexpr_bxor(Parser* p, SrcLoc loc) {
- i64 v = cexpr_band(p, loc);
- while (accept_punct(p, '^')) v = v ^ cexpr_band(p, loc);
- return v;
-}
-static i64 cexpr_bor(Parser* p, SrcLoc loc) {
- i64 v = cexpr_bxor(p, loc);
- while (is_punct(&p->cur, '|') && !is_punct(&p->cur, P_OR)) {
- advance(p);
- v = v | cexpr_bxor(p, loc);
- }
- return v;
-}
-static i64 cexpr_unary(Parser* p, SrcLoc loc) {
- if (accept_punct(p, '+')) return cexpr_unary(p, loc);
- if (accept_punct(p, '-')) return -cexpr_unary(p, loc);
- if (accept_punct(p, '~')) return ~cexpr_unary(p, loc);
- if (accept_punct(p, '!')) return cexpr_unary(p, loc) ? 0 : 1;
- if (accept_kw(p, KW_SIZEOF)) {
- /* `sizeof ( type-name )` resolves to a pure type-name lookup. The
- * `sizeof ( expression )` and `sizeof unary-expression` forms run the
- * operand through parse_unary (operand is not evaluated per §6.5.3.4,
- * matching parse_unary's handling) and read the resulting type off the
- * cg stack, then drop. */
- if (is_punct(&p->cur, '(')) {
- Tok n = peek1(p);
- if (starts_type_name(p, &n)) {
- advance(p);
- {
- const Type* t = parse_type_name(p);
- expect_punct(p, ')', "')' after sizeof type-name");
- return (i64)abi_sizeof(p->abi, t);
- }
- }
- }
- parse_unary(p);
- {
- const Type* ty = cg_top_type(p->cg);
- i64 sz = (i64)abi_sizeof(p->abi, ty);
- cg_drop(p->cg);
- return sz;
- }
- }
- if (accept_kw(p, KW_ALIGNOF)) {
- /* `_Alignof` is type-name only per §6.5.3.4. The GNU `__alignof__`
- * alias additionally accepts an expression, mirroring sizeof. We
- * disambiguate at the `(`: type-name → parse_type_name, otherwise
- * route through parse_unary and read the operand type. */
- if (is_punct(&p->cur, '(')) {
- Tok n = peek1(p);
- if (starts_type_name(p, &n)) {
- advance(p);
- {
- const Type* t = parse_type_name(p);
- expect_punct(p, ')', "')' after _Alignof type-name");
- return (i64)abi_alignof(p->abi, t);
- }
- }
- }
- parse_unary(p);
- {
- const Type* ty = cg_top_type(p->cg);
- i64 al = (i64)abi_alignof(p->abi, ty);
- cg_drop(p->cg);
- return al;
- }
- }
- if (accept_punct(p, '(')) {
- /* `(type-name) cexpr` is an explicit cast in a constant context; for
- * the §6.7.10 corpus the casts we see are integer→integer, so the
- * mask-to-width is sufficient. Otherwise the parens enclose a
- * sub-expression. */
- if (starts_type_name(p, &p->cur)) {
- const Type* t = parse_type_name(p);
- expect_punct(p, ')', "')' after cast type-name");
- {
- i64 v = cexpr_unary(p, loc);
- u32 sz = abi_sizeof(p->abi, t);
- int is_signed = abi_type_info(p->abi, t).signed_;
- if (sz < 8) {
- u64 mask = (1ull << (sz * 8u)) - 1ull;
- u64 uv = (u64)v & mask;
- if (is_signed) {
- u64 sign = 1ull << (sz * 8u - 1u);
- v = (i64)((uv ^ sign) - sign);
- } else {
- v = (i64)uv;
- }
- }
- return v;
- }
- }
- {
- i64 v = cexpr_bor(p, loc);
- expect_punct(p, ')', "')' in constant expression");
- return v;
- }
- }
- if (p->cur.kind == TOK_NUM) {
- i64 v = parse_int_literal(p, &p->cur);
- advance(p);
- return v;
- }
- if (p->cur.kind == TOK_CHR) {
- /* Character literals are integer constants per §6.4.4.4. */
- i64 v = decode_char_literal(p, &p->cur);
- advance(p);
- return v;
- }
- if (p->cur.kind == TOK_IDENT) {
- SymEntry* e = scope_lookup(p, p->cur.v.ident);
- if (e && e->kind == SEK_ENUM_CST) {
- advance(p);
- return e->v.enum_value;
- }
- compiler_panic(p->c, loc, "non-constant identifier in constant expression");
- }
- compiler_panic(p->c, loc, "expected constant expression");
-}
-static i64 eval_const_int(Parser* p, SrcLoc loc) { return cexpr_bor(p, loc); }
-
-/* ============================================================
- * __attribute__ helpers
+ * Local-variable slot allocation
* ============================================================ */
-static int starts_attr(const Parser* p) {
- return p->cur.kind == TOK_IDENT && p->cur.v.ident == p->sym_attribute;
-}
-
-/* Canonicalize an attribute name spelling: strip exactly one leading `__`
- * and one trailing `__` pair if both are present (GCC compat). Returns the
- * (start, len) of the canonical substring within the original spelling. */
-static void attr_canon_range(const char* s, size_t len,
- const char** out_p, size_t* out_len) {
- if (len >= 4 && s[0] == '_' && s[1] == '_' &&
- s[len - 1] == '_' && s[len - 2] == '_') {
- *out_p = s + 2;
- *out_len = len - 4;
- return;
- }
- *out_p = s;
- *out_len = len;
-}
-
-static AttrKind classify_attr(Parser* p, Sym name, AttrArgShape* shape_out) {
- size_t len = 0;
- const char* s = pool_str(p->pool, name, &len);
- const char* cs;
- size_t clen;
- size_t i;
- if (!s) {
- *shape_out = AS_OPAQUE;
- return ATTR_UNKNOWN;
- }
- attr_canon_range(s, len, &cs, &clen);
- for (i = 0; i < sizeof(kAttrTable) / sizeof(kAttrTable[0]); ++i) {
- const char* tn = kAttrTable[i].name;
- size_t tlen = strlen(tn);
- if (tlen == clen && memcmp(tn, cs, clen) == 0) {
- *shape_out = kAttrTable[i].shape;
- return kAttrTable[i].kind;
- }
- }
- *shape_out = AS_OPAQUE;
- return ATTR_UNKNOWN;
-}
-
-/* Skip a balanced parenthesized token group. Pre: p->cur is `(`. Post:
- * p->cur is the token immediately after the matching `)`. */
-static void skip_balanced_parens(Parser* p) {
- int depth;
- if (!is_punct(&p->cur, '(')) perr(p, "internal: skip_balanced_parens");
- depth = 1;
- advance(p);
- while (depth > 0) {
- if (p->cur.kind == TOK_EOF) {
- perr(p, "unexpected EOF inside attribute arguments");
- }
- if (is_punct(&p->cur, '(')) ++depth;
- else if (is_punct(&p->cur, ')')) {
- --depth;
- if (depth == 0) { advance(p); return; }
- }
- advance(p);
- }
-}
-
-/* Parse the body between `(` and `)` for one attribute according to its
- * shape. Pre: p->cur is `(`. Post: p->cur is the token after the matching
- * `)`. For unrecognized shapes or ATTR_UNKNOWN, swallows balanced tokens. */
-static void parse_attr_args(Parser* p, Attr* a, AttrArgShape shape,
- const char* attr_diag_name) {
- if (!is_punct(&p->cur, '(')) {
- if (shape == AS_NONE || shape == AS_OPTIONAL || shape == AS_INT_OPT ||
- shape == AS_OPAQUE) {
- return;
- }
- perr(p, "attribute '%s' expects '(' arguments", attr_diag_name);
- }
- switch (shape) {
- case AS_NONE: {
- /* Tolerate empty `()`. */
- advance(p); /* '(' */
- if (!accept_punct(p, ')')) {
- perr(p, "attribute '%s' takes no arguments", attr_diag_name);
- }
- return;
- }
- case AS_OPTIONAL: {
- skip_balanced_parens(p);
- return;
- }
- case AS_INT:
- case AS_INT_OPT: {
- SrcLoc loc;
- advance(p); /* '(' */
- if (is_punct(&p->cur, ')')) {
- if (shape == AS_INT) {
- perr(p, "attribute '%s' expects an integer argument",
- attr_diag_name);
- }
- advance(p);
- return;
- }
- loc = tok_loc(&p->cur);
- a->v.i = eval_const_int(p, loc);
- a->nargs = 1;
- expect_punct(p, ')', "')' after attribute integer argument");
- return;
- }
- case AS_STRING: {
- advance(p); /* '(' */
- if (p->cur.kind != TOK_STR) {
- perr(p, "attribute '%s' expects a string literal", attr_diag_name);
- }
- /* Decode the literal so consumers (`section`, `alias`, `visibility`)
- * see the content without surrounding quotes or escape sequences. */
- {
- Tok t = p->cur;
- size_t nlen = 0;
- u8* bytes = decode_string_literal(p, &t, &nlen);
- /* nlen includes a trailing NUL — intern without it. */
- u32 ilen = (nlen > 0) ? (u32)(nlen - 1) : 0;
- a->v.sym = pool_intern(p->c->global, (const char*)bytes, ilen);
- p->c->env->heap->free(p->c->env->heap, bytes, 0);
- }
- a->nargs = 1;
- advance(p);
- expect_punct(p, ')', "')' after attribute string argument");
- return;
- }
- case AS_IDENT: {
- advance(p); /* '(' */
- if (p->cur.kind != TOK_IDENT) {
- perr(p, "attribute '%s' expects an identifier", attr_diag_name);
- }
- a->v.sym = p->cur.v.ident;
- a->nargs = 1;
- advance(p);
- expect_punct(p, ')', "')' after attribute identifier argument");
- return;
- }
- case AS_FORMAT: {
- /* format(archetype, m, n) — archetype is an identifier, m and n are
- * positive integers. */
- SrcLoc mloc, nloc;
- i64 mv, nv;
- advance(p); /* '(' */
- if (p->cur.kind != TOK_IDENT) {
- perr(p, "attribute 'format' expects (archetype, m, n)");
- }
- advance(p);
- expect_punct(p, ',', "',' after format archetype");
- mloc = tok_loc(&p->cur);
- mv = eval_const_int(p, mloc);
- expect_punct(p, ',', "',' after format string-index");
- nloc = tok_loc(&p->cur);
- nv = eval_const_int(p, nloc);
- if (mv < 0 || mv > 0xFFFF || nv < 0 || nv > 0xFFFF) {
- perr(p, "attribute 'format' indices out of range");
- }
- a->v.format.fmt_idx = (u16)mv;
- a->v.format.first = (u16)nv;
- a->nargs = 3;
- expect_punct(p, ')', "')' after format arguments");
- return;
- }
- case AS_OPAQUE:
- default: {
- skip_balanced_parens(p);
- return;
- }
- }
-}
-
-/* Parse one or more `__attribute__((...))` runs starting at p->cur (must
- * satisfy starts_attr). Returns a linked list of Attr* in source order. */
-static Attr* parse_attribute_spec_list(Parser* p) {
- Attr* head = NULL;
- Attr* tail = NULL;
- while (starts_attr(p)) {
- SrcLoc kw_loc = tok_loc(&p->cur);
- advance(p); /* __attribute__ */
- expect_punct(p, '(', "'(' after __attribute__");
- expect_punct(p, '(', "'((' after __attribute__");
- /* attr-list: zero or more attr, comma-separated. Empty list and
- * trailing/leading bare commas are accepted (GCC compat). */
- for (;;) {
- Sym aname;
- AttrArgShape shape;
- Attr* a;
- const char* diag_name;
- size_t diag_len;
- const char* canon;
- size_t canon_len;
- /* Accept stray commas and an empty list. */
- while (accept_punct(p, ',')) { /* skip */ }
- if (is_punct(&p->cur, ')')) break;
- if (p->cur.kind != TOK_IDENT) {
- perr(p, "expected attribute name");
- }
- aname = p->cur.v.ident;
- a = arena_new(p->c->tu, Attr);
- if (!a) perr(p, "out of memory in parse_attribute_spec_list");
- memset(a, 0, sizeof *a);
- a->loc = tok_loc(&p->cur);
- a->name = aname;
- a->kind = (u16)classify_attr(p, aname, &shape);
- advance(p);
- diag_name = pool_str(p->pool, aname, &diag_len);
- attr_canon_range(diag_name, diag_len, &canon, &canon_len);
- (void)canon; (void)canon_len;
- parse_attr_args(p, a, shape, diag_name ? diag_name : "<unknown>");
- if (tail) tail->next = a; else head = a;
- tail = a;
- if (!accept_punct(p, ',')) break;
- }
- expect_punct(p, ')', "')' after attribute list");
- expect_punct(p, ')', "'))' after attribute list");
- (void)kw_loc;
- }
- return head;
-}
-
-static void parse_and_discard_attributes(Parser* p) {
- (void)parse_attribute_spec_list(p);
-}
-
-/* Bare `__attribute__((aligned))` (no argument) means "biggest scalar
- * alignment". Same default as decl_attrs.c uses. */
-#define PARSE_ATTR_ALIGNED_DEFAULT 16u
-
-/* Scan an attribute chain and merge record-level packed / aligned(N) into
- * the supplied TypeRecordOpts. */
-static void attrs_to_record_opts(const Attr* a, TypeRecordOpts* opts) {
- for (; a; a = a->next) {
- if (a->kind == ATTR_PACKED) {
- opts->packed = 1;
- } else if (a->kind == ATTR_ALIGNED) {
- u32 v = (a->nargs == 0) ? PARSE_ATTR_ALIGNED_DEFAULT : (u32)a->v.i;
- if (v > opts->align_override) opts->align_override = (u16)v;
- }
- }
-}
-
-/* Scan an attribute chain and merge per-member packed / aligned(N) into a
- * Field's carriers. */
-static void attrs_to_field(const Attr* a, Field* f) {
- for (; a; a = a->next) {
- if (a->kind == ATTR_PACKED) {
- f->packed = 1;
- } else if (a->kind == ATTR_ALIGNED) {
- u32 v = (a->nargs == 0) ? PARSE_ATTR_ALIGNED_DEFAULT : (u32)a->v.i;
- if (v > f->align_override) f->align_override = (u16)v;
- }
- }
-}
-
-/* Walk attrs looking for ATTR_ALIGNED; returns 0 if absent. */
-static u32 attrs_pick_aligned(const Attr* a) {
- u32 best = 0;
- for (; a; a = a->next) {
- if (a->kind == ATTR_ALIGNED) {
- u32 v = (a->nargs == 0) ? PARSE_ATTR_ALIGNED_DEFAULT : (u32)a->v.i;
- if (v > best) best = v;
- }
- }
- return best;
-}
-
-/* Parse a struct/union member-declaration list. The `{` has already been
- * consumed. Fills `b` with each member's Field; bumps anonymous flags as
- * needed. Bitfields are diagnosed (cg lacks the codegen for them in this
- * slice). */
-static void parse_member_decls(Parser* p, TypeRecordBuilder* b) {
- while (!is_punct(&p->cur, '}') && p->cur.kind != TOK_EOF) {
- DeclSpecs specs;
- if (!parse_decl_specs(p, &specs)) {
- perr(p, "expected member declaration");
- }
- /* Anonymous struct/union member: `struct { int y; };` or
- * `union { int a, b; };` directly inside another aggregate (C11
- * §6.7.2.1 ¶13). The shape is decl-specs immediately followed by
- * `;` with no declarator. */
- if (is_punct(&p->cur, ';')) {
- if (specs.type && (specs.type->kind == TY_STRUCT ||
- specs.type->kind == TY_UNION)) {
- Field f;
- memset(&f, 0, sizeof f);
- f.name = 0;
- f.type = specs.type;
- f.flags = FIELD_ANON;
- type_record_field(b, f);
- advance(p);
- continue;
- }
- perr(p, "declaration without declarator must be anonymous aggregate");
- }
- /* One or more declarators separated by `,`. */
- for (;;) {
- Sym mname = 0;
- SrcLoc mloc = tok_loc(&p->cur);
- const Type* mty;
- Field f;
- memset(&f, 0, sizeof f);
- /* Anonymous bitfield: `unsigned : N;` — no declarator, just the
- * width. Width 0 forces alignment to the next storage unit per
- * §6.7.2.1 ¶12. We don't actually lay out the unit yet (the abi
- * layout assumes named fields), but recording the entry keeps
- * downstream layout/init consistent. */
- if (is_punct(&p->cur, ':')) {
- advance(p);
- i64 w = eval_const_int(p, mloc);
- f.name = 0;
- f.type = specs.type;
- f.bitfield_width = (u16)w;
- f.flags = FIELD_BITFIELD;
- if (w == 0) f.flags |= FIELD_ZERO_WIDTH;
- attrs_to_field(specs.attrs, &f);
- type_record_field(b, f);
- if (!accept_punct(p, ',')) break;
- continue;
- }
- Attr* mattrs = NULL;
- mty = parse_declarator_full_ex(p, specs.type, /*allow_abstract=*/0,
- &mname, &mloc, &mattrs);
- /* Bitfield form `: width` after the declarator name (or after the
- * type with no name). Recognized to keep the parser unstuck on
- * member lists with bitfields, but defers actual codegen — the
- * field is still recorded and abi_record_layout treats it as a
- * full storage unit, which is wrong for any cross-member reference
- * but right enough for the bitfield row to land later (Phase 3
- * follow-up alongside cg_bitfield_load/store). */
- if (accept_punct(p, ':')) {
- i64 w = eval_const_int(p, mloc);
- f.name = mname;
- f.type = mty;
- f.bitfield_width = (u16)w;
- f.flags = FIELD_BITFIELD;
- if (w == 0) f.flags |= FIELD_ZERO_WIDTH;
- } else {
- f.name = mname;
- f.type = mty;
- f.flags = FIELD_NONE;
- }
- /* Decl-spec attrs apply to each declarator in this declaration.
- * In-declarator and trailing attrs attach to this field only. */
- attrs_to_field(specs.attrs, &f);
- attrs_to_field(mattrs, &f);
- {
- Attr* trailing = NULL;
- parse_attrs_into(p, &trailing);
- attrs_to_field(trailing, &f);
- }
- type_record_field(b, f);
- if (!accept_punct(p, ',')) break;
- }
- expect_punct(p, ';', "';' after struct member declaration");
- }
-}
-
- /* Parse the remainder of a struct/union specifier after the `struct` or
-  * `union` keyword has been consumed: optional attributes, an optional tag,
-  * and an optional `{ member-list }` body with optional trailing attributes.
-  *
-  *   p               parser state.
-  *   kind            TY_STRUCT or TY_UNION.
-  *   anon_attrs_out  may be NULL; for an untagged record with a body it
-  *                   receives the record's attributes so the caller can hoist
-  *                   them onto the surrounding declaration.
-  *
-  * Returns the record type. A bodiless reference yields the tag's existing
-  * type (or a freshly installed incomplete one); a definition completes the
-  * forward node in place and returns that same node. */
- static const Type* parse_struct_or_union(Parser* p, TypeKind kind,
-                                          Attr** anon_attrs_out) {
-   Sym tag_name = 0;
-   SrcLoc tag_loc;
-   TagDeclKind tdk = (kind == TY_STRUCT) ? TAG_STRUCT : TAG_UNION;
-   Attr* rec_attrs = NULL;
-   /* Attributes between `struct`/`union` keyword and the tag/body. */
-   parse_attrs_into(p, &rec_attrs);
-   tag_loc = tok_loc(&p->cur);
-   if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) {
-     tag_name = p->cur.v.ident;
-     advance(p);
-   }
-   int has_body = is_punct(&p->cur, '{');
-   if (!has_body && tag_name == 0) {
-     perr(p, "expected tag name or '{' after struct/union");
-   }
-   if (!has_body) {
-     /* Tag reference: `struct S` used as a type in a declaration. Look up
-      * the tag in the chain; if not found, install an incomplete tag in
-      * the current scope (§6.7.2.3 ¶7 — a forward declaration). */
-     TagEntry* e = tag_lookup(p, tag_name);
-     if (e) {
-       if (e->kind != tdk) {
-         perr(p, "use of tag with wrong kind (struct vs union)");
-       }
-       attr_list_append(&e->attrs, rec_attrs);
-       return e->type;
-     }
-     {
-       TagId tid = type_tag_new(p->pool, tdk, tag_name, tag_loc);
-       Type* t = type_record_forward(p->pool, kind, tid, tag_name);
-       TagEntry* te = tag_define(p, tag_name, tdk, t, /*complete=*/0);
-       attr_list_append(&te->attrs, rec_attrs);
-       return t;
-     }
-   }
-   /* Body: definition. If the tag was forward-declared in the same scope,
-    * complete that node in place; otherwise create a fresh forward node and
-    * complete it. The Type* identity is stable across completion so any
-    * pointer type built off the forward node automatically updates. */
-   Type* target = NULL;
-   TagEntry* existing = tag_name ? tag_lookup_local(p, tag_name) : NULL;
-   TagEntry* te = NULL; /* tag entry being defined; NULL for untagged records */
-   if (existing) {
-     if (existing->kind != tdk) {
-       perr(p, "tag redeclared with wrong kind");
-     }
-     if (existing->complete) {
-       perr(p, "redefinition of tag");
-     }
-     target = existing->type;
-     te = existing;
-   } else {
-     TagId tid = type_tag_new(p->pool, tdk, tag_name, tag_loc);
-     target = type_record_forward(p->pool, kind, tid, tag_name);
-     if (tag_name) {
-       /* Registered as incomplete before the body is parsed so members can
-        * name the tag (e.g. a pointer to the enclosing struct). */
-       te = tag_define(p, tag_name, tdk, target, /*complete=*/0);
-     }
-   }
-   expect_punct(p, '{', "'{' to start aggregate body");
-   TypeRecordBuilder* b =
-       type_record_begin(p->pool, kind, target->rec.tag_id, tag_name);
-   parse_member_decls(p, b);
-   expect_punct(p, '}', "'}' after aggregate body");
-   /* Trailing attributes (after `}`) attach to the record type. */
-   parse_attrs_into(p, &rec_attrs);
-   if (te) {
-     attr_list_append(&te->attrs, rec_attrs);
-   } else if (anon_attrs_out) {
-     /* Anonymous record — let the caller hoist record attrs onto the
-      * surrounding declaration (e.g. into DeclSpecs.attrs). */
-     attr_list_append(anon_attrs_out, rec_attrs);
-   }
-   /* Pull the accumulated fields out of the builder and install them on the
-    * target node so any pre-existing pointer-to-target types see complete
-    * fields. type_record_end hands back a separate Type; only its field
-    * array is reused, and `target` stays the canonical Type*. */
-   {
-     const Type* fresh = type_record_end(p->pool, b);
-     type_record_install(target, (Field*)fresh->rec.fields,
-                         fresh->rec.nfields);
-   }
-   /* Honor record-level packed / aligned(N). target is the canonical Type*
-    * (forward node completed in place), so writing to its rec.* is what
-    * abi_record_layout will read. */
-   {
-     TypeRecordOpts opts;
-     memset(&opts, 0, sizeof opts);
-     attrs_to_record_opts(rec_attrs, &opts);
-     if (opts.packed) target->rec.packed = 1;
-     if (opts.align_override > target->rec.align_override)
-       target->rec.align_override = opts.align_override;
-   }
-   /* Mark the tag complete whether it was forward-declared earlier or first
-    * introduced by this definition. Marking only the forward-declared case
-    * left freshly defined tags flagged incomplete, so a second definition in
-    * the same scope slipped past the "redefinition of tag" check above. */
-   if (te) {
-     te->complete = 1;
-   }
-   return target;
- }
-
- /* Parse `enum [tag] [{ K [= cexpr] (, K [= cexpr])* [,] }]` after the
-  * `enum` keyword has been consumed.
-  *
-  *   p               parser state.
-  *   anon_attrs_out  may be NULL; for an untagged enum definition it receives
-  *                   the enum's attributes so the caller can hoist them.
-  *
-  * Returns the enum type. A bodiless reference returns the registered tag's
-  * type, or installs an incomplete int-based enum tag when none is found.
-  * A definition binds every enumerator into the ordinary scope and registers
-  * (or completes) the tag. */
- static const Type* parse_enum(Parser* p, Attr** anon_attrs_out) {
-   Sym tag_name = 0;
-   SrcLoc tag_loc;
-   Attr* rec_attrs = NULL;
-   /* Attributes between `enum` keyword and tag/body. */
-   parse_attrs_into(p, &rec_attrs);
-   tag_loc = tok_loc(&p->cur);
-   if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) {
-     tag_name = p->cur.v.ident;
-     advance(p);
-   }
-   int has_body = is_punct(&p->cur, '{');
-   if (!has_body && tag_name == 0) {
-     perr(p, "expected tag name or '{' after enum");
-   }
-   if (!has_body) {
-     /* Tag reference. Per §6.7.2.3 ¶3 enum types must be defined where
-      * referenced; the tag lookup is mostly to keep the type identity
-      * consistent. If the tag isn't registered, treat the enum as
-      * synonymous with `int` — simplest behavior consistent with
-      * §6.7.2.2 ¶4 (enum compatible with int). */
-     TagEntry* e = tag_lookup(p, tag_name);
-     if (e && e->kind == TAG_ENUM) {
-       attr_list_append(&e->attrs, rec_attrs);
-       return e->type;
-     }
-     /* Forward enum: install an incomplete enum-type at int width. */
-     TagId tid = type_tag_new(p->pool, TAG_ENUM, tag_name, tag_loc);
-     const Type* et = type_enum(p->pool, tid, tag_name, ty_int(p));
-     {
-       TagEntry* te = tag_define(p, tag_name, TAG_ENUM, (Type*)et,
-                                 /*complete=*/0);
-       attr_list_append(&te->attrs, rec_attrs);
-     }
-     return et;
-   }
-   /* Definition: parse enumerator list, bind each into the ordinary scope
-    * as SEK_ENUM_CST (§6.7.2.2 ¶3). Values default to 0 and increment by
-    * one; an `= cexpr` resets the running counter. */
-   TagId tid = type_tag_new(p->pool, TAG_ENUM, tag_name, tag_loc);
-   const Type* et = type_enum(p->pool, tid, tag_name, ty_int(p));
-   expect_punct(p, '{', "'{'");
-   i64 next_val = 0;
-   for (;;) {
-     Sym name;
-     SrcLoc nloc = tok_loc(&p->cur);
-     SymEntry* e;
-     if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) {
-       perr(p, "expected enumerator name");
-     }
-     name = p->cur.v.ident;
-     advance(p);
-     i64 val = next_val;
-     if (accept_punct(p, '=')) {
-       val = eval_const_int(p, nloc);
-     }
-     e = scope_define(p, name, SEK_ENUM_CST, et);
-     e->v.enum_value = val;
-     next_val = val + 1;
-     if (!accept_punct(p, ',')) break;
-     if (is_punct(&p->cur, '}')) break; /* trailing comma */
-   }
-   expect_punct(p, '}', "'}' after enumerator list");
-   /* Trailing attributes after `}` attach to the enum type. */
-   parse_attrs_into(p, &rec_attrs);
-   if (tag_name) {
-     /* Complete an incomplete forward entry; otherwise install fresh. */
-     TagEntry* existing = tag_lookup_local(p, tag_name);
-     if (existing) {
-       if (existing->kind != TAG_ENUM) {
-         perr(p, "tag redeclared with wrong kind");
-       }
-       /* Same rule parse_struct_or_union enforces: a tag already completed
-        * in this scope may not be given a second body. */
-       if (existing->complete) {
-         perr(p, "redefinition of tag");
-       }
-       /* NOTE(review): the entry keeps the Type* it was registered with,
-        * while this function returns the fresh `et` — confirm nothing relies
-        * on the two being the same node. */
-       existing->complete = 1;
-       attr_list_append(&existing->attrs, rec_attrs);
-     } else {
-       TagEntry* te = tag_define(p, tag_name, TAG_ENUM, (Type*)et,
-                                 /*complete=*/1);
-       attr_list_append(&te->attrs, rec_attrs);
-     }
-   } else if (anon_attrs_out) {
-     attr_list_append(anon_attrs_out, rec_attrs);
-   }
-   return et;
- }
-
- /* Resolve member `name` inside record `rec`, looking through anonymous
-  * struct/union members (C11 §6.7.2.1 ¶13).
-  *
-  * Fields are scanned in declaration order. A named match wins immediately;
-  * a field flagged FIELD_ANON whose type is itself a struct/union is searched
-  * recursively, and the anonymous field's own offset is added to whatever
-  * the nested search reports.
-  *
-  * On success returns 1 and fills *out_type (member type), *out_offset (byte
-  * offset from `rec`) and *out_field (the matching Field, which carries the
-  * member's flags). Returns 0 — outputs untouched — when `rec` is NULL or not
-  * a struct/union, when no layout is available, or when nothing matches. A
-  * `name` of 0 never matches a field directly. */
- static int find_field(TargetABI* abi, const Type* rec, Sym name,
-                       const Type** out_type, u32* out_offset,
-                       const Field** out_field) {
-   if (!rec) return 0;
-   if (rec->kind != TY_STRUCT && rec->kind != TY_UNION) return 0;
-
-   const ABIRecordLayout* layout = abi_record_layout(abi, rec);
-   if (!layout) return 0;
-
-   for (u16 idx = 0; idx < rec->rec.nfields; ++idx) {
-     const Field* fld = &rec->rec.fields[idx];
-
-     /* Direct hit on a named member. */
-     if (name != 0 && fld->name == name) {
-       *out_type = fld->type;
-       *out_offset = layout->fields[idx].offset;
-       *out_field = fld;
-       return 1;
-     }
-
-     /* Only anonymous aggregates are transparent to lookup. */
-     int transparent =
-         (fld->flags & FIELD_ANON) &&
-         (fld->type->kind == TY_STRUCT || fld->type->kind == TY_UNION);
-     if (!transparent) continue;
-
-     const Type* sub_type = NULL;
-     u32 sub_offset = 0;
-     const Field* sub_field = NULL;
-     if (!find_field(abi, fld->type, name, &sub_type, &sub_offset,
-                     &sub_field)) {
-       continue;
-     }
-     *out_type = sub_type;
-     *out_offset = layout->fields[idx].offset + sub_offset;
-     *out_field = sub_field;
-     return 1;
-   }
-   return 0;
- }
-
- /* Report whether token `t` can begin a declaration-specifier sequence.
-  *
-  * Returns 1 for an identifier token that is a type keyword, storage-class
-  * keyword, qualifier, function specifier or alignment specifier; for the
-  * `__builtin_va_list` name; and for an identifier that scope_lookup
-  * resolves to a SEK_TYPEDEF entry. Returns 0 for everything else,
-  * including non-identifier tokens.
-  *
-  * Used at lookahead points (cast vs. parenthesized expression, sizeof's
-  * operand form, for-init declaration vs. expression). The keyword list is
-  * meant to mirror what parse_decl_specs accepts, so keep the two in sync. */
- static int starts_type_name(const Parser* p, const Tok* t) {
-   if (t->kind != TOK_IDENT) return 0;
-
-   const CKw kw = ident_kw(p, t->v.ident);
-   if (kw == KW_NONE) {
-     /* Not a keyword: either the target-defined `__builtin_va_list`
-      * type-name or a typedef-name visible in scope. */
-     if (t->v.ident == p->sym_b_va_list) return 1;
-     /* scope_lookup takes a non-const Parser*; the cast drops const only
-      * to satisfy that signature. */
-     SymEntry* ent = scope_lookup((Parser*)p, t->v.ident);
-     return ent && ent->kind == SEK_TYPEDEF;
-   }
-
-   switch (kw) {
-     /* type specifiers */
-     case KW_VOID:
-     case KW_CHAR:
-     case KW_SHORT:
-     case KW_INT:
-     case KW_LONG:
-     case KW_FLOAT:
-     case KW_DOUBLE:
-     case KW_SIGNED:
-     case KW_UNSIGNED:
-     case KW_BOOL:
-     case KW_STRUCT:
-     case KW_UNION:
-     case KW_ENUM:
-     /* qualifiers */
-     case KW_CONST:
-     case KW_VOLATILE:
-     case KW_RESTRICT:
-     case KW_ATOMIC:
-     /* storage class, function and alignment specifiers */
-     case KW_STATIC:
-     case KW_EXTERN:
-     case KW_INLINE:
-     case KW_NORETURN:
-     case KW_REGISTER:
-     case KW_AUTO:
-     case KW_TYPEDEF:
-     case KW_ALIGNAS:
-     case KW_THREAD_LOCAL:
-       return 1;
-     default:
-       return 0;
-   }
- }
-
- /* Consume the leading `*` chain of a declarator, wrapping `base` in one
-  * pointer type per star. Qualifiers that follow a `*` apply to the pointer
-  * just created (`int *const p` makes p a const pointer to int). Attributes
-  * appearing in the same position are parsed and discarded. Returns the
-  * resulting type, or `base` unchanged when no `*` is present. */
- static const Type* parse_pointer_layer(Parser* p, const Type* base) {
-   const Type* ty = base;
-   while (accept_punct(p, '*')) {
-     u16 quals = 0;
-     int more = 1;
-     ty = type_ptr(p->pool, ty);
-     /* Gather every qualifier/attribute attached to this pointer level. */
-     while (more) {
-       if (accept_kw(p, KW_CONST)) {
-         quals |= Q_CONST;
-       } else if (accept_kw(p, KW_VOLATILE)) {
-         quals |= Q_VOLATILE;
-       } else if (accept_kw(p, KW_RESTRICT)) {
-         quals |= Q_RESTRICT;
-       } else if (accept_kw(p, KW_ATOMIC)) {
-         quals |= Q_ATOMIC;
-       } else if (starts_attr(p)) {
-         /* e.g. `int * __attribute__((aligned(8))) p` — parsed, dropped. */
-         parse_and_discard_attributes(p);
-       } else {
-         more = 0;
-       }
-     }
-     if (quals != 0) ty = type_qualified(p->pool, ty, quals);
-   }
-   return ty;
- }
-
- /* Type-name (§6.7.7): specifier-qualifier-list followed by an optional
-  * abstract declarator. The declarator is parsed by parse_declarator_full
-  * with abstract forms allowed, so pointer prefixes plus array and function
-  * suffixes are accepted — compound literals like `(int[]){...}` and casts
-  * like `(int (*)[3])` depend on that. Any name or location the declarator
-  * parser reports is discarded. Raises a parse error when no declaration
-  * specifiers are present. Used by sizeof / _Alignof / cast. */
- static const Type* parse_type_name(Parser* p) {
-   DeclSpecs specs;
-   Sym ignored_name = 0;
-   SrcLoc ignored_loc = {0, 0, 0};
-
-   if (!parse_decl_specs(p, &specs)) {
-     perr(p, "expected type-name");
-   }
-   return parse_declarator_full(p, specs.type, /*allow_abstract=*/1,
-                                &ignored_name, &ignored_loc);
- }
-
-/* ============================================================
- * Literal parsing
- * ============================================================
- * Integer literals are parsed by parse_int_literal (returns the value);
- * the §6.4.4.1 type-by-suffix selection lives in int_literal_type and
- * runs from parse_primary so cexpr / array-size paths that only need
- * the value can ignore typing. Float literals share parse_float_literal
- * and float_literal_type. */
-static i64 parse_int_literal(Parser* p, const Tok* t) {
- size_t len = 0;
- const char* s = pool_str(p->pool, t->spelling, &len);
- size_t i = 0;
- i64 base = 10;
- i64 acc = 0;
- if (!s) perr(p, "bad numeric literal");
- if (len >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
- base = 16;
- i = 2;
- } else if (len >= 2 && s[0] == '0' && (s[1] == 'b' || s[1] == 'B')) {
- base = 2;
- i = 2;
- } else if (len >= 1 && s[0] == '0') {
- base = 8;
- i = 1;
- }
- for (; i < len; ++i) {
- int c = (unsigned char)s[i];
- int dv;
- /* Stop at suffix characters (u/U/l/L). */
- if (c == 'u' || c == 'U' || c == 'l' || c == 'L') break;
- if (c >= '0' && c <= '9')
- dv = c - '0';
- else if (c >= 'a' && c <= 'f')
- dv = c - 'a' + 10;
- else if (c >= 'A' && c <= 'F')
- dv = c - 'A' + 10;
- else
- perr(p, "bad digit in numeric literal");
- if (dv >= base) perr(p, "digit out of range for base");
- acc = acc * base + dv;
- }
- return acc;
-}
-
-/* §6.4.4.1 ¶5 — pick a TY_* tag for an integer constant from its
- * suffix flags. Promotion-by-magnitude (e.g. an unsuffixed decimal that
- * doesn't fit in `int` widening to `long`) is not modelled; corpus
- * literals fit in their suffix family. */
-static const Type* int_literal_type(Parser* p, const Tok* t) {
- int u = (t->flags & TF_INT_U) != 0;
- int l = (t->flags & TF_INT_L) != 0;
- int ll = (t->flags & TF_INT_LL) != 0;
- TypeKind k;
- if (ll) k = u ? TY_ULLONG : TY_LLONG;
- else if (l) k = u ? TY_ULONG : TY_LONG;
- else if (u) k = TY_UINT;
- else k = TY_INT;
- return type_prim(p->pool, k);
-}
-
- /* Decimal/hex float-literal parser. Allowed source forms (§6.4.4.2):
- * decimal: digits[.digits][e[+-]digits]
- * hex: 0x hexdigits[.hexdigits][p[+-]digits]
- * Either part of a fractional pair may be empty (`1.`, `.5`); the
- * exponent is required for hex floats per the standard but accepted
- * without here for resilience.
- *
- * Returns the value as a double. The f/F/l/L suffix characters only stop
- * digit scanning here; this function does not report a type (callers use
- * float_literal_type for that). Raises a parse error on a missing spelling
- * or an invalid digit.
- *
- * The value is built by repeated multiply/divide in double, so the result
- * may differ in the last bits from a correctly rounded conversion. */
- static double parse_float_literal(Parser* p, const Tok* t) {
- size_t len = 0;
- const char* s = pool_str(p->pool, t->spelling, &len);
- size_t i = 0;
- int is_hex = 0;
- double v = 0.0;
- int exp = 0; /* minus the number of fractional digits seen (base 10 or 16) */
- int dec_exp = 0; /* explicit exponent (decimal: pow10; hex: pow2) */
- int frac_seen = 0;
- if (!s) perr(p, "bad float literal");
- if (len >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
- is_hex = 1;
- i = 2;
- }
- /* Integer part: accumulate digits until `.`, an exponent marker or a
- * suffix character. */
- while (i < len) {
- int c = (unsigned char)s[i];
- int dv;
- if (c == '.' || c == 'e' || c == 'E' || c == 'p' || c == 'P' ||
- c == 'f' || c == 'F' || c == 'l' || c == 'L')
- break;
- if (c >= '0' && c <= '9') dv = c - '0';
- else if (is_hex && c >= 'a' && c <= 'f') dv = c - 'a' + 10;
- else if (is_hex && c >= 'A' && c <= 'F') dv = c - 'A' + 10;
- else perr(p, "bad digit in float literal");
- v = v * (is_hex ? 16.0 : 10.0) + (double)dv;
- i++;
- }
- /* Fractional part: digits keep extending the significand as if there
- * were no point; `exp` counts how many places to shift back later. */
- if (i < len && s[i] == '.') {
- i++;
- while (i < len) {
- int c = (unsigned char)s[i];
- int dv;
- if (c == 'e' || c == 'E' || c == 'p' || c == 'P' ||
- c == 'f' || c == 'F' || c == 'l' || c == 'L')
- break;
- if (c >= '0' && c <= '9') dv = c - '0';
- else if (is_hex && c >= 'a' && c <= 'f') dv = c - 'a' + 10;
- else if (is_hex && c >= 'A' && c <= 'F') dv = c - 'A' + 10;
- else perr(p, "bad digit in float literal");
- v = v * (is_hex ? 16.0 : 10.0) + (double)dv;
- exp -= 1;
- frac_seen = 1;
- i++;
- }
- }
- (void)frac_seen; /* written above but never read; cast marks it as used */
- /* Explicit exponent. Decimal uses e/E and base 10; hex uses p/P and base 2
- * applied to the (already-scaled) hex significand. The marker is matched
- * without regard to is_hex; hex_exp below records which one was seen. */
- if (i < len && (s[i] == 'e' || s[i] == 'E' || s[i] == 'p' || s[i] == 'P')) {
- int neg = 0;
- int n = 0;
- int hex_exp = (s[i] == 'p' || s[i] == 'P');
- i++;
- if (i < len && (s[i] == '+' || s[i] == '-')) {
- if (s[i] == '-') neg = 1;
- i++;
- }
- while (i < len) {
- int c = (unsigned char)s[i];
- if (c < '0' || c > '9') break;
- n = n * 10 + (c - '0');
- i++;
- }
- dec_exp = neg ? -n : n;
- if (hex_exp) {
- /* For hex floats the explicit exponent is in base 2 and applies to
- * the significand interpreted as the hex digits without the
- * fractional adjustment we accumulated in `exp` (which is base-16
- * digits). Convert the base-16 fractional adjustment to base-2 by
- * multiplying by 4, then combine with the explicit base-2 exp. */
- dec_exp += exp * 4;
- exp = 0;
- }
- }
- /* Apply the implicit fractional-digit exponent. This runs for decimal
- * literals and for hex literals written without a p-exponent; when a
- * p-exponent was present, `exp*4` was folded into dec_exp above and
- * `exp` is already 0, so these loops do nothing. */
- while (exp < 0) { v /= (is_hex ? 16.0 : 10.0); exp++; }
- while (exp > 0) { v *= (is_hex ? 16.0 : 10.0); exp--; }
- /* Apply the explicit exponent (base 10 for decimal, base 2 for hex). */
- if (is_hex) {
- while (dec_exp < 0) { v /= 2.0; dec_exp++; }
- while (dec_exp > 0) { v *= 2.0; dec_exp--; }
- } else {
- while (dec_exp < 0) { v /= 10.0; dec_exp++; }
- while (dec_exp > 0) { v *= 10.0; dec_exp--; }
- }
- return v;
- }
-
- /* Choose the type of a floating constant from the token's suffix flags:
-  * TF_FLT_F selects float, TF_FLT_L selects long double, and no suffix flag
-  * selects double. The F flag is tested first. */
- static const Type* float_literal_type(Parser* p, const Tok* t) {
-   TypeKind kind = TY_DOUBLE;
-   if (t->flags & TF_FLT_F) {
-     kind = TY_FLOAT;
-   } else if (t->flags & TF_FLT_L) {
-     kind = TY_LDOUBLE;
-   }
-   return type_prim(p->pool, kind);
- }
-
-/* ============================================================
- * Expressions — precedence climbing
- * ============================================================
- * Pratt-style climber: each level consumes its operators and recurses
- * into the next-tighter level. Each function leaves the result on the CG
- * stack. The grammar follows C11 §6.5 top-down; only the productions
- * needed by the spine are wired today, with the remaining ones marked
- * with TODO comments at the call sites where they'll slot in.
- *
- * Level (loose → tight):
- * assignment = `=` `+=` `-=` `*=` `/=` `%=` `&=` `|=` `^=` `<<=` `>>=`
- * conditional = `? :` (TODO)
- * logical_or = `||` (TODO)
- * logical_and = `&&` (TODO)
- * bit_or = `|`
- * bit_xor = `^`
- * bit_and = `&`
- * equality = `==` `!=`
- * relational = `<` `<=` `>` `>=`
- * shift = `<<` `>>`
- * additive = `+` `-`
- * multiplicative = `*` `/` `%`
- * cast = `(type) cast` (TODO)
- * unary = `+ - ! ~ * & ++ --` `sizeof` (partial)
- * postfix = `[] () . -> ++ --` (partial)
- * primary = ident | num | `(` expr `)` | strlit | charlit
- */
-
-static void parse_expr(Parser* p);
-static void parse_assign_expr(Parser* p);
-static void parse_unary(Parser* p);
-static void parse_postfix(Parser* p);
-
- /* Initializer entry points used by compound-literal lowering in parse_unary;
- * the bodies live next to the rest of the initializer machinery further
- * down. These are forward declarations only; the definitions are not in
- * this part of the file. */
- typedef struct DeclSpecs DeclSpecs; /* repeats a name parse_type_name above already uses by value */
- static const Type* complete_incomplete_array(Parser* p, const Type* ty);
- static void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset,
- const Type* ty);
-
- /* Insert the implicit arithmetic conversion required by simple assignment
-  * (§6.5.16.1). The value on top of the CG stack is the right-hand side and
-  * the entry beneath it is the store target. When both have arithmetic
-  * types and those types differ, the top is converted to the target's type.
-  * Nothing happens if either type is unavailable, the types are the same
-  * node, or either is non-arithmetic — pointer and aggregate assignments are
-  * left to the caller. */
- static void coerce_top_to_lvalue(Parser* p) {
-   const Type* rhs_ty = cg_top_type(p->cg);
-   const Type* lhs_ty = cg_top2_type(p->cg);
-
-   if (!rhs_ty || !lhs_ty) return;
-   if (rhs_ty == lhs_ty) return;
-   if (!type_is_arith(rhs_ty) || !type_is_arith(lhs_ty)) return;
-
-   cg_convert(p->cg, lhs_ty);
- }
-
- /* Turn the top of the CG stack into an rvalue, dispatching on its type:
-  *   - array:        §6.3.2.1 array-to-pointer decay — take the address, then
-  *                   retag the result as pointer-to-element.
-  *   - function:     §6.3.2.1 function-to-pointer decay — take the address.
-  *   - struct/union: left untouched; aggregates stay as addressable storage
-  *                   for cg_call / cg_ret / struct copy to consume.
-  *   - anything else (or no type available): cg_load. */
- static void to_rvalue(Parser* p) {
-   const Type* top = cg_top_type(p->cg);
-   if (top) {
-     switch (top->kind) {
-       case TY_ARRAY:
-         cg_addr(p->cg);
-         cg_retag_top(p->cg, type_ptr(p->pool, top->arr.elem));
-         return;
-       case TY_FUNC:
-         cg_addr(p->cg);
-         return;
-       case TY_STRUCT:
-       case TY_UNION:
-         /* Aggregates never load into a single scratch register. */
-         return;
-       default:
-         break;
-     }
-   }
-   cg_load(p->cg);
- }
-
- /* Decode one character from a literal's spelling.
-  *
-  *   p    parser (used only to report errors).
-  *   s    spelling bytes; len is their count.
-  *   pi   in: offset of the character to decode; out: offset just past it.
-  *   loc  source location used in diagnostics.
-  *
-  * A byte other than backslash is returned as its unsigned value. Escapes
-  * handled (§6.4.4.4): simple escapes, `\x` followed by one or more hex
-  * digits, and one to three octal digits; hex and octal results are reduced
-  * to their low 8 bits. An unrecognized escape yields the escaped byte
-  * itself. Panics on a truncated literal, a trailing backslash, or `\x`
-  * with no digits. */
- static i64 decode_one_char(Parser* p, const char* s, size_t len, size_t* pi,
-                            SrcLoc loc) {
-   size_t i = *pi;
-   i64 v;
-   int c;
-   if (i >= len) compiler_panic(p->c, loc, "truncated character literal");
-   if (s[i] != '\\') {
-     v = (unsigned char)s[i++];
-     *pi = i;
-     return v;
-   }
-   /* Escape sequence. */
-   i++;
-   if (i >= len) compiler_panic(p->c, loc, "trailing '\\' in literal");
-   c = (unsigned char)s[i++];
-   switch (c) {
-     case 'n': v = '\n'; break;
-     case 't': v = '\t'; break;
-     case 'r': v = '\r'; break;
-     case 'b': v = '\b'; break;
-     case 'f': v = '\f'; break;
-     case 'v': v = '\v'; break;
-     case 'a': v = '\a'; break;
-     case '\\': v = '\\'; break;
-     case '\'': v = '\''; break;
-     case '"': v = '"'; break;
-     case '?': v = '?'; break;
-     case 'x': {
-       /* Only the low byte is kept, so mask after every digit. An
-        * arbitrarily long digit run then cannot overflow the accumulator
-        * (a signed i64 accumulator overflowed — undefined behavior — after
-        * 16 digits), and the final value is unchanged. */
-       unsigned hex = 0;
-       int any = 0;
-       while (i < len) {
-         int d = (unsigned char)s[i];
-         int dv;
-         if (d >= '0' && d <= '9') dv = d - '0';
-         else if (d >= 'a' && d <= 'f') dv = d - 'a' + 10;
-         else if (d >= 'A' && d <= 'F') dv = d - 'A' + 10;
-         else break;
-         hex = ((hex << 4) | (unsigned)dv) & 0xffu;
-         any = 1;
-         i++;
-       }
-       if (!any) compiler_panic(p->c, loc, "\\x with no hex digits");
-       v = (i64)hex;
-       break;
-     }
-     default:
-       if (c >= '0' && c <= '7') {
-         /* Octal escape: at most three digits in total. */
-         i64 oct = c - '0';
-         int n = 1;
-         while (n < 3 && i < len && s[i] >= '0' && s[i] <= '7') {
-           oct = oct * 8 + (s[i] - '0');
-           i++;
-           n++;
-         }
-         v = oct & 0xff;
-       } else {
-         /* Unknown escape: implementation-defined; keep the literal byte. */
-         v = c;
-       }
-       break;
-   }
-   *pi = i;
-   return v;
- }
-
- /* Decode a character-constant token to its value.
-  *
-  * Skips the encoding prefix indicated by the token flags (`u8` is two
-  * bytes; `L`, `u`, `U` are one), requires an opening quote and a non-empty
-  * body, decodes exactly one character via decode_one_char, and requires the
-  * closing quote to follow immediately. Multi-character constants — valid C
-  * with an implementation-defined value — are rejected with a parse error. */
- static i64 decode_char_literal(Parser* p, const Tok* t) {
-   size_t len = 0;
-   const char* s = pool_str(p->pool, t->spelling, &len);
-   size_t pos = 0;
-   i64 value;
-
-   if (!s) perr(p, "bad char literal");
-
-   /* The flag bits identify the prefix without re-parsing it. */
-   if (t->flags & TF_STR_U8) {
-     pos = 2;
-   } else if (t->flags & (TF_STR_WIDE | TF_STR_U16 | TF_STR_U32)) {
-     pos = 1;
-   }
-
-   if (pos >= len || s[pos] != '\'') perr(p, "malformed character literal");
-   pos += 1; /* step over the opening quote */
-   if (pos >= len || s[pos] == '\'') perr(p, "empty character literal");
-
-   value = decode_one_char(p, s, len, &pos, t->loc);
-
-   /* Anything other than the closing quote here means extra characters. */
-   if (pos >= len || s[pos] != '\'') {
-     perr(p, "multi-character constants are not supported");
-   }
-   return value;
- }
-
- /* Decode the body of a string-literal token into raw bytes.
-  *
-  * The encoding prefix (if any) and the surrounding quotes are skipped; each
-  * source character or escape is decoded with decode_one_char and stored as
-  * one byte. A terminating NUL is appended and counted, so *nlen_out is the
-  * number of decoded bytes plus one.
-  *
-  * Returns a buffer obtained from the compiler environment's heap; the
-  * caller releases it through that same heap. Raises a parse error on a
-  * missing spelling, a missing opening quote, or allocation failure. */
- static u8* decode_string_literal(Parser* p, const Tok* t, size_t* nlen_out) {
-   size_t src_len = 0;
-   const char* src = pool_str(p->pool, t->spelling, &src_len);
-   Heap* heap = p->c->env->heap;
-   size_t pos = 0;
-   size_t out_len = 0;
-   u8* out;
-
-   if (!src) perr(p, "bad string literal");
-
-   if (t->flags & TF_STR_U8) {
-     pos = 2;
-   } else if (t->flags & (TF_STR_WIDE | TF_STR_U16 | TF_STR_U32)) {
-     pos = 1;
-   }
-   if (pos >= src_len || src[pos] != '"') perr(p, "malformed string literal");
-   pos += 1;
-
-   /* Decoding never produces more bytes than it consumes, so the spelling
-    * length plus one for the NUL is a safe upper bound. */
-   out = (u8*)heap->alloc(heap, src_len + 1, 1);
-   if (!out) perr(p, "out of memory in string literal");
-
-   for (; pos < src_len && src[pos] != '"';) {
-     out[out_len++] = (u8)decode_one_char(p, src, src_len, &pos, t->loc);
-   }
-   out[out_len++] = 0; /* NUL terminator */
-
-   *nlen_out = out_len;
-   return out;
- }
-
- /* Append `n` bytes to the `.rodata` section of the object under
-  * construction and define a local object symbol covering them.
-  *
-  * The symbol is named `.LC<k>`, where k comes from a function-local static
-  * counter that is incremented on every call; the name exists only for
-  * readability in object dumps. Returns the new symbol's id. If the section
-  * reservation yields no destination pointer, the symbol is still defined
-  * but no bytes are copied. Used for string literals in primary
-  * expressions. */
- static ObjSymId emit_string_to_rodata(Parser* p, const u8* bytes, size_t n) {
-   static u32 counter;
-   ObjBuilder* ob = decl_obj(p->decls);
-   Sym sec_name = pool_intern_cstr(p->pool, ".rodata");
-   ObjSecId sec = obj_section(ob, sec_name, SEC_RODATA, SF_ALLOC, 1u);
-   u32 start = obj_pos(ob, sec);
-   u32 serial = ++counter;
-
-   /* Hand-rolled ".LC<serial>" formatting keeps stdio out of the parser. */
-   char label[32];
-   int label_len = 0;
-   char rev[12];
-   int nrev = 0;
-   label[label_len++] = '.';
-   label[label_len++] = 'L';
-   label[label_len++] = 'C';
-   do {
-     /* Digits come out least-significant first; reversed below. */
-     rev[nrev++] = (char)('0' + (serial % 10));
-     serial /= 10;
-   } while (serial != 0);
-   while (nrev > 0) {
-     label[label_len++] = rev[--nrev];
-   }
-   label[label_len] = 0;
-
-   Sym label_sym = pool_intern(p->pool, label, (size_t)label_len);
-   ObjSymId sym = obj_symbol(ob, label_sym, SB_LOCAL, SK_OBJ, sec, start, n);
-
-   u8* dst = obj_reserve(ob, sec, n);
-   if (dst) {
-     memcpy(dst, bytes, n);
-   }
-   return sym;
- }
-
-/* Phase 9 — Builtins.
- *
- * `__builtin_*` and `__atomic_*` calls are not ordinary function references:
- * they don't go through scope_lookup / cg_call. Instead the parser dispatches
- * the name to a per-builtin handler that consumes the argument list and
- * emits the corresponding cg primitive (cg_alloca, cg_va_*, cg_atomic_*) or
- * folds the call to a constant (`__builtin_offsetof`, `__builtin_expect`).
- *
- * Pre: p->cur is TOK_IDENT for `name`, and peek1() is `(`. (The caller's
- * responsibility — try_parse_builtin_call assumes both checks have run.)
- * Returns 1 if the name matched a known builtin (token stream advanced
- * past the closing `)`); 0 if not (no tokens consumed).
- *
- * Stack discipline mirrors a normal call: a non-void builtin leaves its
- * result rvalue on the stack; a void-returning one (va_start/end/copy,
- * atomic_store) pushes the same int-0 sentinel parse_postfix uses for
- * void calls so higher levels never underflow. */
-static int try_parse_builtin_call(Parser* p);
-
- /* Walk a `__builtin_offsetof` member-designator: a member name followed by
-  * any mix of `.field` and `[index]` steps.
-  *
-  *   p     parser; on entry the current token must be the first member name.
-  *   base  type the designator starts from (a struct or union).
-  *   off   accumulator; every step's byte offset is added to *off.
-  *
-  * Returns the type of the designated member or element. Member steps are
-  * resolved with find_field (so anonymous aggregates are looked through);
-  * index steps add `index * sizeof(element)`. Raises a parse error for a
-  * missing or unknown member name, a member step on a non-struct/union, or
-  * `[` applied to a non-array.
-  *
-  * A member lookup happens only when a member name is pending (at the start
-  * and after each `.`). Re-running the lookup after every step rejected
-  * designators ending in `[index]` on a scalar element, and read the
-  * identifier field of a non-identifier token when the element was a
-  * struct. */
- static const Type* offsetof_designator(Parser* p, const Type* base,
-                                        u32* off) {
-   const Type* cur = base;
-   int member_pending = 1; /* current token is a validated member name */
-   /* First member name — required, written without a leading `.`. */
-   if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) {
-     perr(p, "expected member name in __builtin_offsetof");
-   }
-   for (;;) {
-     if (member_pending) {
-       Sym mname = p->cur.v.ident;
-       const Type* mty = NULL;
-       u32 moff = 0;
-       const Field* mf = NULL;
-       if (cur->kind != TY_STRUCT && cur->kind != TY_UNION) {
-         perr(p, "__builtin_offsetof step into non-aggregate");
-       }
-       if (!find_field(p->abi, cur, mname, &mty, &moff, &mf)) {
-         perr(p, "no such member in __builtin_offsetof");
-       }
-       advance(p);
-       *off += moff;
-       cur = mty;
-       member_pending = 0;
-     }
-     /* Optional continuation: `.field` or `[index]`. */
-     if (is_punct(&p->cur, '.')) {
-       advance(p);
-       if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) {
-         perr(p, "expected member name after '.'");
-       }
-       member_pending = 1;
-       continue;
-     }
-     if (is_punct(&p->cur, '[')) {
-       advance(p);
-       i64 idx = eval_const_int(p, p->cur.loc);
-       expect_punct(p, ']', "']' in __builtin_offsetof");
-       if (cur->kind != TY_ARRAY) {
-         perr(p, "__builtin_offsetof '[' on non-array");
-       }
-       *off += (u32)((i64)abi_sizeof(p->abi, cur->arr.elem) * idx);
-       cur = cur->arr.elem;
-       continue;
-     }
-     break;
-   }
-   return cur;
- }
-
-static int try_parse_builtin_call(Parser* p) {
- Sym name = p->cur.v.ident;
- SrcLoc loc = p->cur.loc;
-
- /* Common dispatch: only the names below match. Falling through means the
- * IDENT is not a builtin and parse_primary should resolve it normally. */
- if (name != p->sym_b_alloca && name != p->sym_b_ctz &&
- name != p->sym_b_expect &&
- name != p->sym_b_offsetof && name != p->sym_b_va_start &&
- name != p->sym_b_va_arg && name != p->sym_b_va_end &&
- name != p->sym_b_va_copy && name != p->sym_a_load_n &&
- name != p->sym_a_store_n && name != p->sym_a_exchange_n &&
- name != p->sym_a_fetch_add && name != p->sym_a_fetch_sub &&
- name != p->sym_a_fetch_and && name != p->sym_a_fetch_or &&
- name != p->sym_a_fetch_xor && name != p->sym_a_cas_n &&
- name != p->sym_a_thread_fence && name != p->sym_a_signal_fence) {
- return 0;
- }
- advance(p); /* IDENT */
- expect_punct(p, '(', "'(' after builtin");
-
- if (name == p->sym_b_offsetof) {
- /* `__builtin_offsetof(type, member-designator)` — fold to a size_t
- * constant. The type is the aggregate root; the designator chain
- * accumulates field/element offsets. */
- const Type* root = parse_type_name(p);
- expect_punct(p, ',', "',' in __builtin_offsetof");
- u32 off = 0;
- (void)offsetof_designator(p, root, &off);
- expect_punct(p, ')', "')' after __builtin_offsetof");
- cg_push_int(p->cg, (i64)off, ty_size_t(p));
- return 1;
- }
-
- if (name == p->sym_b_expect) {
- /* `__builtin_expect(expr, hint)` — value of the call is `expr`; the
- * hint is evaluated for side effects (none in practice) then dropped.
- * No backend-side intrinsic is needed for the corpus. */
- parse_assign_expr(p);
- to_rvalue(p);
- expect_punct(p, ',', "',' in __builtin_expect");
- parse_assign_expr(p);
- cg_drop(p->cg);
- expect_punct(p, ')', "')' after __builtin_expect");
- return 1;
- }
-
- if (name == p->sym_b_alloca) {
- parse_assign_expr(p);
- to_rvalue(p);
- expect_punct(p, ')', "')' after __builtin_alloca");
- cg_set_loc(p->cg, loc);
- cg_alloca(p->cg);
- return 1;
- }
-
- if (name == p->sym_b_ctz) {
- /* __builtin_ctz(unsigned) — count trailing zeros, result `int`.
- * UB when arg is 0; the inline lowering produces the arch's natural
- * result for that case (typically the operand width). */
- parse_assign_expr(p);
- to_rvalue(p);
- expect_punct(p, ')', "')' after __builtin_ctz");
- cg_set_loc(p->cg, loc);
- cg_intrinsic_unary_to_int(p->cg, INTRIN_CTZ);
- return 1;
- }
-
- if (name == p->sym_b_va_start) {
- /* `__builtin_va_start(ap, last)` — push &ap, parse-and-drop `last`
- * (its name is required by C but the runtime impl ignores it). */
- parse_assign_expr(p);
- cg_addr(p->cg);
- expect_punct(p, ',', "',' in __builtin_va_start");
- parse_assign_expr(p);
- cg_drop(p->cg);
- expect_punct(p, ')', "')' after __builtin_va_start");
- cg_set_loc(p->cg, loc);
- cg_va_start_(p->cg);
- cg_push_int(p->cg, 0, ty_int(p)); /* void-call sentinel */
- return 1;
- }
-
- if (name == p->sym_b_va_end) {
- parse_assign_expr(p);
- cg_addr(p->cg);
- expect_punct(p, ')', "')' after __builtin_va_end");
- cg_set_loc(p->cg, loc);
- cg_va_end_(p->cg);
- cg_push_int(p->cg, 0, ty_int(p));
- return 1;
- }
-
- if (name == p->sym_b_va_copy) {
- parse_assign_expr(p);
- cg_addr(p->cg);
- expect_punct(p, ',', "',' in __builtin_va_copy");
- parse_assign_expr(p);
- cg_addr(p->cg);
- expect_punct(p, ')', "')' after __builtin_va_copy");
- cg_set_loc(p->cg, loc);
- cg_va_copy_(p->cg);
- cg_push_int(p->cg, 0, ty_int(p));
- return 1;
- }
-
- if (name == p->sym_b_va_arg) {
- parse_assign_expr(p);
- cg_addr(p->cg);
- expect_punct(p, ',', "',' in __builtin_va_arg");
- const Type* ty = parse_type_name(p);
- expect_punct(p, ')', "')' after __builtin_va_arg");
- cg_set_loc(p->cg, loc);
- cg_va_arg_(p->cg, ty);
- return 1;
- }
-
- if (name == p->sym_a_load_n) {
- /* `__atomic_load_n(ptr, order)`. The order must be a constant
- * matching one of the predefined `__ATOMIC_*` values (Phase 9
- * predefines 0–5 to align with the MemOrder enum). */
- parse_assign_expr(p);
- to_rvalue(p);
- expect_punct(p, ',', "',' in __atomic_load_n");
- i64 ord = eval_const_int(p, p->cur.loc);
- expect_punct(p, ')', "')' after __atomic_load_n");
- cg_set_loc(p->cg, loc);
- cg_atomic_load(p->cg, (MemOrder)ord);
- return 1;
- }
-
- if (name == p->sym_a_store_n) {
- parse_assign_expr(p);
- to_rvalue(p);
- expect_punct(p, ',', "',' in __atomic_store_n");
- parse_assign_expr(p);
- to_rvalue(p);
- expect_punct(p, ',', "',' in __atomic_store_n");
- i64 ord = eval_const_int(p, p->cur.loc);
- expect_punct(p, ')', "')' after __atomic_store_n");
- cg_set_loc(p->cg, loc);
- cg_atomic_store(p->cg, (MemOrder)ord);
- cg_push_int(p->cg, 0, ty_int(p));
- return 1;
- }
-
- if (name == p->sym_a_thread_fence || name == p->sym_a_signal_fence) {
- /* `__atomic_thread_fence(order)` / `__atomic_signal_fence(order)`.
- * Both consume an order constant. signal_fence is a compiler barrier
- * only; on real arches we conservatively lower it the same as
- * thread_fence (the backend's fence emits DMB ISH). */
- i64 ord = eval_const_int(p, p->cur.loc);
- expect_punct(p, ')', "')' after atomic fence");
- cg_set_loc(p->cg, loc);
- cg_fence(p->cg, (MemOrder)ord);
- cg_push_int(p->cg, 0, ty_int(p));
- return 1;
- }
-
- if (name == p->sym_a_cas_n) {
- /* `__atomic_compare_exchange_n(ptr, &expected, desired, weak,
- * success_order, failure_order)`.
- * On match: stores `desired` at *ptr; returns 1.
- * On mismatch: stores *ptr (the prior value) at *expected; returns 0.
- *
- * Strategy: pin &expected to a local, lower the CAS to [prior, ok]
- * via cg, save both to locals, conditionally store prior to *expected
- * on the failure branch, then push ok as the i32 result. Routing
- * through frame slots keeps the value stack balanced across the
- * conditional. */
- parse_assign_expr(p); to_rvalue(p); /* ptr */
- expect_punct(p, ',', "',' in __atomic_compare_exchange_n");
-
- parse_assign_expr(p); to_rvalue(p); /* &expected */
- const Type* eptr_ty = cg_top_type(p->cg);
- if (!eptr_ty || eptr_ty->kind != TY_PTR) {
- perr(p, "__atomic_compare_exchange_n: arg 2 must be a pointer");
- }
- const Type* val_ty = eptr_ty->ptr.pointee;
-
- /* Stash &expected. */
- FrameSlotDesc fsd; memset(&fsd, 0, sizeof fsd);
- fsd.type = eptr_ty; fsd.size = 8; fsd.align = 8; fsd.kind = FS_LOCAL;
- FrameSlot eslot = cg_local(p->cg, &fsd);
- cg_push_local_typed(p->cg, eslot, eptr_ty);
- cg_swap(p->cg); /* [ptr, eslot_lv, &expected] */
- cg_store(p->cg); cg_drop(p->cg); /* [ptr] */
-
- /* Load expected_val = *expected. */
- cg_push_local_typed(p->cg, eslot, eptr_ty);
- cg_load(p->cg);
- cg_deref(p->cg, val_ty);
- cg_load(p->cg); /* [ptr, expected_val] */
-
- expect_punct(p, ',', "',' in __atomic_compare_exchange_n");
- parse_assign_expr(p); to_rvalue(p); /* desired */
- expect_punct(p, ',', "',' in __atomic_compare_exchange_n");
-
- /* Stack: [ptr, expected_val, desired]. */
- (void)eval_const_int(p, p->cur.loc); /* weak (ignored — strong CAS) */
- expect_punct(p, ',', "',' in __atomic_compare_exchange_n");
- i64 succ = eval_const_int(p, p->cur.loc);
- expect_punct(p, ',', "',' in __atomic_compare_exchange_n");
- i64 fail = eval_const_int(p, p->cur.loc);
- expect_punct(p, ')', "')' after __atomic_compare_exchange_n");
-
- cg_set_loc(p->cg, loc);
- cg_atomic_cas(p->cg, (MemOrder)succ, (MemOrder)fail);
- /* Stack: [prior, ok]. */
-
- /* Stash ok. */
- const Type* ok_ty = cg_top_type(p->cg);
- FrameSlotDesc okd; memset(&okd, 0, sizeof okd);
- okd.type = ok_ty; okd.size = 4; okd.align = 4; okd.kind = FS_LOCAL;
- FrameSlot okslot = cg_local(p->cg, &okd);
- cg_push_local_typed(p->cg, okslot, ok_ty);
- cg_swap(p->cg); cg_store(p->cg); cg_drop(p->cg); /* [prior] */
-
- /* Stash prior. */
- FrameSlotDesc pd; memset(&pd, 0, sizeof pd);
- pd.type = val_ty;
- pd.size = abi_sizeof(p->abi, val_ty);
- pd.align = abi_alignof(p->abi, val_ty);
- pd.kind = FS_LOCAL;
- FrameSlot pslot = cg_local(p->cg, &pd);
- cg_push_local_typed(p->cg, pslot, val_ty);
- cg_swap(p->cg); cg_store(p->cg); cg_drop(p->cg); /* [] */
-
- /* if (!ok) *expected = prior; */
- cg_push_local_typed(p->cg, okslot, ok_ty);
- cg_load(p->cg);
- CGLabel L_done = cg_label_new(p->cg);
- cg_branch_true(p->cg, L_done);
- /* writeback */
- cg_push_local_typed(p->cg, eslot, eptr_ty);
- cg_load(p->cg);
- cg_deref(p->cg, val_ty);
- cg_push_local_typed(p->cg, pslot, val_ty);
- cg_load(p->cg);
- cg_store(p->cg); cg_drop(p->cg);
- cg_label_place(p->cg, L_done);
-
- /* Push ok as the i32 result. */
- cg_push_local_typed(p->cg, okslot, ok_ty);
- cg_load(p->cg);
- return 1;
- }
-
- /* The rmw family — exchange / fetch_{add,sub,and,or,xor} share the same
- * (ptr, val, order) shape; map name → AtomicOp. */
- AtomicOp op;
- if (name == p->sym_a_exchange_n) op = AO_XCHG;
- else if (name == p->sym_a_fetch_add) op = AO_ADD;
- else if (name == p->sym_a_fetch_sub) op = AO_SUB;
- else if (name == p->sym_a_fetch_and) op = AO_AND;
- else if (name == p->sym_a_fetch_or) op = AO_OR;
- else if (name == p->sym_a_fetch_xor) op = AO_XOR;
- else { perr(p, "internal: unhandled builtin"); }
-
- parse_assign_expr(p);
- to_rvalue(p);
- expect_punct(p, ',', "',' in atomic builtin");
- parse_assign_expr(p);
- to_rvalue(p);
- expect_punct(p, ',', "',' in atomic builtin");
- i64 ord = eval_const_int(p, p->cur.loc);
- expect_punct(p, ')', "')' after atomic builtin");
- cg_set_loc(p->cg, loc);
- cg_atomic_rmw(p->cg, op, (MemOrder)ord);
- return 1;
-}
-
-/* Parse one primary expression and push its value (or lvalue) on the cg
- * value stack. Recognized forms: integer / floating / character / string
- * literals, `( expression )`, builtin calls, and declared identifiers. Any
- * other token is reported as "expected expression". */
-static void parse_primary(Parser* p) {
-  Tok tok = p->cur;
-
-  if (tok.kind == TOK_NUM) {
-    i64 value = parse_int_literal(p, &tok);
-    const Type* lit_ty = int_literal_type(p, &tok);
-    advance(p);
-    cg_push_int(p->cg, value, lit_ty);
-    return;
-  }
-
-  if (tok.kind == TOK_FLT) {
-    double value = parse_float_literal(p, &tok);
-    const Type* lit_ty = float_literal_type(p, &tok);
-    advance(p);
-    cg_push_float(p->cg, value, lit_ty);
-    return;
-  }
-
-  if (is_punct(&tok, '(')) {
-    /* Parenthesized expression. */
-    advance(p);
-    parse_expr(p);
-    expect_punct(p, ')', "')'");
-    return;
-  }
-
-  if (tok.kind == TOK_IDENT) {
-    /* A keyword can never stand in for a value. */
-    if (ident_kw(p, tok.v.ident) != KW_NONE) {
-      perr(p, "unexpected keyword in expression");
-    }
-
-    /* Phase 9 builtins: `__builtin_*` / `__atomic_*` followed by `(` have
-     * no SymEntry, so they are tried before scope_lookup and lowered to
-     * dedicated cg primitives (or constants) rather than cg_call. */
-    Tok after = peek1(p);
-    if (is_punct(&after, '(') && try_parse_builtin_call(p)) return;
-
-    SymEntry* entry = scope_lookup(p, tok.v.ident);
-    if (!entry) {
-      size_t name_len = 0;
-      const char* name = pool_str(p->pool, tok.v.ident, &name_len);
-      compiler_panic(p->c, tok.loc, "undeclared identifier '%.*s'",
-                     (int)name_len, name ? name : "?");
-    }
-    advance(p);
-
-    if (entry->kind == SEK_LOCAL) {
-      cg_push_local_typed(p->cg, entry->v.slot, entry->type);
-      /* Remember the VLA byte-size slot so sizeof can pick it up. */
-      if (entry->vla_byte_slot != FRAME_SLOT_NONE) {
-        p->last_pushed_vla_slot = entry->vla_byte_slot;
-      }
-      return;
-    }
-    if (entry->kind == SEK_GLOBAL || entry->kind == SEK_FUNC) {
-      cg_push_global(p->cg, entry->v.sym, entry->type);
-      return;
-    }
-    if (entry->kind == SEK_ENUM_CST) {
-      cg_push_int(p->cg, entry->v.enum_value, entry->type);
-      return;
-    }
-    /* Typedef names and anything else do not denote a value. */
-    perr(p, "identifier is not a value");
-  }
-
-  if (tok.kind == TOK_CHR) {
-    i64 value = decode_char_literal(p, &tok);
-    advance(p);
-    cg_push_int(p->cg, value, ty_int(p));
-    return;
-  }
-
-  if (tok.kind == TOK_STR) {
-    /* §6.4.5 ¶6: the literal has type `char[N]`, N counting the NUL. It is
-     * pushed as a GLOBAL lvalue of that array type so sizeof/_Alignof see
-     * the array, subscripting scales by the element size, and the usual
-     * array-to-pointer decay (to_rvalue / parse_postfix '[') applies
-     * everywhere else. */
-    size_t nbytes = 0;
-    u8* decoded = decode_string_literal(p, &tok, &nbytes);
-    ObjSymId rodata_sym = emit_string_to_rodata(p, decoded, nbytes);
-    p->c->env->heap->free(p->c->env->heap, decoded, 0);
-    advance(p);
-
-    const Type* elem_ty = type_prim(p->pool, TY_CHAR);
-    const Type* array_ty = type_array(p->pool, elem_ty, (u32)nbytes, 0);
-    cg_push_global(p->cg, rodata_sym, array_ty);
-    return;
-  }
-
-  perr(p, "expected expression");
-}
-
-/* Postfix `(`: call the function designator or function pointer currently
- * on top of the value stack. On entry p->cur is the '('. */
-static void postfix_call(Parser* p) {
-  /* The callee was pushed by parse_primary as a function lvalue (OPK_GLOBAL
-   * when SEK_FUNC); a pointer-to-function callee is accepted too. */
-  const Type* callee = cg_top_type(p->cg);
-  const Type* fn_type;
-  if (callee && callee->kind == TY_FUNC) {
-    fn_type = callee;
-  } else if (callee && callee->kind == TY_PTR && callee->ptr.pointee &&
-             callee->ptr.pointee->kind == TY_FUNC) {
-    fn_type = callee->ptr.pointee;
-    /* Materialize the pointer rvalue now (cg_call's force_reg fallback
-     * would also do it) so the value stack has settled to a register
-     * before argument evaluation starts. */
-    cg_load(p->cg);
-  } else {
-    perr(p, "called object is not a function");
-  }
-
-  advance(p); /* '(' */
-  u32 nargs = 0;
-  if (!is_punct(&p->cur, ')')) {
-    do {
-      parse_assign_expr(p);
-      to_rvalue(p);
-      ++nargs;
-    } while (accept_punct(p, ','));
-  }
-  expect_punct(p, ')', "')' after argument list");
-
-  if (fn_type->fn.variadic) {
-    if (nargs < fn_type->fn.nparams) {
-      perr(p, "too few arguments to variadic function");
-    }
-  } else if (nargs != fn_type->fn.nparams) {
-    perr(p, "wrong number of arguments");
-  }
-
-  cg_call(p->cg, nargs, fn_type);
-  /* cg_call pushes nothing for a void-returning function, but the
-   * surrounding expression machinery (statement-level drop, ternary
-   * dispatch, ...) expects a top SValue. Push a sentinel int 0 so `f();`
-   * does not underflow the stack; using the value is invalid C anyway. */
-  if (fn_type->fn.ret && fn_type->fn.ret->kind == TY_VOID) {
-    cg_push_int(p->cg, 0, ty_int(p));
-  }
-}
-
-/* Postfix `[`: subscript `e1[e2]`, lowered as `*((e1) + (e2))` per
- * §6.5.2.1. The pointer side is resolved after the index is parsed, so the
- * commutative `i[a]` form works as well. On entry p->cur is the '['. */
-static void postfix_subscript(Parser* p) {
-  const Type* base0 = cg_top_type(p->cg);
-  advance(p); /* '[' */
-
-  /* Array base decays to a pointer, pointer base is loaded; an integer base
-   * is left untouched and commuted below if needed. */
-  if (base0 && base0->kind == TY_ARRAY) {
-    cg_addr(p->cg);
-    cg_retag_top(p->cg, type_ptr(p->pool, base0->arr.elem));
-  } else if (base0 && base0->kind == TY_PTR) {
-    cg_load(p->cg);
-  }
-
-  parse_expr(p);
-
-  /* Same treatment for the bracketed operand. */
-  const Type* index0 = cg_top_type(p->cg);
-  if (index0 && index0->kind == TY_ARRAY) {
-    cg_addr(p->cg);
-    cg_retag_top(p->cg, type_ptr(p->pool, index0->arr.elem));
-  } else {
-    to_rvalue(p);
-  }
-  expect_punct(p, ']', "']' after subscript");
-
-  const Type* lower = cg_top2_type(p->cg);
-  const Type* upper = cg_top_type(p->cg);
-  const Type* elem;
-  if (lower && lower->kind == TY_PTR && type_is_int(upper)) {
-    elem = lower->ptr.pointee;
-  } else if (upper && upper->kind == TY_PTR && type_is_int(lower)) {
-    /* Put the pointer underneath the integer for the add below. */
-    cg_swap(p->cg);
-    elem = upper->ptr.pointee;
-  } else {
-    perr(p, "invalid subscript: needs one pointer and one integer");
-  }
-  if (!elem) perr(p, "subscript on incomplete pointee");
-
-  /* Scale the index by the element size unless that size is 1. */
-  u32 elem_size = abi_sizeof(p->abi, elem);
-  if (elem_size != 1) {
-    cg_push_int(p->cg, (i64)elem_size, ty_size_t(p));
-    cg_binop(p->cg, BO_IMUL);
-  }
-  cg_binop(p->cg, BO_IADD);
-  cg_deref(p->cg, elem);
-}
-
-/* Postfix `.`: `e.member`, where `e` is an lvalue of struct/union type. The
- * result is an lvalue of the field's type reached through the parent's
- * address (LOCAL/GLOBAL/INDIRECT all collapse to INDIRECT once the address
- * is taken). Anonymous aggregate members are flattened by find_field. On
- * entry p->cur is the '.'. */
-static void postfix_dot(Parser* p) {
-  const Type* rec_ty = cg_top_type(p->cg);
-  const Type* field_ty = NULL;
-  u32 field_off = 0;
-  const Field* field = NULL;
-
-  advance(p); /* '.' */
-  if (!rec_ty || (rec_ty->kind != TY_STRUCT && rec_ty->kind != TY_UNION)) {
-    perr(p, "request for member in something that is not a struct or union");
-  }
-  if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) {
-    perr(p, "expected member name after '.'");
-  }
-  Sym field_name = p->cur.v.ident;
-  advance(p);
-  if (!find_field(p->abi, rec_ty, field_name, &field_ty, &field_off, &field)) {
-    perr(p, "no such member");
-  }
-
-  cg_addr(p->cg);
-  cg_retag_top(p->cg, type_ptr(p->pool, field_ty));
-  if (field_off > 0) {
-    cg_push_int(p->cg, (i64)field_off, ty_size_t(p));
-    cg_binop(p->cg, BO_IADD);
-  }
-  cg_deref(p->cg, field_ty);
-}
-
-/* Postfix `->`: `e->member`, where `e` is a pointer to struct/union. On
- * entry p->cur is the `->`. */
-static void postfix_arrow(Parser* p) {
-  const Type* field_ty = NULL;
-  u32 field_off = 0;
-  const Field* field = NULL;
-
-  advance(p); /* `->` */
-  to_rvalue(p);
-  const Type* ptr_ty = cg_top_type(p->cg);
-  if (!ptr_ty || ptr_ty->kind != TY_PTR) {
-    perr(p, "'->' requires a pointer operand");
-  }
-  const Type* rec_ty = ptr_ty->ptr.pointee;
-  if (!rec_ty || (rec_ty->kind != TY_STRUCT && rec_ty->kind != TY_UNION)) {
-    perr(p, "'->' on pointer to non-struct/union");
-  }
-  if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) {
-    perr(p, "expected member name after '->'");
-  }
-  Sym field_name = p->cur.v.ident;
-  advance(p);
-  if (!find_field(p->abi, rec_ty, field_name, &field_ty, &field_off, &field)) {
-    perr(p, "no such member");
-  }
-
-  if (field_off > 0) {
-    cg_push_int(p->cg, (i64)field_off, ty_size_t(p));
-    cg_binop(p->cg, BO_IADD);
-  }
-  cg_deref(p->cg, field_ty);
-}
-
-/* Parse a postfix expression: a primary followed by any number of `++`,
- * `--`, call, subscript, `.member` and `->member` suffixes. */
-static void parse_postfix(Parser* p) {
-  parse_primary(p);
-  for (;;) {
-    if (is_punct(&p->cur, P_INC)) {
-      advance(p);
-      cg_inc_dec(p->cg, BO_IADD, /*post=*/1);
-    } else if (is_punct(&p->cur, P_DEC)) {
-      advance(p);
-      cg_inc_dec(p->cg, BO_ISUB, /*post=*/1);
-    } else if (is_punct(&p->cur, '(')) {
-      postfix_call(p);
-    } else if (is_punct(&p->cur, '[')) {
-      postfix_subscript(p);
-    } else if (is_punct(&p->cur, '.')) {
-      postfix_dot(p);
-    } else if (is_punct(&p->cur, P_ARROW)) {
-      postfix_arrow(p);
-    } else {
-      break;
-    }
-  }
-}
-
-/* Allocate or grow the arena buffer that records the tokens of a `_Generic`
- * default association. Returns a fresh array of `new_cap` tokens holding
- * the first `len` tokens of `buf` (`buf` may be NULL when `len` is 0).
- * A NULL result from the arena is reported through perr. */
-static Tok* generic_default_buf_grow(Parser* p, Tok* buf, u32 len,
-                                     u32 new_cap) {
-  Tok* grown = arena_array(p->c->tu, Tok, new_cap);
-  if (!grown) perr(p, "out of memory recording _Generic default");
-  if (len) memcpy(grown, buf, len * sizeof(Tok));
-  return grown;
-}
-
-/* Consume the tokens of one `_Generic` assoc-expression without emitting
- * code. Scanning stops (without consuming) at the `,` or `)` seen while no
- * paren/bracket/brace is open, or at EOF.
- *
- * When `out_buf` is non-NULL the consumed tokens are also recorded, a
- * sentinel `,` token is appended so a later replayed parse_assign_expr
- * stops cleanly without falling through to pp_next, and the buffer and its
- * length (sentinel included) are stored through `out_buf` / `out_len`.
- * Pass NULL for both to skip only. */
-static void generic_scan_assoc_expr(Parser* p, Tok** out_buf, u32* out_len) {
-  const int record = (out_buf != NULL);
-  Tok* buf = NULL;
-  u32 cap = 0;
-  u32 len = 0;
-  int paren_depth = 0;
-  int brack_depth = 0;
-  int brace_depth = 0;
-
-  if (record) {
-    cap = 16;
-    buf = generic_default_buf_grow(p, NULL, 0, cap);
-  }
-
-  while (p->cur.kind != TOK_EOF) {
-    if (paren_depth == 0 && brack_depth == 0 && brace_depth == 0) {
-      if (is_punct(&p->cur, ',') || is_punct(&p->cur, ')')) break;
-    }
-    if (record) {
-      if (len == cap) {
-        u32 new_cap = cap * 2;
-        buf = generic_default_buf_grow(p, buf, len, new_cap);
-        cap = new_cap;
-      }
-      buf[len++] = p->cur;
-    }
-    if (is_punct(&p->cur, '(')) ++paren_depth;
-    else if (is_punct(&p->cur, ')')) --paren_depth;
-    else if (is_punct(&p->cur, '[')) ++brack_depth;
-    else if (is_punct(&p->cur, ']')) --brack_depth;
-    else if (is_punct(&p->cur, '{')) ++brace_depth;
-    else if (is_punct(&p->cur, '}')) --brace_depth;
-    advance(p);
-  }
-
-  if (record) {
-    if (len == cap) {
-      u32 new_cap = cap + 1;
-      buf = generic_default_buf_grow(p, buf, len, new_cap);
-      cap = new_cap;
-    }
-    memset(&buf[len], 0, sizeof(Tok));
-    buf[len].kind = TOK_PUNCT;
-    buf[len].v.punct = ',';
-    ++len;
-    *out_buf = buf;
-    *out_len = len;
-  }
-}
-
-/* Parse a unary-expression / cast-expression and leave its result on the cg
- * value stack (lvalue or rvalue; callers apply to_rvalue when they need the
- * value).
- *
- * sizeof / _Alignof and cast all parse a type-name from inside parentheses;
- * detection at `(` requires looking past the opening paren. The work is the
- * same: dispatch on what comes next. */
-static void parse_unary(Parser* p) {
-  Tok t = p->cur;
-  /* Cast expression `(type-name) cast`. Disambiguated against `(expr)`
-   * by checking the token immediately after `(`. */
-  if (is_punct(&t, '(')) {
-    Tok n = peek1(p);
-    if (starts_type_name(p, &n)) {
-      const Type* dst;
-      const Type* src;
-      advance(p); /* '(' */
-      dst = parse_type_name(p);
-      expect_punct(p, ')', "')' after type-name");
-      /* Compound literal `(type-name) { init-list }` per §6.5.2.5. The
-       * literal has automatic storage in the enclosing block (function
-       * scope here — same lifetime as a local). Allocate a hidden frame
-       * slot, parse the brace initializer into it, and push the slot's
-       * lvalue. Outer postfix/cast machinery handles array-to-pointer
-       * decay if the consumer expects an rvalue. */
-      if (is_punct(&p->cur, '{')) {
-        FrameSlotDesc fsd;
-        FrameSlot slot;
-        const Type* lit_ty = dst;
-        if (lit_ty && lit_ty->kind == TY_ARRAY && lit_ty->arr.incomplete) {
-          lit_ty = complete_incomplete_array(p, lit_ty);
-        }
-        memset(&fsd, 0, sizeof fsd);
-        fsd.type = lit_ty;
-        fsd.size = abi_sizeof(p->abi, lit_ty);
-        fsd.align = abi_alignof(p->abi, lit_ty);
-        fsd.kind = FS_LOCAL;
-        fsd.flags = FSF_NONE;
-        slot = cg_local(p->cg, &fsd);
-        /* Aggregates and scalars (e.g. `(int){42}`) share one initializer
-         * path. */
-        init_at(p, slot, lit_ty, 0, lit_ty);
-        cg_push_local_typed(p->cg, slot, lit_ty);
-        return;
-      }
-      parse_unary(p); /* cast-expression */
-      to_rvalue(p);
-      /* `(void) expr` is the C idiom for "discard the value": nothing is
-       * converted. The rvalue is dropped and a sentinel int 0 is pushed in
-       * its place so callers that expect a top-of-stack value (statement-
-       * level drop, a higher-level to_rvalue) still find one. `(void)e`
-       * cannot appear where a value is required, so the sentinel is never
-       * meaningfully consumed. */
-      if (dst && dst->kind == TY_VOID) {
-        cg_drop(p->cg);
-        cg_push_int(p->cg, 0, ty_int(p));
-        return;
-      }
-      src = cg_top_type(p->cg);
-      /* Pointer-to-pointer cast is a no-op at the value level once the
-       * pointer is already in a register. Skip cg_convert (which would
-       * dispatch to the backend's same-class bitcast, not implemented for
-       * register-resident pointers) and just retag the top SValue so a
-       * later dereference picks the right pointee. */
-      if (src && src->kind == TY_PTR && dst->kind == TY_PTR) {
-        cg_retag_top(p->cg, dst);
-        return;
-      }
-      cg_convert(p->cg, dst);
-      return;
-    }
-    /* fall through to parse_postfix → parse_primary which handles `(expr)`. */
-  }
-  if (is_punct(&t, '+')) {
-    advance(p);
-    parse_unary(p);
-    to_rvalue(p);
-    return;
-  }
-  if (is_punct(&t, '-')) {
-    advance(p);
-    parse_unary(p);
-    to_rvalue(p);
-    cg_unop(p->cg, UO_NEG);
-    return;
-  }
-  if (is_punct(&t, '!')) {
-    advance(p);
-    parse_unary(p);
-    to_rvalue(p);
-    /* Logical not via cmp == 0. */
-    cg_push_int(p->cg, 0, ty_int(p));
-    cg_cmp(p->cg, CMP_EQ);
-    return;
-  }
-  if (is_punct(&t, '~')) {
-    advance(p);
-    parse_unary(p);
-    to_rvalue(p);
-    cg_unop(p->cg, UO_BNOT);
-    return;
-  }
-  if (is_punct(&t, '&')) {
-    advance(p);
-    parse_unary(p);
-    /* The operand is required to be an lvalue; cg_addr panics otherwise. */
-    cg_addr(p->cg);
-    return;
-  }
-  if (is_punct(&t, '*')) {
-    /* Dereference: parse the operand, force to a pointer rvalue, then
-     * derive the INDIRECT lvalue. The pointee type drives the next access. */
-    const Type* pty;
-    const Type* pointee;
-    advance(p);
-    parse_unary(p);
-    to_rvalue(p);
-    pty = cg_top_type(p->cg);
-    if (!pty || pty->kind != TY_PTR) {
-      perr(p, "indirection requires pointer operand");
-    }
-    pointee = pty->ptr.pointee;
-    if (pointee && pointee->kind == TY_VOID) {
-      perr(p, "dereferencing pointer to incomplete type");
-    }
-    cg_deref(p->cg, pointee);
-    return;
-  }
-  if (is_punct(&t, P_INC) || is_punct(&t, P_DEC)) {
-    BinOp bop = is_punct(&t, P_INC) ? BO_IADD : BO_ISUB;
-    advance(p);
-    parse_unary(p);
-    cg_inc_dec(p->cg, bop, /*post=*/0);
-    return;
-  }
-  if (is_kw(p, &t, KW_SIZEOF)) {
-    /* sizeof has two forms: `sizeof ( type-name )` and `sizeof unary`.
-     *
-     * §6.5.3.4 says the expression operand is not evaluated, which is
-     * awkward in single-pass codegen. The operand is parsed through the
-     * regular unary path, its type is read off the cg stack, and the value
-     * is dropped. lvalues stay lvalues (no load is emitted), so for array,
-     * subscript and member-access operands this is side-effect-free. A
-     * parenthesized expression takes the same path; paren-primary handles
-     * the `(`. */
-    const Type* ty = NULL;
-    FrameSlot vla_slot = FRAME_SLOT_NONE;
-    int type_operand = 0;
-    advance(p);
-    if (is_punct(&p->cur, '(')) {
-      Tok n = peek1(p);
-      type_operand = starts_type_name(p, &n);
-    }
-    if (type_operand) {
-      advance(p); /* '(' */
-      ty = parse_type_name(p);
-      expect_punct(p, ')', "')'");
-    } else {
-      /* parse_primary records the VLA byte-size slot of a local it pushes;
-       * clear it first so a stale value is never picked up. */
-      p->last_pushed_vla_slot = FRAME_SLOT_NONE;
-      parse_unary(p);
-      ty = cg_top_type(p->cg);
-      vla_slot = p->last_pushed_vla_slot;
-      cg_drop(p->cg);
-    }
-    if (vla_slot != FRAME_SLOT_NONE) {
-      /* sizeof on a VLA-bound IDENT: emit the runtime byte-size load
-       * instead of the constant pointer width. */
-      cg_push_local_typed(p->cg, vla_slot, ty_size_t(p));
-      cg_load(p->cg);
-    } else {
-      cg_push_int(p->cg, (i64)abi_sizeof(p->abi, ty), ty_size_t(p));
-    }
-    return;
-  }
-  if (is_kw(p, &t, KW_GENERIC)) {
-    /* `_Generic ( controlling-expr , generic-association+ )`
-     *
-     * §6.5.1.1: the controlling-expression is not evaluated. Single-pass
-     * codegen makes that awkward — we instead evaluate it (cheap when the
-     * spine has no side-effecting operands), drop the value, and then
-     * emit code only for the selected association.
-     *
-     * Associations are walked once, left to right:
-     *   - the first typed association whose Type kind equals the
-     *     controlling expression's kind is parsed and emitted in place
-     *     (only `kind` is compared);
-     *   - a `default:` association seen before any match has its tokens
-     *     recorded so it can be replayed if nothing matches;
-     *   - every other assoc-expr is skipped with paren/bracket/brace
-     *     balancing so it generates no code. */
-    advance(p);
-    expect_punct(p, '(', "'('");
-    parse_assign_expr(p);
-    to_rvalue(p);
-    const Type* ctl_ty = cg_top_type(p->cg);
-    cg_drop(p->cg);
-    expect_punct(p, ',', "','");
-    int emitted = 0;
-    /* Recorded tokens of the `default:` assoc-expr plus a trailing sentinel
-     * `,`. Recording happens at most once (the C standard allows at most
-     * one default). */
-    Tok* default_buf = NULL;
-    u32 default_len = 0;
-    for (;;) {
-      const Type* assoc_ty = NULL;
-      int is_default = 0;
-      if (is_kw(p, &p->cur, KW_DEFAULT)) {
-        advance(p);
-        is_default = 1;
-      } else {
-        assoc_ty = parse_type_name(p);
-      }
-      expect_punct(p, ':', "':' in _Generic association");
-      int take = 0;
-      if (!emitted && !is_default && ctl_ty && assoc_ty &&
-          ctl_ty->kind == assoc_ty->kind) {
-        take = 1;
-      }
-      if (take) {
-        parse_assign_expr(p);
-        emitted = 1;
-      } else if (is_default && !emitted && !default_buf) {
-        /* Might still be needed: record for replay after the loop. */
-        generic_scan_assoc_expr(p, &default_buf, &default_len);
-      } else {
-        /* Not selected: skip by token-balancing. */
-        generic_scan_assoc_expr(p, NULL, NULL);
-      }
-      if (!accept_punct(p, ',')) break;
-    }
-    if (!emitted && default_buf) {
-      /* No typed association matched; replay the default's recorded
-       * assoc-expr through the replay buffer, then resume the original
-       * stream at the `)`. */
-      Tok* save_replay = p->replay;
-      u32 save_cap = p->replay_cap;
-      u32 save_len = p->replay_len;
-      u32 save_pos = p->replay_pos;
-      u8 save_active = p->replay_active;
-      Tok save_cur = p->cur;
-      int save_has_next = p->has_next;
-      p->replay = default_buf;
-      p->replay_cap = default_len;
-      p->replay_len = default_len;
-      p->replay_pos = 1;
-      p->replay_active = 1;
-      p->cur = default_buf[0];
-      p->has_next = 0;
-      parse_assign_expr(p);
-      emitted = 1;
-      /* Restore the outer stream — we don't consume the trailing
-       * sentinel `,` from the recorded buffer; callers expect cur = `)`
-       * after the loop. */
-      p->replay = save_replay;
-      p->replay_cap = save_cap;
-      p->replay_len = save_len;
-      p->replay_pos = save_pos;
-      p->replay_active = save_active;
-      p->cur = save_cur;
-      p->has_next = save_has_next;
-    }
-    expect_punct(p, ')', "')' after _Generic");
-    if (!emitted) {
-      perr(p, "_Generic: no association matched and no default present");
-    }
-    return;
-  }
-  if (is_kw(p, &t, KW_ALIGNOF)) {
-    /* `_Alignof ( type-name )` per §6.5.3.4 ¶1. The GNU `__alignof__`
-     * alias additionally accepts an expression operand, mirroring sizeof.
-     * Disambiguate at the `(`: type-name → parse_type_name; otherwise
-     * route through parse_unary, read the operand's type, drop. */
-    const Type* ty;
-    advance(p);
-    expect_punct(p, '(', "'('");
-    if (starts_type_name(p, &p->cur)) {
-      ty = parse_type_name(p);
-    } else {
-      parse_unary(p);
-      ty = cg_top_type(p->cg);
-      cg_drop(p->cg);
-    }
-    expect_punct(p, ')', "')'");
-    cg_push_int(p->cg, (i64)abi_alignof(p->abi, ty), ty_size_t(p));
-    return;
-  }
-  parse_postfix(p);
-  /* postfix may have left an lvalue or rvalue. Higher-level callers
-   * issue to_rvalue when they need the value. */
-}
-
-/* Binary operator levels: each level parses its operands by calling the
- * next-tighter level and maps the operators it accepts to their codegen
- * ops. Written as one function per level to keep the call graph readable. */
-
-/* Return 1 when `t` is non-NULL and is float, double or long double;
- * 0 otherwise. */
-static int type_is_fp(const Type* t) {
-  if (!t) return 0;
-  switch (t->kind) {
-    case TY_FLOAT:
-    case TY_DOUBLE:
-    case TY_LDOUBLE:
-      return 1;
-    default:
-      return 0;
-  }
-}
-
-/* FP slice of the §6.3.1.8 usual arithmetic conversions. If at least one of
- * `a` / `b` is a floating type, returns the wider floating type of the two
- * (long double > double > float), to which both operands convert. Returns
- * NULL when neither is floating, telling the caller to use its integer
- * dispatch instead. */
-static const Type* common_fp_type(Parser* p, const Type* a, const Type* b) {
-  const int any_fp = type_is_fp(a) || type_is_fp(b);
-  if (!any_fp) return NULL;
-
-  const int any_ldouble =
-      (a && a->kind == TY_LDOUBLE) || (b && b->kind == TY_LDOUBLE);
-  const int any_double =
-      (a && a->kind == TY_DOUBLE) || (b && b->kind == TY_DOUBLE);
-
-  /* `long double` not yet wired through cg's FP path. */
-  if (any_ldouble) return type_prim(p->pool, TY_LDOUBLE);
-  if (any_double) return type_prim(p->pool, TY_DOUBLE);
-  return type_prim(p->pool, TY_FLOAT);
-}
-
-/* Convert the two topmost stack values to `common` and emit the floating
- * counterpart of the integer-flavoured `bop` (BO_IADD→BO_FADD,
- * BO_ISUB→BO_FSUB, BO_IMUL→BO_FMUL, BO_SDIV→BO_FDIV). Any other operator
- * is rejected with a diagnostic. */
-static void emit_fp_binop(Parser* p, BinOp bop, const Type* common) {
-  /* Two rounds of "convert the top if needed, then swap": the first round
-   * handles rhs and brings lhs to the top, the second handles lhs and
-   * restores the [lhs, rhs] order cg_binop expects. cg_convert pops and
-   * pushes, so the stack shape is unchanged. */
-  for (int round = 0; round < 2; ++round) {
-    if (cg_top_type(p->cg) != common) cg_convert(p->cg, common);
-    cg_swap(p->cg);
-  }
-
-  BinOp fp_op;
-  if (bop == BO_IADD) {
-    fp_op = BO_FADD;
-  } else if (bop == BO_ISUB) {
-    fp_op = BO_FSUB;
-  } else if (bop == BO_IMUL) {
-    fp_op = BO_FMUL;
-  } else if (bop == BO_SDIV) {
-    fp_op = BO_FDIV;
-  } else {
-    perr(p, "operator does not apply to floating types");
-    return;
-  }
-  cg_binop(p->cg, fp_op);
-}
-
-/* Multiplicative level: unary ( ('*' | '/' | '%') unary )*,
- * left-associative. Both operands are forced to rvalues; if either is a
- * floating type the FP form of the operator is emitted, otherwise the
- * integer op. */
-static void parse_mul(Parser* p) {
-  parse_unary(p);
-  for (;;) {
-    BinOp int_op;
-    if (is_punct(&p->cur, '*')) {
-      int_op = BO_IMUL;
-    } else if (is_punct(&p->cur, '/')) {
-      int_op = BO_SDIV;
-    } else if (is_punct(&p->cur, '%')) {
-      int_op = BO_SREM;
-    } else {
-      return;
-    }
-    advance(p);
-    to_rvalue(p); /* lhs */
-    parse_unary(p);
-    to_rvalue(p); /* rhs */
-
-    const Type* lhs_ty = cg_top2_type(p->cg);
-    const Type* rhs_ty = cg_top_type(p->cg);
-    const Type* fp_ty = common_fp_type(p, lhs_ty, rhs_ty);
-    if (!fp_ty) {
-      cg_binop(p->cg, int_op);
-      continue;
-    }
-    emit_fp_binop(p, int_op, fp_ty);
-  }
-}
-
-/* Multiply the integer on top of the stack by the ABI size of `pointee`,
- * unless that size is 1 (in which case nothing is emitted). */
-static void scale_top_by_pointee_size(Parser* p, const Type* pointee) {
-  u32 elem_size = abi_sizeof(p->abi, pointee);
-  if (elem_size != 1) {
-    cg_push_int(p->cg, (i64)elem_size, ty_size_t(p));
-    cg_binop(p->cg, BO_IMUL);
-  }
-}
-
-/* Emit `+` (BO_IADD) or `-` (any other `bop`, i.e. BO_ISUB) for the two
- * topmost stack values, applying C pointer arithmetic where it is due:
- *   ptr + int   → ptr + int * sizeof(*ptr)
- *   int + ptr   → ptr + int * sizeof(*ptr)   (commute, then scale)
- *   ptr - int   → ptr - int * sizeof(*ptr)
- *   ptr - ptr   → (ptr - ptr) / sizeof(*ptr)
- *   otherwise   → FP op when either side is floating, else the integer op
- * Both operands are popped and the result is pushed. */
-static void emit_add_or_sub(Parser* p, BinOp bop) {
-  const Type* lhs_ty = cg_top2_type(p->cg);
-  const Type* rhs_ty = cg_top_type(p->cg);
-  const int lhs_is_ptr = lhs_ty && lhs_ty->kind == TY_PTR;
-  const int rhs_is_ptr = rhs_ty && rhs_ty->kind == TY_PTR;
-  const int adding = (bop == BO_IADD);
-
-  /* ptr +/- int: scale the integer on top, then add or subtract. */
-  if (lhs_is_ptr && type_is_int(rhs_ty)) {
-    scale_top_by_pointee_size(p, lhs_ty->ptr.pointee);
-    cg_binop(p->cg, adding ? BO_IADD : BO_ISUB);
-    return;
-  }
-
-  if (adding) {
-    /* int + ptr: bring the integer to the top, then proceed as above. */
-    if (rhs_is_ptr && type_is_int(lhs_ty)) {
-      cg_swap(p->cg);
-      scale_top_by_pointee_size(p, rhs_ty->ptr.pointee);
-      cg_binop(p->cg, BO_IADD);
-      return;
-    }
-  } else if (lhs_is_ptr && rhs_is_ptr) {
-    /* ptr - ptr: byte difference divided by the element size. */
-    u32 elem_size = abi_sizeof(p->abi, lhs_ty->ptr.pointee);
-    cg_binop(p->cg, BO_ISUB);
-    if (elem_size != 1) {
-      cg_push_int(p->cg, (i64)elem_size, ty_size_t(p));
-      cg_binop(p->cg, BO_SDIV);
-    }
-    return;
-  }
-
-  /* Plain arithmetic. */
-  const Type* fp_ty = common_fp_type(p, lhs_ty, rhs_ty);
-  if (fp_ty) {
-    emit_fp_binop(p, bop, fp_ty);
-    return;
-  }
-  cg_binop(p->cg, bop);
-}
-
-static void parse_add(Parser* p) {
- parse_mul(p);
- for (;;) {
- Tok t = p->cur;
- BinOp bop;
- if (is_punct(&t, '+')) {
- bop = BO_IADD;
- } else if (is_punct(&t, '-')) {
- bop = BO_ISUB;
- } else {
- break;
- }
- advance(p);
- to_rvalue(p);
- parse_mul(p);
- to_rvalue(p);
- emit_add_or_sub(p, bop);
- }
-}
-
-static void parse_shift(Parser* p) {
- parse_add(p);
- for (;;) {
- Tok t = p->cur;
- BinOp bop;
- if (is_punct(&t, P_SHL)) {
- bop = BO_SHL;
- } else if (is_punct(&t, P_SHR)) {
- bop = BO_SHR_S;
- } else {
- break;
- }
- advance(p);
- to_rvalue(p);
- parse_add(p);
- to_rvalue(p);
- cg_binop(p->cg, bop);
- }
-}
-
-static void parse_rel(Parser* p) {
- parse_shift(p);
- for (;;) {
- Tok t = p->cur;
- CmpOp cop;
- if (is_punct(&t, '<')) {
- cop = CMP_LT_S;
- } else if (is_punct(&t, '>')) {
- cop = CMP_GT_S;
- } else if (is_punct(&t, P_LE)) {
- cop = CMP_LE_S;
- } else if (is_punct(&t, P_GE)) {
- cop = CMP_GE_S;
- } else {
- break;
- }
- advance(p);
- to_rvalue(p);
- parse_shift(p);
- to_rvalue(p);
- cg_cmp(p->cg, cop);
- }
-}
-
-static void parse_eq(Parser* p) {
- parse_rel(p);
- for (;;) {
- Tok t = p->cur;
- CmpOp cop;
- if (is_punct(&t, P_EQ)) {
- cop = CMP_EQ;
- } else if (is_punct(&t, P_NE)) {
- cop = CMP_NE;
- } else {
- break;
- }
- advance(p);
- to_rvalue(p);
- parse_rel(p);
- to_rvalue(p);
- cg_cmp(p->cg, cop);
- }
-}
-
-static void parse_band(Parser* p) {
- parse_eq(p);
- while (is_punct(&p->cur, '&')) {
- advance(p);
- to_rvalue(p);
- parse_eq(p);
- to_rvalue(p);
- cg_binop(p->cg, BO_AND);
- }
-}
-
-static void parse_bxor(Parser* p) {
- parse_band(p);
- while (is_punct(&p->cur, '^')) {
- advance(p);
- to_rvalue(p);
- parse_band(p);
- to_rvalue(p);
- cg_binop(p->cg, BO_XOR);
- }
-}
-
-static void parse_bor(Parser* p) {
- parse_bxor(p);
- while (is_punct(&p->cur, '|')) {
- advance(p);
- to_rvalue(p);
- parse_bxor(p);
- to_rvalue(p);
- cg_binop(p->cg, BO_OR);
- }
-}
-
-/* Logical && / || are short-circuiting: the right operand is evaluated
- * only when the left does not already determine the result. We lower
- * each as a label-driven branch sequence that materializes a 0/1 i32
- * result. Both produce an int rvalue regardless of operand types
- * (per §6.5.13/14).
- *
- * a && b lowers to: a || b lowers to:
- * <a>; jz Lfalse <a>; jnz Ltrue
- * <b>; jz Lfalse <b>; jnz Ltrue
- * store 1 → tmp; jmp Lend store 0 → tmp; jmp Lend
- * Lfalse: store 0 → tmp Ltrue: store 1 → tmp
- * Lend: load tmp Lend: load tmp
- *
- * The result is routed through a frame slot for the same reason ternary
- * is: cg's abstract value stack is linear-flow only, so a naive push
- * from each arm leaves two operands at the merge instead of one. */
-static FrameSlot ll_tmp_slot(Parser* p, const Type* ty) {
- FrameSlotDesc fsd;
- memset(&fsd, 0, sizeof fsd);
- fsd.type = ty;
- fsd.size = abi_sizeof(p->abi, ty);
- fsd.align = abi_alignof(p->abi, ty);
- fsd.kind = FS_LOCAL;
- fsd.flags = FSF_NONE;
- return cg_local(p->cg, &fsd);
-}
-
-static void ll_store_const(Parser* p, FrameSlot tmp, const Type* ty, i64 v) {
- cg_push_local_typed(p->cg, tmp, ty);
- cg_push_int(p->cg, v, ty);
- cg_store(p->cg);
- cg_drop(p->cg);
-}
-
-static void parse_land(Parser* p) {
- parse_bor(p);
- while (is_punct(&p->cur, P_AND)) {
- CGLabel L_false = cg_label_new(p->cg);
- CGLabel L_end = cg_label_new(p->cg);
- const Type* result_ty = ty_int(p);
- FrameSlot tmp = ll_tmp_slot(p, result_ty);
- advance(p);
- to_rvalue(p);
- cg_branch_false(p->cg, L_false);
- parse_bor(p);
- to_rvalue(p);
- cg_branch_false(p->cg, L_false);
- ll_store_const(p, tmp, result_ty, 1);
- cg_jump(p->cg, L_end);
- cg_label_place(p->cg, L_false);
- ll_store_const(p, tmp, result_ty, 0);
- cg_label_place(p->cg, L_end);
- cg_push_local_typed(p->cg, tmp, result_ty);
- }
-}
-
-static void parse_lor(Parser* p) {
- parse_land(p);
- while (is_punct(&p->cur, P_OR)) {
- CGLabel L_true = cg_label_new(p->cg);
- CGLabel L_end = cg_label_new(p->cg);
- const Type* result_ty = ty_int(p);
- FrameSlot tmp = ll_tmp_slot(p, result_ty);
- advance(p);
- to_rvalue(p);
- cg_branch_true(p->cg, L_true);
- parse_land(p);
- to_rvalue(p);
- cg_branch_true(p->cg, L_true);
- ll_store_const(p, tmp, result_ty, 0);
- cg_jump(p->cg, L_end);
- cg_label_place(p->cg, L_true);
- ll_store_const(p, tmp, result_ty, 1);
- cg_label_place(p->cg, L_end);
- cg_push_local_typed(p->cg, tmp, result_ty);
- }
-}
-
-/* Ternary `c ? t : f`. The cg value stack is linear-flow only, so a naive
- * "push from each arm" leaves the stack in an inconsistent state at the
- * merge point. We materialize the result through a fresh local: each arm
- * stores into the same slot, the merge label reloads. v1 picks the slot's
- * type from the then-arm and assumes the else-arm is the same type
- * (matches the §6.5.15 corpus rows; full usual-conversions rules slot in
- * with Phase 7).
- *
- * `&&` / `||` use the same temp-slot merge pattern (see parse_land /
- * parse_lor above); the ternary differs only in that its two arms are
- * arbitrary expressions rather than the constant 0/1. */
-static void parse_ternary(Parser* p) {
- parse_lor(p);
- if (!is_punct(&p->cur, '?')) return;
- CGLabel L_else = cg_label_new(p->cg);
- CGLabel L_end = cg_label_new(p->cg);
- const Type* result_ty = ty_int(p);
- FrameSlot tmp;
- FrameSlotDesc fsd;
- /* Pop the cond, branch on it. */
- advance(p); /* '?' */
- to_rvalue(p);
- cg_branch_false(p->cg, L_else);
- parse_assign_expr(p);
- to_rvalue(p);
- /* Update result_ty from the then-arm (a closer approximation than int). */
- result_ty = cg_top_type(p->cg);
- if (!result_ty) result_ty = ty_int(p);
- memset(&fsd, 0, sizeof fsd);
- fsd.type = result_ty;
- fsd.size = abi_sizeof(p->abi, result_ty);
- fsd.align = abi_alignof(p->abi, result_ty);
- fsd.kind = FS_LOCAL;
- fsd.flags = FSF_NONE;
- tmp = cg_local(p->cg, &fsd);
- /* Store then-arm value into tmp. cg_store needs [lv, rv]; the rvalue
- * is already on top, so push the lvalue and swap. */
- cg_push_local_typed(p->cg, tmp, result_ty);
- cg_swap(p->cg);
- cg_store(p->cg);
- cg_drop(p->cg); /* cg_store leaves the rvalue; drop in stmt-style usage */
- cg_jump(p->cg, L_end);
- cg_label_place(p->cg, L_else);
- expect_punct(p, ':', "':' in ternary");
- parse_assign_expr(p);
- to_rvalue(p);
- /* §6.5.15 ¶5 usual arithmetic conversions: if the else-arm's type
- * differs from the slot type chosen from the then-arm, coerce so the
- * store types line up. v1 only converts the else-arm down/up to match
- * the then-arm; full common-type widening lives behind the buffered-
- * arms rewrite that's still pending. */
- if (cg_top_type(p->cg) != result_ty) {
- cg_convert(p->cg, result_ty);
- }
- cg_push_local_typed(p->cg, tmp, result_ty);
- cg_swap(p->cg);
- cg_store(p->cg);
- cg_drop(p->cg);
- cg_label_place(p->cg, L_end);
- /* At the merge, push the slot lvalue; callers can to_rvalue if needed. */
- cg_push_local_typed(p->cg, tmp, result_ty);
-}
-
-static void parse_assign_expr(Parser* p) {
- parse_ternary(p);
- /* The LHS is now on the CG stack. If it's an lvalue we may consume it
- * for assignment; otherwise we keep the rvalue as the final result. */
- Tok t = p->cur;
- BinOp compound;
- int is_simple_assign;
- if (is_punct(&t, '=')) {
- is_simple_assign = 1;
- compound = (BinOp)0;
- } else if (is_punct(&t, P_ADD_ASSIGN)) {
- is_simple_assign = 0; compound = BO_IADD;
- } else if (is_punct(&t, P_SUB_ASSIGN)) {
- is_simple_assign = 0; compound = BO_ISUB;
- } else if (is_punct(&t, P_MUL_ASSIGN)) {
- is_simple_assign = 0; compound = BO_IMUL;
- } else if (is_punct(&t, P_DIV_ASSIGN)) {
- is_simple_assign = 0; compound = BO_SDIV;
- } else if (is_punct(&t, P_MOD_ASSIGN)) {
- is_simple_assign = 0; compound = BO_SREM;
- } else if (is_punct(&t, P_AND_ASSIGN)) {
- is_simple_assign = 0; compound = BO_AND;
- } else if (is_punct(&t, P_OR_ASSIGN)) {
- is_simple_assign = 0; compound = BO_OR;
- } else if (is_punct(&t, P_XOR_ASSIGN)) {
- is_simple_assign = 0; compound = BO_XOR;
- } else if (is_punct(&t, P_SHL_ASSIGN)) {
- is_simple_assign = 0; compound = BO_SHL;
- } else if (is_punct(&t, P_SHR_ASSIGN)) {
- is_simple_assign = 0; compound = BO_SHR_S;
- } else {
- return;
- }
- advance(p);
- if (is_simple_assign) {
- /* LHS lvalue is on stack. Parse RHS, store. The result of the
- * assignment is the assigned value; for the spine we leave the stack
- * empty after store (statement context), which is correct for
- * `x = expr;` and for the for-init `i = 1` since the value is
- * discarded. To support assignment-as-expression, we'd need to
- * cg_dup the LHS first and re-load after store. */
- parse_assign_expr(p);
- to_rvalue(p);
- coerce_top_to_lvalue(p);
- cg_store(p->cg);
- return;
- }
- /* Compound: x += y → load x, compute, store. We need to keep the LHS
- * lvalue and produce a new rvalue. Stack: [lv]. Sequence:
- * dup [lv, lv]
- * load [lv, x]
- * parse RHS, rvalue [lv, x, y]
- * binop [lv, x_op_y]
- * store [] */
- cg_dup(p->cg);
- cg_load(p->cg);
- parse_assign_expr(p);
- to_rvalue(p);
- if (compound == BO_IADD || compound == BO_ISUB) {
- /* `+=`/`-=` on a pointer needs the same scaling/decay as `+`/`-`. */
- emit_add_or_sub(p, compound);
- } else {
- cg_binop(p->cg, compound);
- }
- cg_store(p->cg);
-}
-
-static void parse_expr(Parser* p) {
- parse_assign_expr(p);
- while (is_punct(&p->cur, ',')) {
- advance(p);
- /* Discard left, evaluate right. */
- cg_drop(p->cg);
- parse_assign_expr(p);
- }
-}
-
-/* ============================================================
- * Declarations (slice: `int` / `void` / `char` only)
- * ============================================================
- * DeclSpecs and parse_decl_specs are defined above (hoisted before the
- * expression parsing section). What follows here is the declarator-and-
- * initializer machinery built on top of them. */
-
-/* Forward decl for parse_compound_stmt (mutually recursive with statement
- * dispatch). */
-static void parse_stmt(Parser* p);
-static void parse_compound_stmt(Parser* p);
-
-/* Allocate a frame slot for a local variable of `type` and bind `name`
- * into the current scope. */
-static FrameSlot make_local_aligned(Parser* p, Sym name, const Type* type,
- SrcLoc loc, u32 align_override) {
- FrameSlotDesc fsd;
- FrameSlot s;
- SymEntry* e;
- u32 nat = abi_alignof(p->abi, type);
- memset(&fsd, 0, sizeof fsd);
- fsd.type = type;
- fsd.name = name;
- fsd.loc = loc;
- fsd.size = abi_sizeof(p->abi, type);
- fsd.align = (align_override > nat) ? align_override : nat;
- fsd.kind = FS_LOCAL;
- fsd.flags = FSF_NONE;
- s = cg_local(p->cg, &fsd);
- e = scope_define(p, name, SEK_LOCAL, type);
- e->v.slot = s;
- return s;
-}
-
-static FrameSlot make_local(Parser* p, Sym name, const Type* type, SrcLoc loc) {
- return make_local_aligned(p, name, type, loc, 0);
-}
-
-/* Forward decls for declarator components. */
-typedef enum DSuffKind { DS_ARRAY, DS_FUNC } DSuffKind;
-typedef struct ParamInfo ParamInfo;
-typedef struct DeclSuffix {
- u8 kind; /* DSuffKind */
- /* DS_ARRAY */
- u32 count; /* element count; meaningful when !vla and !incomplete */
- u8 incomplete; /* true for `[]` (no size given) */
- u8 vla; /* true for `[expr]` with a non-constant size */
- /* When `vla` is set, the size expression has already been emitted and the
- * resulting i64 (in bytes-of-elem-count) is held in this scratch slot.
- * Materialized at suffix-parse time because the size expression's tokens
- * are consumed there; init_declarator reads it back to drive cg_alloca. */
- FrameSlot vla_count_slot;
- /* DS_FUNC */
- ParamInfo* params;
- u16 nparams;
- u8 variadic;
-} DeclSuffix;
-
-typedef struct ParamInfo {
- Sym name;
- const Type* type;
- SrcLoc loc;
-} ParamInfo;
-
-static void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out,
- u8* variadic_out);
-
-/* Parse a single trailing suffix (`[...]` or `(...)`) after a declarator's
- * IDENT or parenthesized inner-declarator. Returns 1 if a suffix was consumed
- * and filled into *out, 0 otherwise. */
-static int parse_decl_suffix(Parser* p, DeclSuffix* out) {
- if (accept_punct(p, '[')) {
- /* `[ qualifier* static? assignment-expression? ]` or `[ static qualifier* assign-expr ]`.
- * Only constant integer expressions are accepted at this slice; non-constant
- * sizes are VLA territory (Phase 9). */
- out->kind = DS_ARRAY;
- out->count = 0;
- out->incomplete = 0;
- out->vla = 0;
- /* Optional `static`/qualifiers before the size; recognized, no-op here.
- * `[static N]` only changes parameter ABI hints (caller promises ≥N). */
- for (;;) {
- if (accept_kw(p, KW_STATIC) || accept_kw(p, KW_CONST) ||
- accept_kw(p, KW_VOLATILE) || accept_kw(p, KW_RESTRICT) ||
- accept_kw(p, KW_ATOMIC)) {
- continue;
- }
- break;
- }
- if (accept_punct(p, ']')) {
- out->incomplete = 1;
- return 1;
- }
- /* Function-prototype parameter: any `[...]` decays to `T*` (§6.7.6.3
- * ¶7), so the size expression is unused. Consume tokens up to the
- * matching `]` (handling `[*]`, `[n]`, nested brackets) and record
- * the parameter as an incomplete array; the caller decays it to a
- * pointer. */
- if (p->in_param_decl) {
- int depth = 1;
- while (depth > 0) {
- if (p->cur.kind == TOK_EOF) {
- perr(p, "unexpected EOF in parameter array bound");
- }
- if (is_punct(&p->cur, '[')) ++depth;
- else if (is_punct(&p->cur, ']')) {
- --depth;
- if (depth == 0) break;
- }
- advance(p);
- }
- out->incomplete = 1;
- expect_punct(p, ']', "']' after array size");
- return 1;
- }
- /* Constant integer size: an expression starting with a numeric or
- * character literal (or an enum constant) is routed through the
- * constant evaluator so `[3+4]`, `[N*2]` etc. round-trip. Anything
- * else kicks the suffix into VLA mode (§6.7.6.2 ¶4). */
- {
- Tok t = p->cur;
- int is_const_start = (t.kind == TOK_NUM || t.kind == TOK_CHR);
- if (!is_const_start && t.kind == TOK_IDENT) {
- SymEntry* e = scope_lookup(p, t.v.ident);
- if (e && e->kind == SEK_ENUM_CST) is_const_start = 1;
- if (!is_const_start) {
- /* `sizeof` and `_Alignof` tokenize as identifiers but yield
- * compile-time constants — admit them so `int a[_Alignof(T)]`
- * lowers as a fixed-size array, not a VLA. */
- CKw k = ident_kw(p, t.v.ident);
- if (k == KW_SIZEOF || k == KW_ALIGNOF) is_const_start = 1;
- }
- }
- if (is_const_start) {
- SrcLoc cloc = tok_loc(&p->cur);
- i64 v = eval_const_int(p, cloc);
- if (v < 0) perr(p, "negative array size");
- out->count = (u32)v;
- } else {
- /* VLA: emit the size-expression code now (the tokens go away after
- * we return), spill its int value to a fresh i64 frame slot so
- * init_declarator can pick it back up at the right time. */
- FrameSlotDesc fsd;
- if (p->vla_pending) {
- perr(p, "v1 supports only one VLA dimension per declarator");
- }
- out->vla = 1;
- memset(&fsd, 0, sizeof fsd);
- fsd.type = ty_size_t(p);
- fsd.size = abi_sizeof(p->abi, fsd.type);
- fsd.align = abi_alignof(p->abi, fsd.type);
- fsd.kind = FS_LOCAL;
- out->vla_count_slot = cg_local(p->cg, &fsd);
- parse_assign_expr(p);
- to_rvalue(p);
- cg_push_local_typed(p->cg, out->vla_count_slot, fsd.type);
- cg_swap(p->cg);
- cg_store(p->cg);
- cg_drop(p->cg);
- p->vla_pending = 1;
- p->vla_pending_count_slot = out->vla_count_slot;
- }
- }
- expect_punct(p, ']', "']' after array size");
- return 1;
- }
- if (accept_punct(p, '(')) {
- out->kind = DS_FUNC;
- out->params = NULL;
- out->nparams = 0;
- out->variadic = 0;
- parse_param_list(p, &out->params, &out->nparams, &out->variadic);
- expect_punct(p, ')', "')' after parameter list");
- return 1;
- }
- return 0;
-}
-
-/* Wrap `base` with a single suffix's transform. Used when materializing the
- * declarator type from the collected suffix list. */
-static const Type* apply_decl_suffix(Parser* p, const Type* base,
- const DeclSuffix* s) {
- if (s->kind == DS_ARRAY) {
- /* VLA: count is runtime; record an incomplete array type so the type
- * system carries the elem-type but the size is treated as unknown.
- * init_declarator notices the parser-side `vla_pending` flag and emits
- * the alloca + bind. */
- return type_array(p->pool, base, s->count, s->incomplete || s->vla);
- }
- /* DS_FUNC */
- {
- const Type** ptypes = NULL;
- if (s->nparams) {
- ptypes = (const Type**)arena_array(p->c->tu, const Type*, s->nparams);
- for (u16 i = 0; i < s->nparams; ++i) ptypes[i] = s->params[i].type;
- }
- return type_func(p->pool, base, ptypes, s->nparams, (int)s->variadic);
- }
-}
-
-/* Parse a (possibly abstract) declarator. Supports:
- * pointer-prefix? ( IDENT | '(' inner-declarator ')' ) suffix*
- * where suffix is `[N]` or `(params)`. The inner declarator handles one level
- * of nesting (e.g. `int (*fp)(int)`). Multiple nested parens would recurse
- * naturally — for Phase 2 a single level covers all corpus cases.
- *
- * If `allow_abstract` is true, the IDENT may be absent (used by parameters).
- * On success returns the declared type and writes *name_out (=0 if abstract). */
-static const Type* parse_declarator_full(Parser* p, const Type* base,
- int allow_abstract, Sym* name_out,
- SrcLoc* loc_out) {
- return parse_declarator_full_ex(p, base, allow_abstract, name_out, loc_out,
- NULL);
-}
-
-static const Type* parse_declarator_full_ex(Parser* p, const Type* base,
- int allow_abstract, Sym* name_out,
- SrcLoc* loc_out,
- Attr** attrs_out) {
- /* Outer pointer prefix wraps `base` as we go. */
- base = parse_pointer_layer(p, base);
-
- /* Inner declarator: collect inner pointer prefix (innermost-first array)
- * to wrap LATER (after we know the suffix-applied base). */
- Sym name = 0;
- SrcLoc nloc = {0, 0, 0};
- u8 nptrs_inner = 0;
- u16 inner_quals[8];
- int has_inner_parens = 0;
- DeclSuffix inner_suffs[8];
- int n_inner_suffs = 0;
-
- if (is_punct(&p->cur, '(')) {
- /* Disambiguate `(declarator)` vs. function suffix `(params)`. The token
- * after `(` decides:
- * `*` → inner-declarator pointer prefix
- * IDENT (non-kw) → inner-declarator IDENT
- * IDENT (type kw) → function suffix (parameters)
- * `)` → function suffix `()` (unspecified args)
- * Phase 2 doesn't have typedef-names; once they land, the IDENT branch
- * also needs to dispatch on SEK_TYPEDEF. */
- Tok n = peek1(p);
- int is_inner = 0;
- if (is_punct(&n, '*')) {
- is_inner = 1;
- } else if (n.kind == TOK_IDENT && ident_kw(p, n.v.ident) == KW_NONE) {
- /* Plain IDENT could be a declarator name OR a typedef-name (which
- * makes the parens a function-parameter list). Disambiguate by
- * peeking at the symbol table. */
- SymEntry* e = scope_lookup(p, n.v.ident);
- if (!(e && e->kind == SEK_TYPEDEF)) is_inner = 1;
- }
- if (is_inner) {
- has_inner_parens = 1;
- advance(p); /* '(' */
- /* Inner pointer prefix: each `*` (with optional qualifiers) records one
- * wrap layer. We store qualifiers per layer so we can apply them in
- * reverse order below. */
- while (accept_punct(p, '*')) {
- u16 q = 0;
- if (nptrs_inner >= 8) perr(p, "too many pointer levels");
- for (;;) {
- if (accept_kw(p, KW_CONST)) { q |= Q_CONST; continue; }
- if (accept_kw(p, KW_VOLATILE)) { q |= Q_VOLATILE; continue; }
- if (accept_kw(p, KW_RESTRICT)) { q |= Q_RESTRICT; continue; }
- if (accept_kw(p, KW_ATOMIC)) { q |= Q_ATOMIC; continue; }
- if (starts_attr(p)) { parse_and_discard_attributes(p); continue; }
- break;
- }
- inner_quals[nptrs_inner++] = q;
- }
- if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) {
- name = p->cur.v.ident;
- nloc = tok_loc(&p->cur);
- advance(p);
- } else if (!allow_abstract) {
- perr(p, "expected declarator name");
- }
- /* Inner declarator may carry its own suffixes — `int (*ops[2])(int)`
- * has `[2]` between IDENT and the closing `)`. Collect them so
- * they wrap LAST (closest to IDENT), after the outer suffix and
- * inner pointer layers. */
- if (starts_attr(p)) parse_and_discard_attributes(p);
- while (n_inner_suffs < 8) {
- if (!parse_decl_suffix(p, &inner_suffs[n_inner_suffs])) break;
- ++n_inner_suffs;
- if (starts_attr(p)) parse_and_discard_attributes(p);
- }
- expect_punct(p, ')', "')' after inner declarator");
- }
- }
-
- if (!has_inner_parens) {
- if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) {
- name = p->cur.v.ident;
- nloc = tok_loc(&p->cur);
- advance(p);
- } else if (!allow_abstract) {
- perr(p, "expected declarator name");
- }
- }
-
- /* Optional attributes after the declarator-id (before any suffix).
- * Honored when the caller supplies an `attrs_out` sink (e.g. struct
- * members care about aligned / packed at this position); otherwise
- * dropped to stay compatible with positions that ignore them. */
- if (starts_attr(p)) {
- if (attrs_out) parse_attrs_into(p, attrs_out);
- else parse_and_discard_attributes(p);
- }
-
- /* Collect outer suffixes left-to-right; apply in reverse so the innermost
- * suffix wraps `base` first. For `int a[5][3]` the resulting type is
- * "array[5] of array[3] of int": [3] applied first → array[3], then [5]
- * wraps that → array[5] of array[3]. */
- DeclSuffix suffs[8];
- int nsuffs = 0;
- while (nsuffs < 8) {
- if (!parse_decl_suffix(p, &suffs[nsuffs])) break;
- ++nsuffs;
- /* Attributes between/after suffixes — most commonly after a function
- * declarator's `)`. Same sink rule as the post-id position. */
- if (starts_attr(p)) {
- if (attrs_out) parse_attrs_into(p, attrs_out);
- else parse_and_discard_attributes(p);
- }
- }
- if (nsuffs == 8 && (is_punct(&p->cur, '[') || is_punct(&p->cur, '('))) {
- perr(p, "too many declarator suffixes (raise the cap if needed)");
- }
- for (int i = nsuffs - 1; i >= 0; --i) {
- base = apply_decl_suffix(p, base, &suffs[i]);
- }
-
- /* Apply inner pointer wraps. inner_quals[0] is the FIRST `*` consumed (the
- * outermost in the chain `**fp` reads as "fp is ptr to ptr"); the LAST `*`
- * is the one nearest the IDENT. Wrap from nearest-IDENT outward, so we
- * iterate inner_quals in reverse. */
- for (int i = (int)nptrs_inner - 1; i >= 0; --i) {
- base = type_ptr(p->pool, base);
- if (inner_quals[i]) {
- base = type_qualified(p->pool, base, inner_quals[i]);
- }
- }
-
- /* Apply inner declarator suffixes last — they sit closest to IDENT, so
- * for `int (*ops[2])(int)` `[2]` wraps the (already-built) function-
- * pointer type to give "array[2] of pointer to function(int) → int". */
- for (int i = n_inner_suffs - 1; i >= 0; --i) {
- base = apply_decl_suffix(p, base, &inner_suffs[i]);
- }
-
- if (name_out) *name_out = name;
- if (loc_out) *loc_out = nloc;
- return base;
-}
-
-/* Non-abstract entry point used by ordinary declarations. */
-static const Type* parse_declarator(Parser* p, const Type* base, Sym* name_out,
- SrcLoc* loc_out) {
- return parse_declarator_full(p, base, /*allow_abstract=*/0, name_out, loc_out);
-}
-
-/* True if `ty` is char/signed char/unsigned char (the three element types
- * permitted as the target of a string-literal initializer per §6.7.9 ¶14). */
-static int is_char_kind(const Type* ty) {
- if (!ty) return 0;
- return ty->kind == TY_CHAR || ty->kind == TY_SCHAR || ty->kind == TY_UCHAR;
-}
-
-/* Decode the string token at p->cur (must be TOK_STR) without advancing.
- * Returns a heap-allocated byte buffer (caller frees) and writes the
- * length (including the trailing NUL) to *nlen_out. Convenience wrapper
- * around decode_string_literal, kept here so initializer code doesn't
- * need to reach into the literal-parsing section. */
-static u8* peek_string_bytes(Parser* p, size_t* nlen_out) {
- Tok t = p->cur;
- if (t.kind != TOK_STR) perr(p, "internal: peek_string_bytes on non-string");
- return decode_string_literal(p, &t, nlen_out);
-}
-
-/* Push the lvalue of a sub-object at byte offset `offset` within the array
- * local `slot` (whose type is `arr_ty`), with element type `elem_ty`. The
- * value stack ends with an OPK_INDIRECT lvalue ready for cg_store. */
-static void push_subobject_lv(Parser* p, FrameSlot slot, const Type* arr_ty,
- u32 offset, const Type* elem_ty) {
- cg_push_local_typed(p->cg, slot, arr_ty);
- cg_addr(p->cg);
- cg_retag_top(p->cg, type_ptr(p->pool, elem_ty));
- if (offset > 0) {
- cg_push_int(p->cg, (i64)offset, ty_size_t(p));
- cg_binop(p->cg, BO_IADD);
- }
- cg_deref(p->cg, elem_ty);
-}
-
-/* Emit a load+store for one scalar leaf from the source pointer
- * (`src_ptr_slot`, holding a pointer rvalue) to a sub-object of the
- * destination slot. `src_ptr_ty` is the slot's declared type so we read
- * it back at the right width before retagging to the leaf's pointer
- * type. */
-static void emit_copy_leaf(Parser* p, FrameSlot dst_slot, const Type* dst_arr_ty,
- u32 dst_off, FrameSlot src_ptr_slot,
- const Type* src_ptr_ty, u32 src_off,
- const Type* leaf_ty) {
- push_subobject_lv(p, dst_slot, dst_arr_ty, dst_off, leaf_ty);
- cg_push_local_typed(p->cg, src_ptr_slot, src_ptr_ty);
- cg_load(p->cg);
- cg_retag_top(p->cg, type_ptr(p->pool, leaf_ty));
- if (src_off > 0) {
- cg_push_int(p->cg, (i64)src_off, ty_size_t(p));
- cg_binop(p->cg, BO_IADD);
- }
- cg_deref(p->cg, leaf_ty);
- cg_load(p->cg);
- cg_store(p->cg);
- cg_drop(p->cg);
-}
-
-/* Walk a (possibly nested) aggregate type, emitting a leaf load+store
- * for each scalar member. Used to lower `struct s = expr;` and
- * `struct s = (struct S){...};` after the source's address has been
- * spilled into `src_ptr_slot`. Bitfields and flexible array members are
- * not supported here yet. */
-static void emit_walk_copy(Parser* p, FrameSlot dst_slot,
- const Type* dst_arr_ty, u32 dst_off,
- FrameSlot src_ptr_slot, const Type* src_ptr_ty,
- u32 src_off, const Type* ty) {
- if (ty->kind == TY_STRUCT) {
- const ABIRecordLayout* L = abi_record_layout(p->abi, ty);
- for (u16 i = 0; i < ty->rec.nfields; ++i) {
- const Field* f = &ty->rec.fields[i];
- if (f->flags & FIELD_BITFIELD) continue;
- u32 foff = L->fields[i].offset;
- emit_walk_copy(p, dst_slot, dst_arr_ty, dst_off + foff,
- src_ptr_slot, src_ptr_ty, src_off + foff, f->type);
- }
- return;
- }
- if (ty->kind == TY_ARRAY) {
- u32 esz = abi_sizeof(p->abi, ty->arr.elem);
- for (u32 i = 0; i < ty->arr.count; ++i) {
- emit_walk_copy(p, dst_slot, dst_arr_ty, dst_off + i * esz,
- src_ptr_slot, src_ptr_ty, src_off + i * esz,
- ty->arr.elem);
- }
- return;
- }
- if (ty->kind == TY_UNION) {
- /* Byte-wise copy preserves whichever member was active. */
- u32 sz = abi_sizeof(p->abi, ty);
- const Type* uchar_ty = type_prim(p->pool, TY_UCHAR);
- for (u32 i = 0; i < sz; ++i) {
- emit_copy_leaf(p, dst_slot, dst_arr_ty, dst_off + i,
- src_ptr_slot, src_ptr_ty, src_off + i, uchar_ty);
- }
- return;
- }
- emit_copy_leaf(p, dst_slot, dst_arr_ty, dst_off, src_ptr_slot, src_ptr_ty,
- src_off, ty);
-}
-
-/* Source struct/union value is on top of the cg stack as an lvalue.
- * Spill its address into a fresh pointer slot, then walk the type and
- * copy each scalar leaf into the destination sub-object. */
-static void emit_struct_copy_into_slot(Parser* p, FrameSlot dst_slot,
- const Type* dst_arr_ty, u32 dst_off,
- const Type* ty) {
- const Type* ptr_ty = type_ptr(p->pool, ty);
+FrameSlot make_local_aligned(Parser* p, Sym name, const Type* type,
+ SrcLoc loc, u32 align_override) {
FrameSlotDesc fsd;
- FrameSlot src_ptr_slot;
- cg_addr(p->cg);
- memset(&fsd, 0, sizeof fsd);
- fsd.type = ptr_ty;
- fsd.size = abi_sizeof(p->abi, ptr_ty);
- fsd.align = abi_alignof(p->abi, ptr_ty);
- fsd.kind = FS_LOCAL;
- fsd.flags = FSF_NONE;
- src_ptr_slot = cg_local(p->cg, &fsd);
- cg_push_local_typed(p->cg, src_ptr_slot, ptr_ty);
- cg_swap(p->cg);
- cg_store(p->cg);
- cg_drop(p->cg);
- emit_walk_copy(p, dst_slot, dst_arr_ty, dst_off, src_ptr_slot, ptr_ty, 0, ty);
-}
-
-/* Recursively zero-initialize the sub-object at `offset` of type `ty`. */
-static void zero_init_at(Parser* p, FrameSlot slot, const Type* arr_ty,
- u32 offset, const Type* ty) {
- if (ty->kind == TY_ARRAY) {
- u32 esz = abi_sizeof(p->abi, ty->arr.elem);
- for (u32 i = 0; i < ty->arr.count; ++i) {
- zero_init_at(p, slot, arr_ty, offset + i * esz, ty->arr.elem);
- }
- return;
- }
- if (ty->kind == TY_STRUCT) {
- const ABIRecordLayout* L = abi_record_layout(p->abi, ty);
- for (u16 i = 0; i < ty->rec.nfields; ++i) {
- const Field* f = &ty->rec.fields[i];
- zero_init_at(p, slot, arr_ty, offset + L->fields[i].offset, f->type);
- }
- return;
- }
- if (ty->kind == TY_UNION) {
- /* Zero the union's storage by zeroing the first non-bitfield field
- * sized to the union's storage requirement. v1 just zeroes the first
- * non-bitfield member; storage outside it stays whatever the OS
- * gives a fresh stack slot. Tightening to a memset-equivalent is a
- * Phase 6 concern. */
- if (ty->rec.nfields > 0) {
- const Field* f = &ty->rec.fields[0];
- if (!(f->flags & FIELD_BITFIELD)) {
- zero_init_at(p, slot, arr_ty, offset, f->type);
- }
- }
- return;
- }
- push_subobject_lv(p, slot, arr_ty, offset, ty);
- cg_push_int(p->cg, 0, ty);
- cg_store(p->cg);
- cg_drop(p->cg);
-}
-
-/* Parse the initializer for the sub-object at `offset` of type `ty`.
- *
- * Aggregates (`{...}`) follow §6.7.9:
- * - Designated initializers (`[i] = ...`, `.field = ...`, and chains
- * such as `[i][j] = ...` or `.a.b = ...`) reset the cursor before
- * each item; subsequent positional items continue from there. Gaps
- * between the previous cursor and a forward designator are
- * zero-filled.
- * - Brace elision: a sub-aggregate without its own `{` consumes
- * scalars from the parent's stream until its first scalar slot is
- * filled.
- * - String literals initialize char-arrays directly per §6.7.9 ¶14
- * (with or without surrounding braces).
- *
- * Scalars take a single assignment-expression, optionally wrapped in
- * `{ x }` per §6.7.9 ¶11. */
-static void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset,
- const Type* ty);
-
-/* Emit byte stores for a string literal initializing a char-array sub-
- * object at `offset` whose declared element count is `count`. Bytes
- * beyond the literal are zero-filled. Per §6.7.9 ¶14 it is well-formed
- * to drop the terminating NUL when `count == strlen(s)`; longer arrays
- * keep the NUL and zero-pad. */
-static void init_string_at(Parser* p, FrameSlot slot, const Type* arr_ty,
- u32 offset, const Type* elem_ty, u32 count) {
- size_t n = 0;
- u8* bytes = peek_string_bytes(p, &n);
- size_t copy = n;
- size_t i;
- if (copy > count) copy = count; /* §6.7.9 ¶14 truncation */
- for (i = 0; i < copy; ++i) {
- push_subobject_lv(p, slot, arr_ty, offset + (u32)i, elem_ty);
- cg_push_int(p->cg, (i64)bytes[i], elem_ty);
- cg_store(p->cg);
- cg_drop(p->cg);
- }
- for (; i < count; ++i) {
- push_subobject_lv(p, slot, arr_ty, offset + (u32)i, elem_ty);
- cg_push_int(p->cg, 0, elem_ty);
- cg_store(p->cg);
- cg_drop(p->cg);
- }
- p->c->env->heap->free(p->c->env->heap, bytes, 0);
- advance(p); /* consume TOK_STR */
-}
-
-/* Parse a designator chain (`[const]` and `.ident` repeats) starting at
- * the current token and ending at `=`. The chain navigates from the outer
- * type `outer_ty` (offset_in `outer_offset`) down to a sub-object;
- * returns the sub-object's type via *sub_ty_out and absolute byte offset
- * via *sub_offset_out. Also writes the index of the FIRST designator
- * (which selects the cursor position in the immediately-enclosing brace
- * list): for an array that's the [i] index, for a struct that's the
- * field index of the named member. */
-static void parse_designator_chain(Parser* p, const Type* outer_ty,
- u32 outer_offset, const Type** sub_ty_out,
- u32* sub_offset_out, u32* top_index_out) {
- const Type* cur_ty = outer_ty;
- u32 cur_off = outer_offset;
- int first = 1;
- for (;;) {
- if (is_punct(&p->cur, '[')) {
- i64 idx;
- u32 esz;
- SrcLoc cloc = tok_loc(&p->cur);
- advance(p);
- idx = eval_const_int(p, cloc);
- expect_punct(p, ']', "']' after designator index");
- if (!cur_ty || cur_ty->kind != TY_ARRAY) {
- perr(p, "array designator on non-array");
- }
- if (idx < 0 || (u32)idx >= cur_ty->arr.count) {
- perr(p, "array designator index out of range");
- }
- esz = abi_sizeof(p->abi, cur_ty->arr.elem);
- cur_off += (u32)idx * esz;
- cur_ty = cur_ty->arr.elem;
- if (first) *top_index_out = (u32)idx;
- first = 0;
- } else if (is_punct(&p->cur, '.')) {
- Sym fname;
- const Type* fty;
- u32 foff;
- const Field* ff;
- u16 fi;
- advance(p);
- if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) {
- perr(p, "expected field name after '.'");
- }
- fname = p->cur.v.ident;
- advance(p);
- if (!cur_ty ||
- (cur_ty->kind != TY_STRUCT && cur_ty->kind != TY_UNION)) {
- perr(p, "field designator on non-record type");
- }
- if (!find_field(p->abi, cur_ty, fname, &fty, &foff, &ff)) {
- perr(p, "no such field in designator");
- }
- cur_off += foff;
- if (first) {
- /* Find the field index for cursor advance in the parent loop.
- * find_field returns the offset/type but not the index, so do a
- * second linear scan here. Anonymous-member transparency: an
- * IDENT inside a nested anonymous member belongs to the outer
- * record's NTH visible position; we use the outer slot for
- * cursor advance, scanning the outer record. */
- for (fi = 0; fi < cur_ty->rec.nfields; ++fi) {
- const Field* g = &cur_ty->rec.fields[fi];
- if (g->name == fname && fname != 0) {
- *top_index_out = fi;
- break;
- }
- if ((g->flags & FIELD_ANON) &&
- (g->type->kind == TY_STRUCT || g->type->kind == TY_UNION)) {
- const Type* tmp_ty;
- u32 tmp_off;
- const Field* tmp_f;
- if (find_field(p->abi, g->type, fname, &tmp_ty, &tmp_off,
- &tmp_f)) {
- *top_index_out = fi;
- break;
- }
- }
- }
- }
- cur_ty = fty;
- first = 0;
- } else {
- break;
- }
- }
- if (first) perr(p, "internal: empty designator chain");
- expect_punct(p, '=', "'=' after designator");
- *sub_ty_out = cur_ty;
- *sub_offset_out = cur_off;
-}
-
-/* Parse a brace-elided sequence of scalars filling sub-objects of `ty`
- * starting at `offset`. `count_out` is set to the number of scalars
- * consumed; the function returns when the parent's initializer stream
- * is exhausted (next token is `}` or `,`) or when `ty`'s scalar slots
- * are full. */
-static u32 init_elided(Parser* p, FrameSlot slot, const Type* arr_ty,
- u32 offset, const Type* ty);
-
-static u32 init_struct_fields(Parser* p, FrameSlot slot, const Type* arr_ty,
- u32 offset, const Type* ty, u32 start_field,
- int braced) {
- /* Iterate over the struct's fields, consuming initializers from the parent
- * stream. With `braced=1`, we are inside this struct's own `{ ... }` and
- * stop on `}`; with `braced=0`, we are eliding into the parent's stream
- * and return as soon as the first scalar slot is filled (caller manages
- * outer field index). Returns the number of fields consumed.
- *
- * In braced mode, designated initializers (`.field = ...`) reset `i`;
- * gaps between the previous cursor and the designator are zero-filled. */
- const ABIRecordLayout* L = abi_record_layout(p->abi, ty);
- u32 i = start_field;
- u32 zero_lo = start_field; /* first not-yet-zero-filled field index */
- for (; i < ty->rec.nfields; ++i) {
- const Field* f = &ty->rec.fields[i];
- u32 foff = offset + L->fields[i].offset;
- if (braced && (is_punct(&p->cur, '}') || p->cur.kind == TOK_EOF)) break;
- if (braced && is_punct(&p->cur, '.')) {
- const Type* sub_ty;
- u32 sub_off;
- u32 top_idx = 0;
- parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx);
- /* Zero-fill any fields the designator skipped over (or back-tracked
- * past — duplicate inits are allowed but we just overwrite). */
- while (zero_lo < top_idx) {
- const Field* zf = &ty->rec.fields[zero_lo];
- u32 zoff = offset + L->fields[zero_lo].offset;
- zero_init_at(p, slot, arr_ty, zoff, zf->type);
- ++zero_lo;
- }
- init_at(p, slot, arr_ty, sub_off, sub_ty);
- i = top_idx; /* loop ++ advances past it */
- if (zero_lo <= top_idx) zero_lo = top_idx + 1;
- goto next_item_struct;
- }
- init_at(p, slot, arr_ty, foff, f->type);
- if (zero_lo <= i) zero_lo = i + 1;
- if (!braced) {
- /* Caller (parent's elision) only wanted us to consume one scalar's
- * worth into our first non-bitfield slot. */
- ++i;
- break;
- }
- next_item_struct:
- if (!accept_punct(p, ',')) {
- ++i;
- break;
- }
- if (is_punct(&p->cur, '}')) {
- ++i;
- break; /* trailing comma */
- }
- }
- /* Zero-fill any unconsumed fields in braced mode. */
- if (braced) {
- u32 j;
- for (j = zero_lo; j < ty->rec.nfields; ++j) {
- const Field* f = &ty->rec.fields[j];
- u32 foff = offset + L->fields[j].offset;
- zero_init_at(p, slot, arr_ty, foff, f->type);
- }
- }
- return i;
-}
-
-static u32 init_elided(Parser* p, FrameSlot slot, const Type* arr_ty,
- u32 offset, const Type* ty) {
- if (ty->kind == TY_ARRAY) {
- u32 esz = abi_sizeof(p->abi, ty->arr.elem);
- init_at(p, slot, arr_ty, offset, ty->arr.elem);
- (void)esz;
- return 1;
- }
- if (ty->kind == TY_STRUCT) {
- init_struct_fields(p, slot, arr_ty, offset, ty, 0, /*braced=*/0);
- return 1;
- }
- /* Scalar / pointer / union: consume one assignment-expr. */
- int had_brace = accept_punct(p, '{');
- push_subobject_lv(p, slot, arr_ty, offset, ty);
- parse_assign_expr(p);
- to_rvalue(p);
- cg_store(p->cg);
- cg_drop(p->cg);
- if (had_brace) {
- accept_punct(p, ',');
- expect_punct(p, '}', "'}' after scalar initializer");
- }
- return 1;
-}
-
-static void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset,
- const Type* ty) {
- if (ty->kind == TY_ARRAY) {
- const Type* elem_ty = ty->arr.elem;
- u32 esz = abi_sizeof(p->abi, elem_ty);
- /* String literal initializing a char-array (with or without braces) per
- * §6.7.9 ¶14. Wide character types are deferred (Phase 7). */
- if (is_char_kind(elem_ty)) {
- if (p->cur.kind == TOK_STR) {
- init_string_at(p, slot, arr_ty, offset, elem_ty, ty->arr.count);
- return;
- }
- if (is_punct(&p->cur, '{') && peek1(p).kind == TOK_STR) {
- advance(p);
- init_string_at(p, slot, arr_ty, offset, elem_ty, ty->arr.count);
- accept_punct(p, ',');
- expect_punct(p, '}', "'}' after string initializer");
- return;
- }
- }
- if (!is_punct(&p->cur, '{')) {
- /* Brace elision: the array consumes scalars from the parent stream.
- * A bare assignment-expression on entry only fills one scalar slot
- * worth, then returns. */
- init_elided(p, slot, arr_ty, offset, elem_ty);
- return;
- }
- advance(p); /* '{' */
- {
- u32 i = 0;
- u32 zero_lo = 0; /* first index not yet zero-filled (after explicit init) */
- if (!is_punct(&p->cur, '}')) {
- for (;;) {
- if (is_punct(&p->cur, '[')) {
- const Type* sub_ty;
- u32 sub_off;
- u32 top_idx = 0;
- parse_designator_chain(p, ty, offset, &sub_ty, &sub_off,
- &top_idx);
- while (zero_lo < top_idx) {
- zero_init_at(p, slot, arr_ty, offset + zero_lo * esz, elem_ty);
- ++zero_lo;
- }
- init_at(p, slot, arr_ty, sub_off, sub_ty);
- i = top_idx + 1;
- if (zero_lo < i) zero_lo = i;
- } else {
- if (i >= ty->arr.count) {
- perr(p, "too many initializers for array");
- }
- init_at(p, slot, arr_ty, offset + i * esz, elem_ty);
- ++i;
- if (zero_lo < i) zero_lo = i;
- }
- if (!accept_punct(p, ',')) break;
- if (is_punct(&p->cur, '}')) break;
- }
- }
- expect_punct(p, '}', "'}' after array initializer");
- {
- u32 j;
- for (j = zero_lo; j < ty->arr.count; ++j) {
- zero_init_at(p, slot, arr_ty, offset + j * esz, elem_ty);
- }
- }
- }
- return;
- }
- if (ty->kind == TY_STRUCT) {
- if (!is_punct(&p->cur, '{')) {
- /* Brace elision into the parent's stream: take scalars for our first
- * non-bitfield field, then return so the parent advances to its next
- * sibling. */
- init_struct_fields(p, slot, arr_ty, offset, ty, 0, /*braced=*/0);
- return;
- }
- advance(p); /* '{' */
- init_struct_fields(p, slot, arr_ty, offset, ty, 0, /*braced=*/1);
- expect_punct(p, '}', "'}' after struct initializer");
- return;
- }
- if (ty->kind == TY_UNION) {
- /* `union U u = {.field = expr}` per §6.7.9 ¶7 names a specific
- * member; without a designator the first non-bitfield member is
- * initialized. Only one member can be active, so we honor the
- * (optional) leading designator and ignore the rest. */
- int had_brace = accept_punct(p, '{');
- if (ty->rec.nfields == 0) {
- if (had_brace) expect_punct(p, '}', "'}'");
- return;
- }
- if (had_brace && is_punct(&p->cur, '.')) {
- const Type* sub_ty;
- u32 sub_off;
- u32 top_idx = 0;
- parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx);
- init_at(p, slot, arr_ty, sub_off, sub_ty);
- } else {
- const Field* f = &ty->rec.fields[0];
- if (!(f->flags & FIELD_BITFIELD)) {
- init_at(p, slot, arr_ty, offset, f->type);
- }
- }
- if (had_brace) {
- accept_punct(p, ',');
- expect_punct(p, '}', "'}' after union initializer");
- }
- return;
- }
- /* Scalar (incl. pointer). */
- int had_brace = accept_punct(p, '{');
- push_subobject_lv(p, slot, arr_ty, offset, ty);
- parse_assign_expr(p);
- to_rvalue(p);
- coerce_top_to_lvalue(p);
- cg_store(p->cg);
- cg_drop(p->cg);
- if (had_brace) {
- accept_punct(p, ',');
- expect_punct(p, '}', "'}' after scalar initializer");
- }
-}
-
-/* ============================================================
- * Static-storage initializers (file-scope objects + static locals)
- * ============================================================
- *
- * A static-storage object's initializer is a constant expression that the
- * compiler must materialize as bytes in the object file. We allocate a
- * working buffer of `abi_sizeof(ty)` bytes (zero-filled), recursively walk
- * the (possibly braced) initializer, and write each scalar's encoding at its
- * computed offset. The buffer is then handed to decl_define_object as a
- * single INIT_BYTES item — obj_reserve hands back uninitialized chunk
- * storage, so we always patch the entire range.
- *
- * v1 scope: integer scalars only (eval_const_int). Pointer relocations are
- * deferred — none of the Phase 4 corpus rows need them. Aggregates are
- * positional brace lists with brace-elision elsewhere; designators arrive
- * with Phase 6. */
-
-static void encode_int_le(u8* dst, u32 size, i64 v) {
- for (u32 i = 0; i < size; ++i) {
- dst[i] = (u8)((v >> (8u * i)) & 0xffu);
- }
-}
-
-/* Encode a string literal at *buf+offset for a char-array sub-object of
- * declared element count `count`. Bytes beyond the literal stay zero
- * (buf is pre-zeroed by define_static_object). Truncation rules match
- * §6.7.9 ¶14. */
-static void parse_static_string_at(Parser* p, u8* buf, u32 buflen, u32 offset,
- u32 count) {
- size_t n = 0;
- u8* bytes = peek_string_bytes(p, &n);
- size_t copy = n;
- if (copy > count) copy = count;
- if (offset + (u32)copy > buflen) perr(p, "string initializer overflows object");
- memcpy(buf + offset, bytes, copy);
- p->c->env->heap->free(p->c->env->heap, bytes, 0);
- advance(p);
-}
-
-/* Append one pending relocation to the parser-side list, growing on
- * demand. Flushed by `define_static_object` after the section is pinned. */
-static void srl_push(Parser* p, u32 offset, u32 size, ObjSymId target,
- i64 addend) {
- if (p->static_relocs_len == p->static_relocs_cap) {
- u32 nc = p->static_relocs_cap ? p->static_relocs_cap * 2u : 4u;
- void* nb = arena_array(p->c->tu, char,
- nc * sizeof(*p->static_relocs));
- if (!nb) perr(p, "out of memory recording static relocs");
- if (p->static_relocs && p->static_relocs_len) {
- memcpy(nb, p->static_relocs,
- p->static_relocs_len * sizeof(*p->static_relocs));
- }
- p->static_relocs = nb;
- p->static_relocs_cap = nc;
- }
- p->static_relocs[p->static_relocs_len].offset = offset;
- p->static_relocs[p->static_relocs_len].size = size;
- p->static_relocs[p->static_relocs_len].target = target;
- p->static_relocs[p->static_relocs_len].addend = addend;
- ++p->static_relocs_len;
-}
-
-/* Try to parse the current expression as an address constant of pointer
- * type `ty`, recording it as a pending relocation. Forms supported:
- * `&IDENT` — absolute reloc at offset, addend 0
- * `&IDENT [const]` — addend = const * sizeof(elem)
- * `IDENT` — IDENT is an array; same as `&IDENT[0]`
- * `IDENT [+|-] const` — pointer arithmetic; addend scaled by
- * sizeof(*IDENT)
- * Returns 1 on success (caller should not call eval_const_int), 0 if
- * the current tokens don't look like an address constant (caller falls
- * back to integer-constant evaluation, which handles `(T*)0`). */
-static int try_parse_addr_const(Parser* p, const Type* ty, u8* buf,
- u32 offset, u32 sz) {
- Tok t = p->cur;
- Sym name = 0;
- SrcLoc nloc = tok_loc(&p->cur);
- int saw_amp = 0;
- i64 element_addend = 0;
- i64 byte_addend = 0;
+ FrameSlot s;
SymEntry* e;
- const Type* tgt_ty;
- ObjSymId tgt;
- if (t.kind == TOK_STR) {
- /* String literal as address constant (§6.6 ¶7). Mint a .rodata symbol
- * and emit a reloc at the pointer slot. */
- size_t n = 0;
- u8* bytes = decode_string_literal(p, &t, &n);
- ObjSymId str_sym = emit_string_to_rodata(p, bytes, n);
- p->c->env->heap->free(p->c->env->heap, bytes, 0);
- advance(p);
- (void)ty;
- (void)buf;
- srl_push(p, offset, sz, str_sym, 0);
- return 1;
- }
- if (is_punct(&t, '&')) {
- saw_amp = 1;
- advance(p);
- if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) {
- /* Not a recognized address-of form. Bail; caller will eval as int. */
- perr(p, "expected identifier after '&' in static initializer");
- }
- name = p->cur.v.ident;
- nloc = tok_loc(&p->cur);
- advance(p);
- } else if (t.kind == TOK_IDENT && ident_kw(p, t.v.ident) == KW_NONE) {
- name = t.v.ident;
- advance(p);
- } else {
- return 0;
- }
- e = scope_lookup(p, name);
- if (!e || (e->kind != SEK_GLOBAL && e->kind != SEK_FUNC)) {
- /* Address constants must reference an object with static storage
- * duration / external-or-internal linkage. Functions also qualify
- * (§6.7.9 ¶4 — addresses of objects of static storage duration; a
- * function designator decays to such an address). */
- perr(p, "static initializer is not a constant address expression");
- }
- tgt = e->v.sym;
- tgt_ty = e->type;
- /* Optional `[const]` after `&IDENT`. */
- if (saw_amp && is_punct(&p->cur, '[')) {
- SrcLoc cloc;
- advance(p);
- cloc = tok_loc(&p->cur);
- element_addend = eval_const_int(p, cloc);
- expect_punct(p, ']', "']' after array-subscript constant");
- if (tgt_ty && tgt_ty->kind == TY_ARRAY) {
- byte_addend +=
- element_addend * (i64)abi_sizeof(p->abi, tgt_ty->arr.elem);
- } else {
- byte_addend += element_addend;
- }
- }
- /* Optional `+`/`-` const for pointer arithmetic. Without `&`, the
- * IDENT must be an array (which decays to a pointer) for arithmetic
- * to make sense. */
- while (is_punct(&p->cur, '+') || is_punct(&p->cur, '-')) {
- int neg = is_punct(&p->cur, '-');
- SrcLoc cloc;
- i64 v;
- advance(p);
- cloc = tok_loc(&p->cur);
- v = eval_const_int(p, cloc);
- if (neg) v = -v;
- /* Scale by element size if the base is array/pointer. */
- if (tgt_ty && tgt_ty->kind == TY_ARRAY) {
- byte_addend += v * (i64)abi_sizeof(p->abi, tgt_ty->arr.elem);
- } else if (tgt_ty && tgt_ty->kind == TY_PTR) {
- byte_addend += v * (i64)abi_sizeof(p->abi, tgt_ty->ptr.pointee);
- } else if (saw_amp) {
- /* `&scalar + const` measured in bytes-of(scalar). */
- byte_addend += v * (i64)abi_sizeof(p->abi, tgt_ty);
- } else {
- byte_addend += v;
- }
- }
- (void)nloc;
- (void)ty;
- (void)buf;
- /* The reloc width (R_ABS32 vs R_ABS64) follows the destination
- * pointer width. */
- srl_push(p, offset, sz, tgt, byte_addend);
- return 1;
-}
-
-static void parse_static_init_at(Parser* p, u8* buf, u32 buflen, u32 offset,
- const Type* ty) {
- if (ty->kind == TY_ARRAY) {
- const Type* elem = ty->arr.elem;
- u32 esz = abi_sizeof(p->abi, elem);
- u32 i = 0;
- int had_brace;
- /* String literal initializer for char-arrays (with or without braces). */
- if (is_char_kind(elem)) {
- if (p->cur.kind == TOK_STR) {
- parse_static_string_at(p, buf, buflen, offset, ty->arr.count);
- return;
- }
- if (is_punct(&p->cur, '{') && peek1(p).kind == TOK_STR) {
- advance(p);
- parse_static_string_at(p, buf, buflen, offset, ty->arr.count);
- accept_punct(p, ',');
- expect_punct(p, '}', "'}' after string initializer");
- return;
- }
- }
- had_brace = accept_punct(p, '{');
- if (!had_brace) {
- perr(p, "expected '{' for static-storage array initializer");
- }
- if (!is_punct(&p->cur, '}')) {
- for (;;) {
- if (is_punct(&p->cur, '[')) {
- const Type* sub_ty;
- u32 sub_off;
- u32 top_idx = 0;
- parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx);
- parse_static_init_at(p, buf, buflen, sub_off, sub_ty);
- i = top_idx + 1;
- } else {
- if (i >= ty->arr.count) {
- perr(p, "too many initializers for array");
- }
- parse_static_init_at(p, buf, buflen, offset + i * esz, elem);
- ++i;
- }
- if (!accept_punct(p, ',')) break;
- if (is_punct(&p->cur, '}')) break;
- }
- }
- expect_punct(p, '}', "'}' after array initializer");
- return;
- }
- if (ty->kind == TY_STRUCT) {
- int had_brace = accept_punct(p, '{');
- const ABIRecordLayout* L = abi_record_layout(p->abi, ty);
- u32 i = 0;
- if (!had_brace) {
- perr(p, "expected '{' for static-storage struct initializer");
- }
- while (i < ty->rec.nfields && !is_punct(&p->cur, '}')) {
- const Field* f = &ty->rec.fields[i];
- if (is_punct(&p->cur, '.')) {
- const Type* sub_ty;
- u32 sub_off;
- u32 top_idx = 0;
- parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx);
- parse_static_init_at(p, buf, buflen, sub_off, sub_ty);
- i = top_idx + 1;
- if (!accept_punct(p, ',')) break;
- continue;
- }
- parse_static_init_at(p, buf, buflen, offset + L->fields[i].offset,
- f->type);
- ++i;
- if (!accept_punct(p, ',')) break;
- }
- expect_punct(p, '}', "'}' after struct initializer");
- return;
- }
- if (ty->kind == TY_UNION) {
- perr(p, "static-storage union initializer not supported in Phase 4");
- }
- /* Scalar / pointer: integer constant or address-constant per §6.6 ¶9
- * (pointer-typed only). Address constants are recorded as pending
- * relocations and resolved when the section gets pinned. */
- {
- int had_brace = accept_punct(p, '{');
- SrcLoc cloc = tok_loc(&p->cur);
- u32 sz = abi_sizeof(p->abi, ty);
- if (offset + sz > buflen) perr(p, "initializer overflows object");
- if (ty->kind == TY_PTR && try_parse_addr_const(p, ty, buf, offset, sz)) {
- /* Address constant recorded as a reloc. Buffer bytes stay zero;
- * the reloc carries the addend. */
- } else {
- i64 v = eval_const_int(p, cloc);
- encode_int_le(buf + offset, sz, v);
- }
- if (had_brace) {
- accept_punct(p, ',');
- expect_punct(p, '}', "'}' after scalar initializer");
- }
- }
+ u32 nat = abi_alignof(p->abi, type);
+ memset(&fsd, 0, sizeof fsd);
+ fsd.type = type;
+ fsd.name = name;
+ fsd.loc = loc;
+ fsd.size = abi_sizeof(p->abi, type);
+ fsd.align = (align_override > nat) ? align_override : nat;
+ fsd.kind = FS_LOCAL;
+ fsd.flags = FSF_NONE;
+ s = cg_local(p->cg, &fsd);
+ e = scope_define(p, name, SEK_LOCAL, type);
+ e->v.slot = s;
+ return s;
}
-/* Choose the section a defining object decl with `quals` and `storage`
- * should land in: const → .rodata, otherwise let decl_define_object pick
- * .data/.bss based on whether the init is all zero. Returns OBJ_SEC_NONE
- * when the default is appropriate. */
-static ObjSecId pick_object_section(Parser* p, u16 quals, int has_nonzero) {
- if ((quals & Q_CONST) != 0 && has_nonzero) {
- Sym secname = pool_intern_cstr(p->pool, ".rodata");
- return obj_section(decl_obj(p->decls), secname, SEC_RODATA, SF_ALLOC, 1u);
- }
- return OBJ_SEC_NONE;
+FrameSlot make_local(Parser* p, Sym name, const Type* type, SrcLoc loc) {
+ return make_local_aligned(p, name, type, loc, 0);
}
-/* Define a static-storage object: allocate the byte buffer, parse the
- * (optional) initializer into it, route to .rodata / .data / .bss, and call
- * obj_symbol_define. Used for both file-scope objects and static locals.
- * `align_override` is the strictest `_Alignas` argument the declarator
- * collected, or 0 for the natural type alignment. */
-static void define_static_object(Parser* p, ObjSymId sym, const Type* var_ty,
- u16 quals, int has_init, SrcLoc loc,
- u32 align_override) {
- ObjBuilder* ob = decl_obj(p->decls);
- u32 size = abi_sizeof(p->abi, var_ty);
- u32 align = abi_alignof(p->abi, var_ty);
- if (align_override > align) align = align_override;
- u8* buf = NULL;
- int has_nonzero = 0;
- ObjSecId override_sec;
- /* TLS objects route to .tdata / .tbss with SF_TLS; decl_declare marked
- * the symbol SK_TLS when the source carried `_Thread_local`. The
- * .rodata override path is skipped — TLS storage is per-thread mutable
- * even when declared `const`. */
- const ObjSym* os = obj_symbol_get(ob, sym);
- int is_tls = (os && os->kind == SK_TLS);
-
- if (has_init) {
- buf = (u8*)arena_array(p->c->tu, u8, size ? size : 1u);
- memset(buf, 0, size);
- /* Reset the pending-reloc list before parsing this initializer; the
- * caller flushes the collected entries to obj_reloc once the section
- * has been pinned. */
- p->static_relocs_len = 0;
- parse_static_init_at(p, buf, size, 0, var_ty);
- for (u32 i = 0; i < size; ++i) {
- if (buf[i]) { has_nonzero = 1; break; }
- }
- /* Pointer-typed initializers count as nonzero — the bytes are
- * patched by the loader at runtime, so we must place the object in
- * .data (or .rodata when const) rather than .bss. */
- if (p->static_relocs_len) has_nonzero = 1;
- }
-
- if (is_tls) {
- /* TLS path: .tbss for zero-init, .tdata otherwise. The section flags
- * mirror what clang emits for `_Thread_local` globals so the linker's
- * existing PT_TLS / TLV layout code applies unchanged. */
- Sym sname;
- ObjSecId sec;
- u32 a = align ? align : 1u;
- u32 base;
- if (!has_init || !has_nonzero) {
- sname = obj_secname_tbss(p->c);
- sec = obj_section_ex(ob, sname, SEC_BSS, SSEM_NOBITS,
- SF_ALLOC | SF_WRITE | SF_TLS, a, 0, OBJ_SEC_NONE, 0);
- base = obj_align_to(ob, sec, a);
- obj_reserve_bss(ob, sec, base + size, a);
- obj_symbol_define(ob, sym, sec, base, size);
- return;
- }
- sname = obj_secname_tdata(p->c);
- sec = obj_section(ob, sname, SEC_DATA, SF_ALLOC | SF_WRITE | SF_TLS, a);
- base = obj_align_to(ob, sec, a);
- {
- u8* dst = obj_reserve(ob, sec, size);
- if (dst) memcpy(dst, buf, size);
- }
- obj_symbol_define(ob, sym, sec, base, size);
- for (u32 i = 0; i < p->static_relocs_len; ++i) {
- RelocKind rk = (p->static_relocs[i].size == 8) ? R_ABS64 : R_ABS32;
- obj_reloc(ob, sec, base + p->static_relocs[i].offset, rk,
- p->static_relocs[i].target, p->static_relocs[i].addend);
- }
- p->static_relocs_len = 0;
- (void)loc;
- return;
- }
-
- override_sec = pick_object_section(p, quals, has_nonzero);
- if (override_sec != OBJ_SEC_NONE) {
- /* .rodata path: emit bytes directly here so we can pin the section.
- * obj_section dedupes by name, so multiple const inits share one
- * .rodata — align each placement to the object's own requirement. */
- u32 base = obj_align_to(ob, override_sec, align > 1u ? align : 1u);
- {
- u8* dst = obj_reserve(ob, override_sec, size);
- if (dst && buf) memcpy(dst, buf, size);
- }
- obj_symbol_define(ob, sym, override_sec, base, size);
- /* Flush pending pointer-init relocations against this section. */
- for (u32 i = 0; i < p->static_relocs_len; ++i) {
- RelocKind rk = (p->static_relocs[i].size == 8) ? R_ABS64 : R_ABS32;
- obj_reloc(ob, override_sec, base + p->static_relocs[i].offset, rk,
- p->static_relocs[i].target, p->static_relocs[i].addend);
- }
- p->static_relocs_len = 0;
- (void)loc;
- return;
- }
-
- if (!has_init || !has_nonzero) {
- /* BSS path. SSEM_NOBITS keeps the bytes off-disk; the loader zeros
- * the section image. The symbol's value is the offset within the
- * accumulated bss_size — bumped after we record this object. */
- Sym sname = pool_intern_cstr(p->pool, ".bss");
- ObjSecId sec = obj_section_ex(ob, sname, SEC_BSS, SSEM_NOBITS,
- SF_ALLOC | SF_WRITE,
- align ? align : 1u, 0, OBJ_SEC_NONE, 0);
- u32 a = align ? align : 1u;
- u32 base = obj_align_to(ob, sec, a);
- obj_reserve_bss(ob, sec, base + size, a);
- obj_symbol_define(ob, sym, sec, base, size);
- return;
- }
- /* .data path. */
- {
- Sym sname = pool_intern_cstr(p->pool, ".data");
- ObjSecId sec = obj_section(ob, sname, SEC_DATA, SF_ALLOC | SF_WRITE,
- align ? align : 1u);
- u32 base = obj_align_to(ob, sec, align ? align : 1u);
- u8* dst = obj_reserve(ob, sec, size);
- if (dst) memcpy(dst, buf, size);
- obj_symbol_define(ob, sym, sec, base, size);
- /* Flush pointer-init relocations against the .data section. */
- for (u32 i = 0; i < p->static_relocs_len; ++i) {
- RelocKind rk = (p->static_relocs[i].size == 8) ? R_ABS64 : R_ABS32;
- obj_reloc(ob, sec, base + p->static_relocs[i].offset, rk,
- p->static_relocs[i].target, p->static_relocs[i].addend);
- }
- p->static_relocs_len = 0;
- }
-}
+/* ============================================================
+ * Static-local symbol naming
+ * ============================================================ */
-/* Mint a unique linker name for a static local: `<orig>.<counter>`. The
- * static_local_counter never resets across the TU, so two static locals in
- * different functions never collide even if they share the source name. */
-static Sym mint_static_local_sym(Parser* p, Sym orig) {
+/* Mint a unique linker name for a static local: `<orig>.<counter>`. */
+Sym mint_static_local_sym(Parser* p, Sym orig) {
size_t olen = 0;
const char* on = pool_str(p->pool, orig, &olen);
char buf[128];
@@ -5201,70 +382,22 @@ static Sym mint_static_local_sym(Parser* p, Sym orig) {
return pool_intern(p->pool, buf, wlen);
}
-/* If `ty` is an incomplete array (`T[]`), peek the initializer at p->cur
- * and complete the type by counting the items it provides. Three cases:
- * - `T` is a char-kind and the initializer is a string literal: count =
- * decoded length (including NUL).
- * - `{...}` initializer: record the braced range and count top-level
- * items; positional only, no designators (sufficient for the corpus).
- * After completion the parser is rewound to the recorded `{`.
- * - Otherwise: panic (incomplete array with non-list init).
- * Returns the completed array type. The caller should use this as the
- * declared variable type going forward. */
-static const Type* complete_incomplete_array(Parser* p, const Type* ty) {
- const Type* elem;
- if (!ty || ty->kind != TY_ARRAY || !ty->arr.incomplete) return ty;
- elem = ty->arr.elem;
- if (is_char_kind(elem) && p->cur.kind == TOK_STR) {
- Tok t = p->cur;
- size_t n = 0;
- u8* bytes = decode_string_literal(p, &t, &n);
- p->c->env->heap->free(p->c->env->heap, bytes, 0);
- return type_array(p->pool, elem, (u32)n, /*incomplete=*/0);
- }
- if (is_punct(&p->cur, '{')) {
- u32 cnt;
- record_braced_block(p);
- cnt = count_recorded_top_level_items(p->replay, p->replay_len);
- /* String literal as the sole brace contents is also valid: `char s[] =
- * {"hi"}`. Detect by replay[1] being TOK_STR; recompute count from the
- * decoded length. */
- if (cnt == 1 && p->replay_len >= 3 && p->replay[1].kind == TOK_STR &&
- is_char_kind(elem)) {
- Tok t = p->replay[1];
- size_t n = 0;
- u8* bytes = decode_string_literal(p, &t, &n);
- p->c->env->heap->free(p->c->env->heap, bytes, 0);
- cnt = (u32)n;
- }
- replay_rewind(p);
- return type_array(p->pool, elem, cnt, /*incomplete=*/0);
- }
- perr(p, "initializer cannot complete incomplete array type");
-}
+/* ============================================================
+ * Declarations
+ * ============================================================ */
-/* Parse a single init-declarator after the decl-specs have been consumed.
- * Grammar: declarator = (`*` qual*)* (IDENT | `(` declarator `)`) suffix*
- * init = `=` (assign_expr | brace_init) */
+/* Parse a single init-declarator after the decl-specs have been consumed. */
static void parse_init_declarator(Parser* p, const DeclSpecs* specs) {
SrcLoc loc;
Sym name;
const Type* var_ty = parse_declarator(p, specs->type, &name, &loc);
- /* Typedef declaration: bind the name as SEK_TYPEDEF in the current
- * scope so subsequent decl-spec sites can recognize it as a type
- * specifier. No storage is allocated and an initializer is not
- * permitted. */
if (specs->storage == DS_TYPEDEF) {
if (is_punct(&p->cur, '=')) {
perr(p, "typedef declarator cannot have initializer");
}
{
SymEntry* e = scope_define(p, name, SEK_TYPEDEF, var_ty);
- /* `typedef T B[n]` (or via a VLA-typedef chain): snapshot the
- * VLA byte size at typedef site so subsequent `B v` declarations
- * each alloca the same captured runtime size, even if the names
- * referenced in the size expression have since changed. */
if (p->vla_pending && var_ty && var_ty->kind == TY_ARRAY) {
FrameSlot count_slot = p->vla_pending_count_slot;
const Type* elem_ty = var_ty->arr.elem;
@@ -5292,7 +425,6 @@ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) {
p->vla_pending = 0;
p->vla_pending_count_slot = FRAME_SLOT_NONE;
} else if (specs->vla_byte_slot != FRAME_SLOT_NONE) {
- /* Typedef of a typedef'd VLA: chain the captured slot. */
e->vla_byte_slot = specs->vla_byte_slot;
}
}
@@ -5300,10 +432,6 @@ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) {
return;
}
- /* Static-storage locals are promoted to a globally-visible symbol with
- * internal linkage; the local scope binds to that symbol so subsequent
- * uses load through cg_push_global. The variable's storage persists
- * across calls (§6.2.4 ¶3). Initializer must be a constant expression. */
if (specs->storage == DS_STATIC) {
Decl decl_in;
DeclId did;
@@ -5326,9 +454,6 @@ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) {
e = scope_define(p, name, SEK_GLOBAL, var_ty);
e->v.sym = sym;
has_init = accept_punct(p, '=');
- /* `static T name[] = {...}` at block scope: peek the initializer to
- * deduce the element count before emitting, the same as the file-scope
- * path above (search for the matching call in parse_global_decl). */
if (has_init && var_ty && var_ty->kind == TY_ARRAY && var_ty->arr.incomplete) {
const Type* completed = complete_incomplete_array(p, var_ty);
if (completed != var_ty) {
@@ -5342,12 +467,6 @@ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) {
return;
}
- /* `extern` block-scope declaration: declares the name but does not define
- * storage. The matching defining declaration must appear elsewhere (file
- * scope here, or another TU). Per §6.2.2 ¶4, if a prior file-scope
- * declaration of this identifier is visible, the linkage of this extern
- * decl matches it (so `static int g; ... { extern int g; }` resolves to
- * the same internal-linkage symbol). */
if (specs->storage == DS_EXTERN) {
Decl decl_in;
DeclId did;
@@ -5379,13 +498,6 @@ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) {
return;
}
- /* VLA: the declarator type is `T[]` (incomplete array) with either a
- * pending runtime count from an inline `T name[n]` suffix, or — when
- * `name` was declared via a VLA typedef — a byte-size slot already
- * captured at the typedef site (carried in specs->vla_byte_slot). The
- * variable binds as `T*` (alloca's return) so subscript/pointer arith
- * just work; the SymEntry's vla_byte_slot lets `sizeof(name)` emit a
- * runtime load instead of the constant pointer width. */
if (var_ty && var_ty->kind == TY_ARRAY && var_ty->arr.incomplete &&
(p->vla_pending || specs->vla_byte_slot != FRAME_SLOT_NONE)) {
const Type* elem_ty = var_ty->arr.elem;
@@ -5394,8 +506,6 @@ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) {
FrameSlot ptr_slot;
SymEntry* sym_entry;
if (p->vla_pending) {
- /* Inline VLA: derive byte size = count * sizeof(elem) and stash
- * it in a fresh i64 slot. */
FrameSlot count_slot = p->vla_pending_count_slot;
u32 esz = abi_sizeof(p->abi, elem_ty);
FrameSlotDesc bsd;
@@ -5419,7 +529,6 @@ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) {
cg_store(p->cg);
cg_drop(p->cg);
} else {
- /* Typedef'd VLA: byte-size already captured at typedef site. */
byte_slot = specs->vla_byte_slot;
}
ptr_slot = make_local(p, name, ptr_ty, loc);
@@ -5445,9 +554,6 @@ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) {
int has_init = is_punct(&p->cur, '=');
FrameSlot s;
if (has_init && var_ty && var_ty->kind == TY_ARRAY && var_ty->arr.incomplete) {
- /* `T name[] = ...`: peek the initializer to deduce the count, then
- * allocate the slot with the now-complete type. The slot allocation
- * has to wait until after sizing, so move it inside this branch. */
advance(p); /* '=' */
var_ty = complete_incomplete_array(p, var_ty);
s = make_local_aligned(p, name, var_ty, loc, specs->align);
@@ -5460,13 +566,10 @@ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) {
cg_set_loc(p->cg, loc);
if ((var_ty->kind == TY_STRUCT || var_ty->kind == TY_UNION) &&
!is_punct(&p->cur, '{')) {
- /* §6.7.9 ¶13: an aggregate initializer that is not a brace list
- * must be a single expression of compatible type — copy it. */
parse_assign_expr(p);
emit_struct_copy_into_slot(p, s, var_ty, 0, var_ty);
} else if (var_ty->kind == TY_ARRAY || var_ty->kind == TY_STRUCT ||
var_ty->kind == TY_UNION) {
- /* Brace initializer (or string literal — Phase 6). */
init_at(p, s, var_ty, 0, var_ty);
} else {
cg_push_local_typed(p->cg, s, var_ty);
@@ -5474,19 +577,13 @@ static void parse_init_declarator(Parser* p, const DeclSpecs* specs) {
to_rvalue(p);
coerce_top_to_lvalue(p);
cg_store(p->cg);
- /* cg_store leaves the assigned value on the stack (C semantics);
- * an init-declarator is statement-context, so drop it. */
cg_drop(p->cg);
}
}
}
}
-static void parse_local_decl(Parser* p, const DeclSpecs* specs) {
- /* `struct S { ... };`, `struct S;`, `enum E { ... };` introduce only a
- * tag and have no declarator — accept the bare `;` here. Without a
- * declarator there is also no `(...)` (not a function), so falling
- * through to parse_init_declarator would panic. */
+void parse_local_decl(Parser* p, const DeclSpecs* specs) {
if (accept_punct(p, ';')) return;
parse_init_declarator(p, specs);
while (accept_punct(p, ',')) {
@@ -5496,791 +593,11 @@ static void parse_local_decl(Parser* p, const DeclSpecs* specs) {
}
/* ============================================================
- * Statements
- * ============================================================ */
-
-static void parse_if_stmt(Parser* p) {
- CGLabel L_else = cg_label_new(p->cg);
- CGLabel L_end = cg_label_new(p->cg);
- expect_punct(p, '(', "'('");
- parse_expr(p);
- to_rvalue(p);
- expect_punct(p, ')', "')'");
- cg_branch_false(p->cg, L_else);
- parse_stmt(p);
- if (accept_kw(p, KW_ELSE)) {
- cg_jump(p->cg, L_end);
- cg_label_place(p->cg, L_else);
- parse_stmt(p);
- cg_label_place(p->cg, L_end);
- } else {
- cg_label_place(p->cg, L_else);
- }
-}
-
-static void parse_while_stmt(Parser* p) {
- CGLabel L_top = cg_label_new(p->cg);
- CGLabel L_end = cg_label_new(p->cg);
- CGLabel saved_break = p->cur_break;
- CGLabel saved_continue = p->cur_continue;
- expect_punct(p, '(', "'('");
- cg_label_place(p->cg, L_top);
- parse_expr(p);
- to_rvalue(p);
- expect_punct(p, ')', "')'");
- cg_branch_false(p->cg, L_end);
- p->cur_break = L_end;
- p->cur_continue = L_top;
- parse_stmt(p);
- p->cur_break = saved_break;
- p->cur_continue = saved_continue;
- cg_jump(p->cg, L_top);
- cg_label_place(p->cg, L_end);
-}
-
-static void parse_for_stmt(Parser* p) {
- CGLabel L_top = cg_label_new(p->cg);
- CGLabel L_step = cg_label_new(p->cg);
- CGLabel L_end = cg_label_new(p->cg);
- CGLabel saved_break = p->cur_break;
- CGLabel saved_continue = p->cur_continue;
-
- /* `for` introduces a block scope spanning init/cond/step/body so that
- * an init declaration `int i=...` is visible only inside the loop. */
- scope_push(p);
- expect_punct(p, '(', "'('");
-
- /* init: declaration | expr | ; */
- if (!accept_punct(p, ';')) {
- DeclSpecs specs;
- if (parse_decl_specs(p, &specs)) {
- parse_local_decl(p, &specs); /* consumes the trailing ';' */
- } else {
- parse_expr(p);
- cg_drop(p->cg);
- expect_punct(p, ';', "';'");
- }
- }
-
- cg_label_place(p->cg, L_top);
- /* cond: optional. Empty cond means "always true". */
- if (!is_punct(&p->cur, ';')) {
- parse_expr(p);
- to_rvalue(p);
- cg_branch_false(p->cg, L_end);
- }
- expect_punct(p, ';', "';'");
-
- /* Need to remember the step expression — but since this is single-pass,
- * we can't replay tokens. Standard trick: emit body before step by
- * jumping over the step on first entry, then placing the step where
- * the back-edge arrives. */
- {
- CGLabel L_body = cg_label_new(p->cg);
- cg_jump(p->cg, L_body);
- cg_label_place(p->cg, L_step);
- /* step: optional. */
- if (!is_punct(&p->cur, ')')) {
- parse_expr(p);
- cg_drop(p->cg);
- }
- cg_jump(p->cg, L_top);
- expect_punct(p, ')', "')'");
- cg_label_place(p->cg, L_body);
-
- p->cur_break = L_end;
- p->cur_continue = L_step;
- parse_stmt(p);
- p->cur_break = saved_break;
- p->cur_continue = saved_continue;
-
- cg_jump(p->cg, L_step);
- cg_label_place(p->cg, L_end);
- }
- scope_pop(p);
-}
-
-static void parse_return_stmt(Parser* p) {
- if (accept_punct(p, ';')) {
- cg_ret(p->cg, 0);
- return;
- }
- parse_expr(p);
- to_rvalue(p);
- expect_punct(p, ';', "';' after return value");
- cg_ret(p->cg, 1);
-}
-
-static void parse_break_stmt(Parser* p) {
- if (p->cur_break == 0) perr(p, "'break' outside of loop or switch");
- cg_jump(p->cg, p->cur_break);
- expect_punct(p, ';', "';' after break");
-}
-
-static void parse_continue_stmt(Parser* p) {
- if (p->cur_continue == 0) perr(p, "'continue' outside of loop");
- cg_jump(p->cg, p->cur_continue);
- expect_punct(p, ';', "';' after continue");
-}
-
-static void parse_do_stmt(Parser* p) {
- CGLabel L_top = cg_label_new(p->cg);
- CGLabel L_cond = cg_label_new(p->cg);
- CGLabel L_end = cg_label_new(p->cg);
- CGLabel saved_break = p->cur_break;
- CGLabel saved_continue = p->cur_continue;
- cg_label_place(p->cg, L_top);
- p->cur_break = L_end;
- p->cur_continue = L_cond;
- parse_stmt(p);
- p->cur_break = saved_break;
- p->cur_continue = saved_continue;
- cg_label_place(p->cg, L_cond);
- if (!is_kw(p, &p->cur, KW_WHILE)) perr(p, "expected 'while' after do-body");
- advance(p); /* while */
- expect_punct(p, '(', "'('");
- parse_expr(p);
- to_rvalue(p);
- expect_punct(p, ')', "')' after do-while condition");
- expect_punct(p, ';', "';' after do-while");
- cg_branch_true(p->cg, L_top);
- cg_label_place(p->cg, L_end);
-}
-
-static GotoLabel* label_get_or_create(Parser* p, Sym name, SrcLoc loc) {
- GotoLabel* gl;
- for (gl = p->goto_labels; gl; gl = gl->next) {
- if (gl->name == name) return gl;
- }
- gl = arena_new(p->c->tu, GotoLabel);
- if (!gl) perr(p, "out of memory in label_get_or_create");
- memset(gl, 0, sizeof *gl);
- gl->name = name;
- gl->label = cg_label_new(p->cg);
- gl->placed = 0;
- gl->first_use = loc;
- gl->next = p->goto_labels;
- p->goto_labels = gl;
- return gl;
-}
-
-static void parse_goto_stmt(Parser* p) {
- Sym name;
- SrcLoc loc;
- GotoLabel* gl;
- if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) {
- perr(p, "expected label name after 'goto'");
- }
- name = p->cur.v.ident;
- loc = tok_loc(&p->cur);
- advance(p);
- expect_punct(p, ';', "';' after goto");
- gl = label_get_or_create(p, name, loc);
- cg_jump(p->cg, gl->label);
-}
-
-/* `IDENT ':' STMT` — labeled statement. The IDENT lookup happens in the label
- * namespace, not the ordinary identifier scope. Caller has already verified
- * that cur is a non-keyword IDENT and the next token is ':'. */
-static void parse_label_stmt(Parser* p) {
- Sym name = p->cur.v.ident;
- SrcLoc loc = tok_loc(&p->cur);
- GotoLabel* gl;
- advance(p); /* IDENT */
- advance(p); /* ':' */
- gl = label_get_or_create(p, name, loc);
- if (gl->placed) perr(p, "duplicate label");
- gl->placed = 1;
- cg_label_place(p->cg, gl->label);
- parse_stmt(p);
-}
-
-static void parse_case_stmt(Parser* p) {
- i64 v;
- CGLabel L;
- CaseEntry* ce;
- SrcLoc loc = tok_loc(&p->cur);
- if (!p->cur_switch) perr(p, "'case' label not in switch statement");
- v = eval_const_int(p, loc);
- expect_punct(p, ':', "':' after case constant");
- L = cg_label_new(p->cg);
- cg_label_place(p->cg, L);
- ce = arena_new(p->c->tu, CaseEntry);
- if (!ce) perr(p, "out of memory in parse_case_stmt");
- ce->value = v;
- ce->label = L;
- ce->next = p->cur_switch->cases;
- p->cur_switch->cases = ce;
- parse_stmt(p);
-}
-
-static void parse_default_stmt(Parser* p) {
- CGLabel L;
- if (!p->cur_switch) perr(p, "'default' label not in switch statement");
- expect_punct(p, ':', "':' after default");
- if (p->cur_switch->default_label != 0) perr(p, "duplicate 'default' label");
- L = cg_label_new(p->cg);
- cg_label_place(p->cg, L);
- p->cur_switch->default_label = L;
- parse_stmt(p);
-}
-
-static void parse_switch_stmt(Parser* p) {
- /* Single-pass lowering: evaluate the controlling expression once into a
- * temp, jump over the body to the dispatch chain, parse the body (which
- * places case/default labels and records (value, label) pairs in
- * cur_switch), then emit a compare-and-branch chain that selects the
- * matching label. Falls through to L_end if no case/default matches. */
- CGLabel L_dispatch = cg_label_new(p->cg);
- CGLabel L_end = cg_label_new(p->cg);
- CGLabel saved_break = p->cur_break;
- SwitchCtx ctx;
- SwitchCtx* saved_switch = p->cur_switch;
- FrameSlotDesc fsd;
- const Type* vty;
- CaseEntry* it;
- CaseEntry* prev;
- CaseEntry* head;
-
- expect_punct(p, '(', "'('");
- parse_expr(p);
- to_rvalue(p);
- vty = cg_top_type(p->cg);
- if (!vty) vty = ty_int(p);
- expect_punct(p, ')', "')' after switch expression");
-
- memset(&ctx, 0, sizeof ctx);
- memset(&fsd, 0, sizeof fsd);
- fsd.type = vty;
- fsd.size = abi_sizeof(p->abi, vty);
- fsd.align = abi_alignof(p->abi, vty);
- fsd.kind = FS_LOCAL;
- ctx.value_slot = cg_local(p->cg, &fsd);
- ctx.value_type = vty;
- ctx.parent = saved_switch;
-
- /* Stash the value: stack has [rv]; want [lv, rv] then store. */
- cg_push_local_typed(p->cg, ctx.value_slot, vty);
- cg_swap(p->cg);
- cg_store(p->cg);
- cg_drop(p->cg);
-
- cg_jump(p->cg, L_dispatch);
-
- p->cur_switch = &ctx;
- p->cur_break = L_end;
- parse_stmt(p);
- p->cur_break = saved_break;
- p->cur_switch = saved_switch;
-
- /* Body fall-through exits the switch. */
- cg_jump(p->cg, L_end);
-
- /* Emit dispatch in source order — reverse the LIFO chain. */
- cg_label_place(p->cg, L_dispatch);
- prev = NULL;
- head = ctx.cases;
- while (head) {
- CaseEntry* nxt = head->next;
- head->next = prev;
- prev = head;
- head = nxt;
- }
- for (it = prev; it; it = it->next) {
- cg_push_local_typed(p->cg, ctx.value_slot, vty);
- cg_load(p->cg);
- cg_push_int(p->cg, it->value, vty);
- cg_cmp(p->cg, CMP_EQ);
- cg_branch_true(p->cg, it->label);
- }
- if (ctx.default_label) {
- cg_jump(p->cg, ctx.default_label);
- }
- cg_label_place(p->cg, L_end);
-}
-
-/* `_Static_assert ( constant-expression , string-literal ) ;` (§6.7.10).
- * The expression is evaluated at compile time; failure aborts parsing
- * with a diagnostic that includes the user's message. The C11 spec
- * requires the message; C2x makes it optional, but we follow C11 here. */
-static void parse_static_assert(Parser* p) {
- SrcLoc loc = tok_loc(&p->cur);
- i64 v;
- if (!accept_kw(p, KW_STATIC_ASSERT)) {
- perr(p, "expected _Static_assert");
- }
- expect_punct(p, '(', "'(' after _Static_assert");
- v = eval_const_int(p, tok_loc(&p->cur));
- expect_punct(p, ',', "',' separating _Static_assert args");
- if (p->cur.kind != TOK_STR) {
- perr(p, "expected string literal as _Static_assert message");
- }
- {
- Tok msg = p->cur;
- advance(p);
- expect_punct(p, ')', "')' after _Static_assert");
- expect_punct(p, ';', "';' after _Static_assert");
- if (!v) {
- size_t mlen = 0;
- const char* mstr = pool_str(p->pool, msg.spelling, &mlen);
- compiler_panic(p->c, loc, "static assertion failed: %.*s",
- (int)mlen, mstr ? mstr : "");
- }
- }
-}
-
-/* GNU inline-asm statement (Track A — frontend only).
- *
- * asm-stmt := ('asm'|'__asm__') ['volatile'|'__volatile__'] ['goto']
- * '(' template-string
- * [':' outputs [':' inputs [':' clobbers [':' labels]]]]
- * ')' ';'
- * outputs := output (',' output)*
- * output := ['[' name ']'] string-literal '(' lvalue-expr ')'
- * inputs := input (',' input)*
- * input := ['[' name ']'] string-literal '(' expr ')'
- * clobbers := string-literal (',' string-literal)*
- * labels := identifier (',' identifier)*
- *
- * The leading 'asm'/'__asm__' keyword has already been consumed by
- * parse_stmt. This function:
- * 1. Captures each output's lvalue address into a fresh frame slot, so
- * after cg_inline_asm pushes the result SValues we can store them
- * back into the user's lvalues.
- * 2. Pushes each input's rvalue onto the CG value stack in declaration
- * order — cg_inline_asm consumes them per its docstring contract.
- * 3. Calls cg_inline_asm. The cg layer (Track B) is still a panic
- * stub, so any test exercising this path will hit the panic until
- * Track B lands.
- * 4. After return, walks outputs in REVERSE order and stores the
- * top-of-stack SValue into the captured lvalue address.
- *
- * `volatile` / `__volatile__` are accepted and ignored (informational —
- * see ASM.md §9 / INLINEASM.md §9). `goto` is accepted at the keyword
- * level; the labels list, if present, is parsed and discarded — Track B
- * will reject `asm goto` inside cg_inline_asm per INLINEASM.md §1. */
-typedef struct AsmOutLValue {
- FrameSlot addr_slot; /* holds &lvalue (pointer) */
- const Type* ptr_ty; /* pointer-to type (for the slot/load) */
- const Type* val_ty; /* the lvalue's value type (for cg_deref) */
-} AsmOutLValue;
-
-static Sym parse_asm_operand_name(Parser* p) {
- /* `[ ident ]` already known to start with `[` per the caller. Returns
- * the interned ident Sym, or 0 if absent. */
- Sym name = 0;
- if (!is_punct(&p->cur, '[')) return 0;
- advance(p);
- if (p->cur.kind != TOK_IDENT) {
- perr(p, "expected identifier inside '[name]' on asm operand");
- }
- name = p->cur.v.ident;
- advance(p);
- expect_punct(p, ']', "']' after asm operand name");
- return name;
-}
-
-static const char* parse_asm_str(Parser* p, const char* what) {
- /* Adjacent string literals are already fused at the pp-pull boundary
- * (see fuse_string_lits). Decode the fused token to raw bytes and
- * intern the resulting C-string into the global pool. */
- u8* bytes;
- size_t nlen = 0;
- Sym s;
- Tok t;
- if (p->cur.kind != TOK_STR) {
- perr(p, "expected string literal in %s", what);
- }
- t = p->cur;
- advance(p);
- bytes = decode_string_literal(p, &t, &nlen);
- /* decode_string_literal includes a trailing NUL in nlen; strip it
- * before interning so the C-string view round-trips. */
- if (nlen > 0) nlen -= 1;
- s = pool_intern(p->pool, (const char*)bytes, nlen);
- p->c->env->heap->free(p->c->env->heap, bytes, 0);
- return pool_str(p->pool, s, NULL);
-}
-
-static void parse_asm_stmt(Parser* p) {
- /* The 'asm'/'__asm__' keyword was just consumed by parse_stmt. */
- const char* tmpl;
- AsmConstraint* outs = NULL;
- AsmConstraint* ins = NULL;
- Sym* clobbers = NULL;
- AsmOutLValue* out_lvs = NULL;
- u32 nout = 0, nin = 0, nclob = 0;
- u32 cap_out = 0, cap_in = 0, cap_clob = 0;
- int saw_goto = 0;
- SrcLoc loc = tok_loc(&p->cur);
-
- /* Optional `volatile` / `__volatile__` — informational, dropped. */
- for (;;) {
- if (accept_kw(p, KW_VOLATILE)) continue;
- if (p->cur.kind == TOK_IDENT && p->cur.v.ident == p->sym_volatile_alias) {
- advance(p);
- continue;
- }
- break;
- }
- /* Optional `goto`. */
- if (accept_kw(p, KW_GOTO)) saw_goto = 1;
-
- expect_punct(p, '(', "'(' after asm");
-
- /* Template string. */
- tmpl = parse_asm_str(p, "asm template");
-
- /* ---- outputs ---- */
- if (accept_punct(p, ':')) {
- if (!is_punct(&p->cur, ':') && !is_punct(&p->cur, ')')) {
- cap_out = 4;
- outs = (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, cap_out);
- out_lvs = (AsmOutLValue*)arena_array(p->c->tu, AsmOutLValue, cap_out);
- for (;;) {
- AsmConstraint c;
- AsmOutLValue lv;
- const Type* val_ty;
- const Type* ptr_ty;
- FrameSlotDesc fsd;
- FrameSlot slot;
- memset(&c, 0, sizeof c);
- memset(&lv, 0, sizeof lv);
- c.name = parse_asm_operand_name(p); /* 0 if absent */
- c.str = parse_asm_str(p, "asm output constraint");
- /* Per GCC: outputs require '=' or '+'. Track B will validate;
- * the parser stays lenient here. */
- if (c.str && c.str[0] == '+') c.dir = ASM_INOUT;
- else c.dir = ASM_OUT;
- expect_punct(p, '(', "'(' before asm output lvalue");
- /* parse_assign_expr leaves an lvalue (or rvalue) on the stack.
- * We need the address; cg_addr converts an lvalue → ptr rvalue.
- * Then we stash that pointer in a scratch frame slot so we can
- * store back into it after cg_inline_asm returns. */
- parse_assign_expr(p);
- val_ty = cg_top_type(p->cg);
- if (!val_ty) perr(p, "asm output: cannot determine lvalue type");
- c.type = val_ty;
- cg_addr(p->cg);
- ptr_ty = cg_top_type(p->cg);
- if (!ptr_ty) perr(p, "asm output: cannot take address");
- memset(&fsd, 0, sizeof fsd);
- fsd.type = ptr_ty;
- fsd.size = 8;
- fsd.align = 8;
- fsd.kind = FS_LOCAL;
- slot = cg_local(p->cg, &fsd);
- cg_push_local_typed(p->cg, slot, ptr_ty);
- cg_swap(p->cg);
- cg_store(p->cg);
- cg_drop(p->cg);
- lv.addr_slot = slot;
- lv.ptr_ty = ptr_ty;
- lv.val_ty = val_ty;
- expect_punct(p, ')', "')' after asm output lvalue");
- if (nout == cap_out) {
- u32 nc = cap_out * 2;
- AsmConstraint* nb =
- (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, nc);
- AsmOutLValue* nlv =
- (AsmOutLValue*)arena_array(p->c->tu, AsmOutLValue, nc);
- memcpy(nb, outs, sizeof(AsmConstraint) * nout);
- memcpy(nlv, out_lvs, sizeof(AsmOutLValue) * nout);
- outs = nb;
- out_lvs = nlv;
- cap_out = nc;
- }
- outs[nout] = c;
- out_lvs[nout] = lv;
- nout++;
- if (!accept_punct(p, ',')) break;
- }
- }
-
- /* ---- inputs ---- */
- if (accept_punct(p, ':')) {
- if (!is_punct(&p->cur, ':') && !is_punct(&p->cur, ')')) {
- cap_in = 4;
- ins = (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, cap_in);
- for (;;) {
- AsmConstraint c;
- memset(&c, 0, sizeof c);
- c.name = parse_asm_operand_name(p);
- c.str = parse_asm_str(p, "asm input constraint");
- c.dir = ASM_IN;
- expect_punct(p, '(', "'(' before asm input expression");
- /* Push input value onto the CG stack in declaration order.
- * cg_inline_asm consumes them per its docstring. */
- parse_assign_expr(p);
- to_rvalue(p);
- /* Capture the rvalue's C type for the binder. cg_top_type
- * is valid after to_rvalue while the value is still on top
- * of the CG stack. */
- c.type = cg_top_type(p->cg);
- expect_punct(p, ')', "')' after asm input expression");
- if (nin == cap_in) {
- u32 nc = cap_in * 2;
- AsmConstraint* nb =
- (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, nc);
- memcpy(nb, ins, sizeof(AsmConstraint) * nin);
- ins = nb;
- cap_in = nc;
- }
- ins[nin++] = c;
- if (!accept_punct(p, ',')) break;
- }
- }
-
- /* ---- clobbers ---- */
- if (accept_punct(p, ':')) {
- if (!is_punct(&p->cur, ':') && !is_punct(&p->cur, ')')) {
- cap_clob = 4;
- clobbers = (Sym*)arena_array(p->c->tu, Sym, cap_clob);
- for (;;) {
- const char* cstr;
- Sym cs;
- cstr = parse_asm_str(p, "asm clobber");
- cs = pool_intern_cstr(p->pool, cstr);
- if (nclob == cap_clob) {
- u32 nc = cap_clob * 2;
- Sym* nb = (Sym*)arena_array(p->c->tu, Sym, nc);
- memcpy(nb, clobbers, sizeof(Sym) * nclob);
- clobbers = nb;
- cap_clob = nc;
- }
- clobbers[nclob++] = cs;
- if (!accept_punct(p, ',')) break;
- }
- }
-
- /* ---- labels (asm goto) ---- */
- if (accept_punct(p, ':')) {
- /* Parse-only: consume identifier list. cg_inline_asm rejects
- * asm-goto in v1 per INLINEASM.md §1. */
- if (!is_punct(&p->cur, ')')) {
- for (;;) {
- if (p->cur.kind != TOK_IDENT) {
- perr(p, "expected label identifier in asm-goto label list");
- }
- advance(p);
- if (!accept_punct(p, ',')) break;
- }
- }
- }
- }
- }
- }
-
- expect_punct(p, ')', "')' to close asm");
- expect_punct(p, ';', "';' after asm statement");
-
- (void)saw_goto; /* parsed; cg layer rejects asm-goto in v1 */
-
- /* In-out ('+r') decomposition: for each ASM_INOUT output k, synthesize
- * a matching input "<k>" carrying the current value of the bound
- * lvalue. The binder's matching-constraint path copies the value into
- * the output reg before the asm runs; the existing store-back loop
- * below then writes the post-asm value back into the lvalue. GCC's
- * matching-digit syntax tops out at "9", so v1 supports up to 10
- * +r constraints per asm statement. */
- u32 ninout = 0;
- for (u32 i = 0; i < nout; ++i) {
- if (outs[i].dir == ASM_INOUT) ninout++;
- }
- if (ninout > 0) {
- static const char* const k_match_strs[10] = {
- "0", "1", "2", "3", "4", "5", "6", "7", "8", "9"};
- /* Grow ins[] to fit synthesized entries. */
- u32 need = nin + ninout;
- if (need > cap_in) {
- u32 nc = cap_in ? cap_in : 4;
- while (nc < need) nc *= 2;
- AsmConstraint* nb =
- (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, nc);
- if (nin) memcpy(nb, ins, sizeof(AsmConstraint) * nin);
- ins = nb;
- cap_in = nc;
- }
- for (u32 i = 0; i < nout; ++i) {
- if (outs[i].dir != ASM_INOUT) continue;
- if (i >= 10) {
- perr(p, "asm: '+r' constraint at output index >9 exceeds "
- "matching-digit syntax");
- }
- AsmOutLValue* lv = &out_lvs[i];
- /* Load lvalue's current value onto the cg stack:
- * [scratch ptr-slot lvalue] → [ptr rvalue] → [rc lvalue] → [rc value] */
- cg_push_local_typed(p->cg, lv->addr_slot, lv->ptr_ty);
- cg_load(p->cg);
- cg_deref(p->cg, lv->val_ty);
- cg_load(p->cg);
- AsmConstraint mc;
- memset(&mc, 0, sizeof mc);
- mc.str = k_match_strs[i];
- mc.dir = ASM_IN;
- mc.type = lv->val_ty;
- ins[nin++] = mc;
- }
- }
-
- cg_set_loc(p->cg, loc);
- cg_inline_asm(p->cg, tmpl, outs, nout, ins, nin, clobbers, nclob);
-
- /* For each output (in reverse so the last output is on top first),
- * store the SValue cg_inline_asm pushed back into the captured lvalue
- * via the address we stashed. cg_inline_asm is currently a panic stub
- * (Track B), so this loop is unreachable until Track B lands. */
- if (nout > 0) {
- u32 i;
- for (i = nout; i-- > 0;) {
- AsmOutLValue* lv = &out_lvs[i];
- /* Stack: [..., out_val]. Push the address (load from slot, then
- * deref to make it an lvalue), swap, store, drop. */
- cg_push_local_typed(p->cg, lv->addr_slot, lv->ptr_ty);
- cg_load(p->cg);
- cg_deref(p->cg, lv->val_ty);
- cg_swap(p->cg);
- cg_store(p->cg);
- cg_drop(p->cg);
- }
- }
-}
-
-static void parse_compound_stmt(Parser* p) {
- expect_punct(p, '{', "'{'");
- scope_push(p);
- while (!is_punct(&p->cur, '}') && p->cur.kind != TOK_EOF) {
- /* Drain stray newlines & pp-hash artifacts. (PP normally consumes
- * these, but we tolerate them here as a no-op safety net.) */
- if (p->cur.kind == TOK_NEWLINE || is_pp_hash(&p->cur)) {
- advance(p);
- continue;
- }
- if (is_kw(p, &p->cur, KW_STATIC_ASSERT)) {
- parse_static_assert(p);
- continue;
- }
- {
- DeclSpecs specs;
- Tok save_tok = p->cur; /* nothing to roll back yet — accept reused below */
- (void)save_tok;
- if (parse_decl_specs(p, &specs)) {
- parse_local_decl(p, &specs);
- } else {
- parse_stmt(p);
- }
- }
- }
- expect_punct(p, '}', "'}'");
- scope_pop(p);
-}
-
-static void parse_stmt(Parser* p) {
- /* Each statement starts from an empty value stack; recycle scratch
- * registers so a function body with many sequential reg-allocating
- * operations isn't bounded by the backend's fixed scratch window. */
- cg_set_loc(p->cg, tok_loc(&p->cur));
- /* Labeled statement: `IDENT ':' STMT`. The IDENT must not be a keyword;
- * peek1 disambiguates the label form from an expression statement that
- * happens to start with an identifier. */
- if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) {
- Tok n = peek1(p);
- if (is_punct(&n, ':')) {
- parse_label_stmt(p);
- return;
- }
- }
- if (is_punct(&p->cur, '{')) {
- parse_compound_stmt(p);
- return;
- }
- if (is_punct(&p->cur, ';')) {
- advance(p);
- return;
- }
- if (is_kw(p, &p->cur, KW_IF)) {
- advance(p);
- parse_if_stmt(p);
- return;
- }
- if (is_kw(p, &p->cur, KW_WHILE)) {
- advance(p);
- parse_while_stmt(p);
- return;
- }
- if (is_kw(p, &p->cur, KW_FOR)) {
- advance(p);
- parse_for_stmt(p);
- return;
- }
- if (is_kw(p, &p->cur, KW_DO)) {
- advance(p);
- parse_do_stmt(p);
- return;
- }
- if (is_kw(p, &p->cur, KW_RETURN)) {
- advance(p);
- parse_return_stmt(p);
- return;
- }
- if (is_kw(p, &p->cur, KW_BREAK)) {
- advance(p);
- parse_break_stmt(p);
- return;
- }
- if (is_kw(p, &p->cur, KW_CONTINUE)) {
- advance(p);
- parse_continue_stmt(p);
- return;
- }
- if (is_kw(p, &p->cur, KW_GOTO)) {
- advance(p);
- parse_goto_stmt(p);
- return;
- }
- if (is_kw(p, &p->cur, KW_SWITCH)) {
- advance(p);
- parse_switch_stmt(p);
- return;
- }
- if (is_kw(p, &p->cur, KW_CASE)) {
- advance(p);
- parse_case_stmt(p);
- return;
- }
- if (is_kw(p, &p->cur, KW_DEFAULT)) {
- advance(p);
- parse_default_stmt(p);
- return;
- }
- if (is_kw(p, &p->cur, KW_ASM) || is_kw(p, &p->cur, KW_BUILTIN_ASM)) {
- advance(p);
- parse_asm_stmt(p);
- return;
- }
- /* Expression statement. */
- parse_expr(p);
- cg_drop(p->cg);
- expect_punct(p, ';', "';' after expression");
-}
-
-/* ============================================================
* External (top-level) declarations
* ============================================================ */
-/* Parse a parameter-type-list. Returns the parameter type array and counts
- * via out-pointers; `*variadic_out` is set if the list ends in `, ...`.
- *
- * Forms accepted:
- * `(void)` — zero named params
- * `()` — old-style "unspecified args"; treated as zero
- * `(T1, T2, ...)` — named or abstract params, possibly trailing ellipsis
- *
- * Per §6.7.6.3, a parameter declared as `T x[N]` is rewritten to `T *x` (and
- * `T x()` to `T (*x)()`); the §6.7.6.3 ¶7 "[static N]" form is a hint to the
- * caller that the pointer points at ≥N elements — semantically still `T*`. */
-static void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out,
- u8* variadic_out) {
+void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out,
+ u8* variadic_out) {
ParamInfo* infos;
u32 cap = 4;
u32 n = 0;
@@ -6289,11 +606,11 @@ static void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out,
*nparams_out = 0;
if (is_punct(&p->cur, ')')) {
- return; /* `()` — no params recorded */
+ return;
}
if (is_kw(p, &p->cur, KW_VOID)) {
- Tok n = peek1(p);
- if (is_punct(&n, ')')) {
+ Tok n2 = peek1(p);
+ if (is_punct(&n2, ')')) {
advance(p); /* `void` */
return; /* `(void)` */
}
@@ -6316,7 +633,6 @@ static void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out,
pty = parse_declarator_full(p, specs.type, /*allow_abstract=*/1, &pname,
&ploc);
p->in_param_decl--;
- /* Adjust array/function parameter to pointer per §6.7.6.3. */
if (pty && pty->kind == TY_ARRAY) {
pty = type_ptr(p->pool, pty->arr.elem);
} else if (pty && pty->kind == TY_FUNC) {
@@ -6338,15 +654,6 @@ static void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out,
*nparams_out = (u16)n;
}
-/* Resolve or mint the ObjSymId for a function declaration. If the same
- * function name was seen before in file scope (forward prototype, prior
- * definition), reuse its symbol so the linker sees one definition.
- *
- * `dattrs` is the per-declarator attribute list (between `)` and `{`/`;`);
- * combined with `specs->attrs` it feeds attr_list_to_decl so DF_WEAK /
- * visibility / section / noreturn / alias_target land on the Decl before
- * decl_declare mints the ObjSym. The out-params let parse_function_body
- * propagate section_id and noreturn into CGFuncDesc. */
static SymEntry* declare_function(Parser* p, Sym fname, const Type* fn_ty,
const DeclSpecs* specs, SrcLoc fname_loc,
const Attr* dattrs,
@@ -6358,11 +665,6 @@ static SymEntry* declare_function(Parser* p, Sym fname, const Type* fn_ty,
if (out_alias_target) *out_alias_target = 0;
SymEntry* existing = scope_lookup(p, fname);
if (existing && existing->kind == SEK_FUNC) {
- /* Compatible-types check is Phase 10 territory; for v1 we trust the
- * declarations agree. Returning the existing entry lets the body
- * defs reuse the prior obj_sym. Attributes on a redeclaration apply
- * only via the per-call decode here; the existing ObjSym already has
- * its bind/visibility chosen at first sight. */
Decl tmp;
memset(&tmp, 0, sizeof tmp);
attr_list_to_decl(p->c, p->decls, specs->attrs, &tmp);
@@ -6398,10 +700,6 @@ static SymEntry* declare_function(Parser* p, Sym fname, const Type* fn_ty,
}
}
-/* Drive cg through a full function definition: build CGFuncDesc with the
- * already-resolved symbol and ABI info, open a parameter scope, allocate
- * FS_PARAM slots for each named param, dispatch cg_param, then parse the
- * compound body. The `infos` array is the parser's per-param state. */
static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty,
const ABIFuncInfo* abi, const ParamInfo* infos,
u16 nparams, SrcLoc fname_loc,
@@ -6411,9 +709,6 @@ static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty,
memset(&fd, 0, sizeof fd);
fd.sym = fsym;
- /* Phase 2: __attribute__((section)) on a function overrides the default
- * .text placement. Falls back to the parser's default text section when
- * no attribute named one. */
fd.text_section_id =
(section_id != OBJ_SEC_NONE) ? section_id : p->text_sec;
fd.group_id = OBJ_GROUP_NONE;
@@ -6422,8 +717,6 @@ static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty,
fd.params = NULL;
fd.nparams = nparams;
fd.loc = fname_loc;
- /* Propagate _Noreturn / __attribute__((noreturn)) to CG. Backends may
- * elide the trailing epilogue; v1 backends ignore the bit. */
if (decl_flags & DF_NORETURN) fd.flags |= CGFD_NORETURN;
if (nparams) {
@@ -6433,11 +726,8 @@ static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty,
pds[i].index = i;
pds[i].name = infos[i].name;
pds[i].type = infos[i].type;
- pds[i].slot = FRAME_SLOT_NONE; /* filled below */
+ pds[i].slot = FRAME_SLOT_NONE;
pds[i].abi = &abi->params[i];
- /* The aarch64 backend reads parts from `pds[i].abi->parts` directly;
- * `incoming` is the materialized CGABIPart slot used by ABIs that
- * pre-stage values. Leave NULL until a backend wires it up. */
pds[i].incoming = NULL;
pds[i].nincoming = 0;
pds[i].loc = infos[i].loc;
@@ -6446,8 +736,6 @@ static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty,
}
scope_push(p); /* parameter scope */
- /* Per-function label namespace and switch context — both are saved here
- * for hygiene even though C forbids nested function definitions. */
GotoLabel* saved_goto_labels = p->goto_labels;
SwitchCtx* saved_switch = p->cur_switch;
p->goto_labels = NULL;
@@ -6455,7 +743,6 @@ static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty,
cg_set_loc(p->cg, fname_loc);
cg_func_begin(p->cg, &fd);
- /* Allocate FS_PARAM slots and dispatch cg_param in declaration order. */
for (u16 i = 0; i < nparams; ++i) {
FrameSlotDesc fsd;
FrameSlot s;
@@ -6478,14 +765,6 @@ static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty,
}
parse_compound_stmt(p);
- /* Implicit fall-through return: emit a return so the function's epilogue
- * always has a tail to chain into. For non-void scalar returns this
- * returns a zero value, which is undefined behavior at the language
- * level but a useful safety belt against trailing-fall-through.
- * Aggregate returns can't synthesize a typed zero rvalue (no scalar
- * source), so emit a bare ret — the epilogue still runs and the
- * return value is whatever was last written into the destination
- * (UB by the same token). */
if (fn_ty->fn.ret && fn_ty->fn.ret->kind != TY_VOID &&
fn_ty->fn.ret->kind != TY_STRUCT && fn_ty->fn.ret->kind != TY_UNION) {
cg_push_int(p->cg, 0, fn_ty->fn.ret);
@@ -6493,7 +772,6 @@ static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty,
} else {
cg_ret(p->cg, 0);
}
- /* All goto targets must have been placed by some `name:` in the body. */
for (GotoLabel* gl = p->goto_labels; gl; gl = gl->next) {
if (!gl->placed) {
compiler_panic(p->c, gl->first_use, "goto to undefined label");
@@ -6505,9 +783,7 @@ static void parse_function_body(Parser* p, ObjSymId fsym, const Type* fn_ty,
scope_pop(p);
}
-/* Parse one external declaration: function definition, function prototype,
- * or (deferred) global object declaration. The declarator is consumed by
- * parse_declarator before we know whether a body or `;` follows. */
+/* Parse one external declaration. */
static void parse_external_decl(Parser* p) {
DeclSpecs specs;
Sym name;
@@ -6518,14 +794,8 @@ static void parse_external_decl(Parser* p) {
perr(p, "expected declaration");
}
- /* Tag-only declarations at file scope: `struct S;`, `enum E { ... };`,
- * etc. The decl-specs registered the tag; nothing else to do. */
if (accept_punct(p, ';')) return;
- /* `typedef` at file scope: bind one-or-more declarator names as
- * SEK_TYPEDEF in the current (file) scope. Goes through
- * parse_declarator_full so compound targets (`typedef int (*FP)(int)`,
- * `typedef int A[3]`) lower correctly. */
if (specs.storage == DS_TYPEDEF) {
for (;;) {
Sym tname = 0;
@@ -6544,27 +814,17 @@ static void parse_external_decl(Parser* p) {
return;
}
- /* Parse the declarator's pointer prefix and IDENT. Function and array
- * declarator suffixes are recognized inline below. */
base_ty = parse_pointer_layer(p, specs.type);
- if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) {
+ if (p->cur.kind != TOK_IDENT || ident_kw_inline(p, p->cur.v.ident) != KW_NONE) {
perr(p, "expected declarator");
}
name = p->cur.v.ident;
loc = tok_loc(&p->cur);
advance(p);
- /* Per-declarator attrs collected from positions between the
- * declarator-id and `=`/`,`/`;` (plus, for functions, between `)`
- * and `{`/`;`). Chained in source order; attached to the SymEntry
- * below so Phase 2 can read used/section/noreturn/alias/weak/
- * visibility/aligned without revisiting decl-spec attrs. */
Attr* dattrs = NULL;
parse_attrs_into(p, &dattrs);
- /* Array suffix(es) after the name pin this as an object declaration —
- * e.g. `static int g[3] = {...}`. Apply each suffix in order so the
- * resulting type is the array-of-T we'll allocate storage for. */
while (is_punct(&p->cur, '[')) {
DeclSuffix s;
if (!parse_decl_suffix(p, &s)) break;
@@ -6574,8 +834,6 @@ static void parse_external_decl(Parser* p) {
parse_attrs_into(p, &dattrs);
if (is_punct(&p->cur, '(')) {
- /* Function declaration or definition: build the type from the param
- * list, then dispatch on `{` (definition) vs `;` (prototype). */
ParamInfo* infos = NULL;
u16 nparams = 0;
u8 variadic = 0;
@@ -6587,7 +845,6 @@ static void parse_external_decl(Parser* p) {
advance(p); /* '(' */
parse_param_list(p, &infos, &nparams, &variadic);
expect_punct(p, ')', "')' after parameter list");
- /* Attributes between `)` and `{`/`;` (e.g. `noreturn`, `section`). */
parse_attrs_into(p, &dattrs);
if (nparams) {
@@ -6611,11 +868,6 @@ static void parse_external_decl(Parser* p) {
return;
}
if (accept_punct(p, ';')) {
- /* Function prototype. If it carries `__attribute__((alias("t")))`,
- * resolve `t` now and define this symbol as a copy of t's binding.
- * Cross-TU aliases aren't in scope: the target must already be
- * defined in this TU (matches the §"Alias resolution" note in
- * doc/ATTRIBUTE.md). */
if (fn_alias_target != 0) {
SymEntry* te = scope_lookup(p, fn_alias_target);
if (!te) {
@@ -6637,13 +889,12 @@ static void parse_external_decl(Parser* p) {
obj_symbol_define(ob, fent->v.sym, ts->section_id, ts->value,
ts->size);
}
- return; /* prototype only */
+ return;
}
perr(p, "expected '{' or ';' after function declarator");
}
- /* Global object declaration: `int g;`, `int g = 7;`, `extern int g;`,
- * `static T g = ...;`, `const T g = ...;`. */
+ /* Global object declaration. */
for (;;) {
int has_init = is_punct(&p->cur, '=');
int is_pure_extern = (specs.storage == DS_EXTERN) && !has_init;
@@ -6652,12 +903,6 @@ static void parse_external_decl(Parser* p) {
SymEntry* e = NULL;
if (existing && existing->kind == SEK_GLOBAL) {
- /* Redeclaration: reuse the prior ObjSymId so the linker sees one
- * symbol. Compatible-types checks live in Phase 10.
- * §6.2.7 composite type: if either declarator gives a complete array
- * size where the other is incomplete, the composite is the complete
- * one — propagate that to the SymEntry so later uses (e.g. sizeof)
- * see the size. */
sym = existing->v.sym;
e = existing;
if (e->type && base_ty && e->type->kind == TY_ARRAY &&
@@ -6677,10 +922,6 @@ static void parse_external_decl(Parser* p) {
decl_in.storage = DS_STATIC;
decl_in.linkage = DL_INTERNAL;
} else {
- /* File-scope objects without an explicit storage class still have
- * static storage duration and external linkage (§6.2.2 ¶5,
- * §6.2.4 ¶3). Storing DS_EXTERN drives decl_declare to mint an
- * obj_sym; DS_AUTO is reserved for block-scope autos. */
decl_in.storage = DS_EXTERN;
decl_in.linkage = DL_EXTERNAL;
}
@@ -6695,8 +936,6 @@ static void parse_external_decl(Parser* p) {
}
attr_list_append(&e->attrs, dattrs);
- /* The effective alignment is the max of _Alignas and any
- * __attribute__((aligned(N))) seen in decl-specs or per-declarator. */
u32 attr_align = attrs_pick_aligned(specs.attrs);
{
u32 a2 = attrs_pick_aligned(dattrs);
@@ -6706,9 +945,6 @@ static void parse_external_decl(Parser* p) {
if (has_init) {
advance(p); /* '=' */
- /* `T name[] = {...}` at file scope: peek the initializer to deduce
- * the element count, then carry the completed type into the static
- * emit + SymEntry. Mirrors the block-scope path. */
if (base_ty && base_ty->kind == TY_ARRAY && base_ty->arr.incomplete) {
const Type* completed = complete_incomplete_array(p, base_ty);
if (completed != base_ty) {
@@ -6719,27 +955,20 @@ static void parse_external_decl(Parser* p) {
define_static_object(p, sym, base_ty, specs.quals, /*has_init=*/1,
loc, align_eff);
} else if (!is_pure_extern) {
- /* Tentative def: emit a BSS reservation now. End-of-TU coalescing of
- * multiple tentative defs into one is a Phase 4 follow-up; the
- * Phase 4 corpus only has a single tentative def per TU. */
define_static_object(p, sym, base_ty, specs.quals, /*has_init=*/0,
loc, align_eff);
}
if (!accept_punct(p, ',')) break;
- /* Next declarator: parse pointer prefix + IDENT, then loop. */
base_ty = parse_pointer_layer(p, specs.type);
- if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) {
+ if (p->cur.kind != TOK_IDENT || ident_kw_inline(p, p->cur.v.ident) != KW_NONE) {
perr(p, "expected declarator after ','");
}
name = p->cur.v.ident;
loc = tok_loc(&p->cur);
advance(p);
- /* Reset per-declarator attrs for this iteration; collect post-IDENT
- * and post-suffix attrs (e.g. `int a, b __attribute__((aligned(4)))`). */
dattrs = NULL;
parse_attrs_into(p, &dattrs);
- /* Optional array suffix on a global declarator (e.g. `int g[3]`). */
while (is_punct(&p->cur, '[')) {
DeclSuffix s;
if (!parse_decl_suffix(p, &s)) break;
@@ -6750,8 +979,6 @@ static void parse_external_decl(Parser* p) {
expect_punct(p, ';', "';' after global declaration");
}
-static void parse_static_assert(Parser* p);
-
static void parse_translation_unit(Parser* p) {
while (p->cur.kind != TOK_EOF) {
if (p->cur.kind == TOK_NEWLINE || is_pp_hash(&p->cur)) {
@@ -6784,13 +1011,10 @@ void parse_c(Compiler* c, Pp* pp, DeclTable* decls, CG* cg, Debug* debug) {
p.abi = c->abi;
p.pool = c->global;
- /* Intern keyword spellings. The lexer doesn't know about keywords; this
- * is the canonical bucketization site (DESIGN §5.1 / lex.h §6.4). */
for (i = (CKw)1; i < KW_COUNT; ++i) {
p.kw_sym[i] = pool_intern_cstr(p.pool, kw_names[i]);
}
- /* Builtin / atomic spellings — Phase 9. */
p.sym_b_alloca = pool_intern_cstr(p.pool, "__builtin_alloca");
p.sym_b_ctz = pool_intern_cstr(p.pool, "__builtin_ctz");
p.sym_b_expect = pool_intern_cstr(p.pool, "__builtin_expect");
@@ -6815,18 +1039,12 @@ void parse_c(Compiler* c, Pp* pp, DeclTable* decls, CG* cg, Debug* debug) {
p.sym_a_thread_fence = pool_intern_cstr(p.pool, "__atomic_thread_fence");
p.sym_a_signal_fence = pool_intern_cstr(p.pool, "__atomic_signal_fence");
- /* File scope. */
p.scope = scope_new(&p, NULL);
- /* Default text section. -ffunction-sections / explicit attribute(section)
- * cases will replace this per-function; the spine uses ".text". */
text_name = pool_intern_cstr(p.pool, ".text");
p.text_sec = obj_section(decl_obj(decls), text_name, SEC_TEXT,
SF_ALLOC | SF_EXEC, 4u);
- /* Pull the first token. PP yields preprocessed C tokens; directives
- * have already been consumed. fetch_tok performs adjacent-string-literal
- * fusion (C11 §6.4.5 ¶5) before tokens reach the parse productions. */
p.cur = fetch_tok(&p);
parse_translation_unit(&p);
diff --git a/src/parse/parse_expr.c b/src/parse/parse_expr.c
@@ -0,0 +1,1795 @@
+/* parse_expr.c — precedence climbing, unary/primary, literal decoding,
+ * constant evaluation. */
+
+#include "parse/parse_priv.h"
+
+static const Type* ty_int(Parser* p) { return type_prim(p->pool, TY_INT); } /* primitive TY_INT type from p->pool */
+static const Type* ty_size_t(Parser* p) { /* size type as reported by the ABI; types __builtin_offsetof results */
+  return abi_size_type(p->abi, p->pool);
+}
+
+
+static CKw ident_kw(const Parser* p, Sym name) { /* file-local forwarder to ident_kw_inline */
+  return ident_kw_inline(p, name);
+}
+
+static int accept_kw(Parser* p, CKw k) { /* consume keyword `k` if it is the current token */
+  if (!is_kw(p, &p->cur, k)) {
+    return 0; /* some other token: nothing is consumed */
+  }
+  advance(p);
+  return 1;
+}
+
+/* ============================================================
+ * Literal parsing
+ * ============================================================ */
+
+/* Decode the integer value spelled by numeric token `t`. A 0x/0X prefix
+ * selects base 16, 0b/0B base 2, any other leading 0 base 8, else base 10.
+ * Decoding stops at the first u/U/l/L suffix character. The value is
+ * accumulated modulo 2^64 and returned as the same bit pattern in an i64. */
+i64 parse_int_literal(Parser* p, const Tok* t) {
+  size_t len = 0;
+  const char* s = pool_str(p->pool, t->spelling, &len);
+  size_t i = 0;
+  unsigned base = 10;
+  u64 acc = 0; /* unsigned: oversized literals wrap instead of signed-overflow UB */
+  if (!s) perr(p, "bad numeric literal");
+  if (len >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
+    base = 16;
+    i = 2;
+  } else if (len >= 2 && s[0] == '0' && (s[1] == 'b' || s[1] == 'B')) {
+    base = 2;
+    i = 2;
+  } else if (len >= 1 && s[0] == '0') {
+    base = 8;
+    i = 1;
+  }
+  for (; i < len; ++i) {
+    int c = (unsigned char)s[i];
+    unsigned dv;
+    if (c == 'u' || c == 'U' || c == 'l' || c == 'L') break;
+    if (c >= '0' && c <= '9') dv = (unsigned)(c - '0');
+    else if (c >= 'a' && c <= 'f') dv = (unsigned)(c - 'a') + 10u;
+    else if (c >= 'A' && c <= 'F') dv = (unsigned)(c - 'A') + 10u;
+    else perr(p, "bad digit in numeric literal");
+    if (dv >= base) perr(p, "digit out of range for base");
+    acc = acc * base + dv;
+  }
+  return (i64)acc;
+}
+
+static const Type* int_literal_type(Parser* p, const Tok* t) {
+  /* The suffix flags alone pick the type; the literal's value is not consulted. */
+  int is_unsigned = (t->flags & TF_INT_U) != 0;
+  TypeKind kind = is_unsigned ? TY_UINT : TY_INT;
+  if (t->flags & TF_INT_LL) {
+    kind = is_unsigned ? TY_ULLONG : TY_LLONG;
+  } else if (t->flags & TF_INT_L) {
+    kind = is_unsigned ? TY_ULONG : TY_LONG;
+  }
+  return type_prim(p->pool, kind);
+}
+
+/* Decode floating literal `t` to a double. Decimal: digits[.digits][e/E[+-]n].
+ * Hex: 0x hexdigits[.hexdigits][p/P[+-]n] with a power-of-two exponent; there
+ * e/E/f/F are hex digits, not exponent marker or suffix. */
+static double parse_float_literal(Parser* p, const Tok* t) {
+  size_t len = 0;
+  const char* s = pool_str(p->pool, t->spelling, &len);
+  size_t i = 0;
+  int is_hex = 0;
+  double v = 0.0;
+  int exp = 0;     /* minus the number of fractional digits consumed */
+  int dec_exp = 0; /* explicit exponent: power of 2 for hex, of 10 otherwise */
+  if (!s) perr(p, "bad float literal");
+  if (len >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
+    is_hex = 1;
+    i = 2;
+  }
+  while (i < len) {
+    int c = (unsigned char)s[i];
+    int dv;
+    if (c == '.' || c == 'p' || c == 'P' || c == 'l' || c == 'L' ||
+        (!is_hex && (c == 'e' || c == 'E' || c == 'f' || c == 'F')))
+      break; /* end of the integer part */
+    if (c >= '0' && c <= '9') dv = c - '0';
+    else if (is_hex && c >= 'a' && c <= 'f') dv = c - 'a' + 10;
+    else if (is_hex && c >= 'A' && c <= 'F') dv = c - 'A' + 10;
+    else perr(p, "bad digit in float literal");
+    v = v * (is_hex ? 16.0 : 10.0) + (double)dv;
+    i++;
+  }
+  if (i < len && s[i] == '.') {
+    i++;
+    while (i < len) {
+      int c = (unsigned char)s[i];
+      int dv;
+      if (c == 'p' || c == 'P' || c == 'l' || c == 'L' ||
+          (!is_hex && (c == 'e' || c == 'E' || c == 'f' || c == 'F')))
+        break; /* end of the fraction */
+      if (c >= '0' && c <= '9') dv = c - '0';
+      else if (is_hex && c >= 'a' && c <= 'f') dv = c - 'a' + 10;
+      else if (is_hex && c >= 'A' && c <= 'F') dv = c - 'A' + 10;
+      else perr(p, "bad digit in float literal");
+      v = v * (is_hex ? 16.0 : 10.0) + (double)dv;
+      exp -= 1;
+      i++;
+    }
+  }
+  if (i < len && (s[i] == 'e' || s[i] == 'E' || s[i] == 'p' || s[i] == 'P')) {
+    int neg = 0;
+    int n = 0;
+    int hex_exp = (s[i] == 'p' || s[i] == 'P');
+    i++;
+    if (i < len && (s[i] == '+' || s[i] == '-')) {
+      if (s[i] == '-') neg = 1;
+      i++;
+    }
+    while (i < len) {
+      int c = (unsigned char)s[i];
+      if (c < '0' || c > '9') break;
+      n = n * 10 + (c - '0');
+      i++;
+    }
+    dec_exp = neg ? -n : n;
+    if (hex_exp) {
+      dec_exp += exp * 4; /* each fractional hex digit is worth 2^-4 */
+      exp = 0;
+    }
+  }
+  while (exp < 0) { v /= (is_hex ? 16.0 : 10.0); exp++; }
+  while (exp > 0) { v *= (is_hex ? 16.0 : 10.0); exp--; }
+  if (is_hex) {
+    while (dec_exp < 0) { v /= 2.0; dec_exp++; }
+    while (dec_exp > 0) { v *= 2.0; dec_exp--; }
+  } else {
+    while (dec_exp < 0) { v /= 10.0; dec_exp++; }
+    while (dec_exp > 0) { v *= 10.0; dec_exp--; }
+  }
+  return v;
+}
+
+static const Type* float_literal_type(Parser* p, const Tok* t) {
+  TypeKind kind = (t->flags & TF_FLT_F) ? TY_FLOAT /* F is tested before L */
+                  : (t->flags & TF_FLT_L) ? TY_LDOUBLE : TY_DOUBLE;
+  return type_prim(p->pool, kind);
+}
+
+/* Decode one plain or escaped character of the literal text s[0..len) that
+ * starts at *pi, and advance *pi past it. \x and octal escapes are reduced to
+ * their low 8 bits; an unrecognised escape yields the escaped character. */
+static i64 decode_one_char(Parser* p, const char* s, size_t len, size_t* pi,
+                           SrcLoc loc) {
+  size_t i = *pi;
+  i64 v;
+  int c;
+  if (i >= len) compiler_panic(p->c, loc, "truncated character literal");
+  if (s[i] != '\\') {
+    *pi = i + 1;
+    return (unsigned char)s[i];
+  }
+  i++;
+  if (i >= len) compiler_panic(p->c, loc, "trailing '\\' in literal");
+  c = (unsigned char)s[i++];
+  switch (c) {
+    case 'n': v = '\n'; break;
+    case 't': v = '\t'; break;
+    case 'r': v = '\r'; break;
+    case 'b': v = '\b'; break;
+    case 'f': v = '\f'; break;
+    case 'v': v = '\v'; break;
+    case 'a': v = '\a'; break;
+    case '\\': v = '\\'; break;
+    case '\'': v = '\''; break;
+    case '"': v = '"'; break;
+    case '?': v = '?'; break;
+    case 'x': {
+      i64 hex = 0;
+      int any = 0;
+      while (i < len) {
+        int d = (unsigned char)s[i];
+        int dv;
+        if (d >= '0' && d <= '9') dv = d - '0';
+        else if (d >= 'a' && d <= 'f') dv = d - 'a' + 10;
+        else if (d >= 'A' && d <= 'F') dv = d - 'A' + 10;
+        else break;
+        /* Keep only the low byte as we go so long escapes cannot overflow. */
+        hex = (hex * 16 + dv) & 0xff;
+        any = 1;
+        i++;
+      }
+      if (!any) compiler_panic(p->c, loc, "\\x with no hex digits");
+      v = hex;
+      break;
+    }
+    default:
+      if (c >= '0' && c <= '7') {
+        i64 oct = c - '0';
+        for (int n = 1; n < 3 && i < len && s[i] >= '0' && s[i] <= '7'; n++) {
+          oct = oct * 8 + (s[i++] - '0'); /* at most three octal digits in total */
+        }
+        v = oct & 0xff;
+      } else {
+        v = c;
+      }
+      break;
+  }
+  *pi = i;
+  return v;
+}
+
+i64 decode_char_literal(Parser* p, const Tok* t) {
+  /* Spelling: optional prefix (2 chars if TF_STR_U8, 1 if wide/u16/u32), then 'c'. */
+  size_t n = 0;
+  const char* text = pool_str(p->pool, t->spelling, &n);
+  size_t pos = 0;
+  if (!text) perr(p, "bad char literal");
+  if (t->flags & TF_STR_U8) pos = 2;
+  else if (t->flags & (TF_STR_WIDE | TF_STR_U16 | TF_STR_U32)) pos = 1;
+  if (pos >= n || text[pos] != '\'') perr(p, "malformed character literal");
+  pos++;
+  if (pos >= n || text[pos] == '\'') perr(p, "empty character literal");
+  i64 value = decode_one_char(p, text, n, &pos, t->loc);
+  if (!(pos < n && text[pos] == '\'')) {
+    perr(p, "multi-character constants are not supported");
+  }
+  return value;
+}
+
+u8* decode_string_literal(Parser* p, const Tok* t, size_t* nlen_out) {
+  /* Returns a buffer from p->c->env->heap holding the decoded bytes plus a NUL;
+   * *nlen_out counts the NUL. Each decoded character is truncated to one byte. */
+  size_t n = 0;
+  const char* text = pool_str(p->pool, t->spelling, &n);
+  Heap* heap = p->c->env->heap;
+  size_t pos = 0, out = 0;
+  u8* bytes;
+  if (!text) perr(p, "bad string literal");
+  if (t->flags & TF_STR_U8) pos = 2;
+  else if (t->flags & (TF_STR_WIDE | TF_STR_U16 | TF_STR_U32)) pos = 1;
+  if (pos >= n || text[pos] != '"') perr(p, "malformed string literal");
+  pos++;
+  bytes = (u8*)heap->alloc(heap, n + 1, 1);
+  if (!bytes) perr(p, "out of memory in string literal");
+  for (; pos < n && text[pos] != '"'; out++) {
+    bytes[out] = (u8)decode_one_char(p, text, n, &pos, t->loc);
+  }
+  bytes[out] = 0;
+  *nlen_out = out + 1;
+  return bytes;
+}
+
+ObjSymId emit_string_to_rodata(Parser* p, const u8* bytes, size_t n) {
+  /* Copies `n` bytes into the ".rodata" section and returns a new SB_LOCAL
+   * object symbol covering them. The symbol is named ".LC<k>", k being a
+   * function-static counter that is bumped on every call. */
+  static u32 counter;
+  ObjBuilder* ob = decl_obj(p->decls);
+  Sym secname = pool_intern_cstr(p->pool, ".rodata");
+  ObjSecId sec = obj_section(ob, secname, SEC_RODATA, SF_ALLOC, 1u);
+  u32 base = obj_pos(ob, sec); /* section position before the bytes are added */
+  char namebuf[32];
+  char rev[12];
+  int nrev = 0;
+  int wlen = 3;
+  u32 id = ++counter;
+
+  memcpy(namebuf, ".LC", 3);
+  /* Render k in decimal: collect digits least-significant first, then reverse. */
+  do {
+    rev[nrev++] = (char)('0' + (id % 10));
+    id /= 10;
+  } while (id);
+  while (nrev > 0) {
+    namebuf[wlen++] = rev[--nrev];
+  }
+  namebuf[wlen] = 0;
+
+  /* The symbol is created first, then the storage it names is reserved. */
+  Sym lname = pool_intern(p->pool, namebuf, (size_t)wlen);
+  ObjSymId sym = obj_symbol(ob, lname, SB_LOCAL, SK_OBJ, sec, base, n);
+  u8* dst = obj_reserve(ob, sec, n);
+  if (dst) memcpy(dst, bytes, n);
+  return sym;
+}
+
+/* ============================================================
+ * Constant expression evaluator (cexpr_*)
+ * ============================================================ */
+
+static i64 cexpr_unary(Parser* p, SrcLoc loc);
+
+static i64 cexpr_mul(Parser* p, SrcLoc loc) { /* '*' '/' '%' chain, left-assoc */
+  i64 v = cexpr_unary(p, loc);
+  for (;;) {
+    int op = accept_punct(p, '*') ? '*' : accept_punct(p, '/') ? '/'
+             : accept_punct(p, '%') ? '%' : 0;
+    if (!op) return v;
+    i64 r = cexpr_unary(p, loc);
+    if (op != '*' && r == 0) {
+      compiler_panic(p->c, loc, op == '/' ? "division by zero in constant"
+                                          : "modulo by zero in constant");
+    }
+    /* r == -1: INT64_MIN / -1 traps on the host; x / -1 == x * -1 mod 2^64. */
+    if (op == '*' || r == -1) v = op == '%' ? 0 : (i64)((u64)v * (u64)r);
+    else v = op == '/' ? v / r : v % r;
+  }
+}
+static i64 cexpr_add(Parser* p, SrcLoc loc) { /* '+' / '-' chain, left-assoc */
+  i64 v = cexpr_mul(p, loc);
+  for (;;) {
+    /* Computed in u64 so overflow wraps rather than being host UB. */
+    if (accept_punct(p, '+')) v = (i64)((u64)v + (u64)cexpr_mul(p, loc));
+    else if (accept_punct(p, '-')) v = (i64)((u64)v - (u64)cexpr_mul(p, loc));
+    else return v;
+  }
+}
+static i64 cexpr_shift(Parser* p, SrcLoc loc) { /* '<<' / '>>' chain; count must be 0..63 */
+  for (i64 v = cexpr_add(p, loc);;) {
+    int shl = accept_punct(p, P_SHL);
+    if (!shl && !accept_punct(p, P_SHR)) return v;
+    i64 n = cexpr_add(p, loc);
+    if (n < 0 || n > 63) compiler_panic(p->c, loc, "shift count out of range in constant");
+    v = shl ? (i64)((u64)v << n) : v >> n; /* unsigned '<<': defined for negative v */
+  }
+}
+static i64 cexpr_rel(Parser* p, SrcLoc loc) { /* relational chain; each step yields 0 or 1 */
+  i64 lhs = cexpr_shift(p, loc);
+  while (1) {
+    if (accept_punct(p, P_LE)) { lhs = (lhs <= cexpr_shift(p, loc)); continue; }
+    if (accept_punct(p, P_GE)) { lhs = (lhs >= cexpr_shift(p, loc)); continue; }
+    int lt = is_punct(&p->cur, '<');
+    if (!lt && !is_punct(&p->cur, '>')) break;
+    advance(p);
+    i64 rhs = cexpr_shift(p, loc);
+    lhs = lt ? (lhs < rhs) : (lhs > rhs);
+  }
+  return lhs;
+}
+static i64 cexpr_eq(Parser* p, SrcLoc loc) { /* '==' / '!=' chain; each step yields 0 or 1 */
+  i64 lhs = cexpr_rel(p, loc);
+  for (;;) {
+    int want_equal = accept_punct(p, P_EQ);
+    if (!want_equal && !accept_punct(p, P_NE)) return lhs;
+    i64 rhs = cexpr_rel(p, loc);
+    lhs = want_equal ? (lhs == rhs) : (lhs != rhs);
+  }
+}
+static i64 cexpr_band(Parser* p, SrcLoc loc) { /* bitwise '&' chain */
+  i64 acc = cexpr_eq(p, loc);
+  for (;;) {
+    if (!is_punct(&p->cur, '&') || is_punct(&p->cur, P_AND)) return acc;
+    advance(p);
+    acc &= cexpr_eq(p, loc);
+  }
+}
+static i64 cexpr_bxor(Parser* p, SrcLoc loc) { /* bitwise '^' chain */
+  i64 acc = cexpr_band(p, loc);
+  for (; accept_punct(p, '^');) acc ^= cexpr_band(p, loc);
+  return acc;
+}
+static i64 cexpr_bor(Parser* p, SrcLoc loc) { /* bitwise '|' chain */
+  i64 acc = cexpr_bxor(p, loc);
+  for (;;) {
+    if (!is_punct(&p->cur, '|') || is_punct(&p->cur, P_OR)) return acc;
+    advance(p);
+    acc |= cexpr_bxor(p, loc);
+  }
+}
+
+static i64 cexpr_unary(Parser* p, SrcLoc loc) { /* unary and primary level of the constant evaluator */
+  if (accept_punct(p, '+')) return cexpr_unary(p, loc);
+  if (accept_punct(p, '-')) return -cexpr_unary(p, loc);
+  if (accept_punct(p, '~')) return ~cexpr_unary(p, loc);
+  if (accept_punct(p, '!')) return cexpr_unary(p, loc) ? 0 : 1;
+  if (accept_kw(p, KW_SIZEOF)) {
+    if (is_punct(&p->cur, '(')) {
+      Tok n = peek1(p); /* look past '(' to tell sizeof(type-name) from sizeof (expr) */
+      if (starts_type_name(p, &n)) {
+        advance(p); /* '(' */
+        {
+          const Type* t = parse_type_name(p);
+          expect_punct(p, ')', "')' after sizeof type-name");
+          return (i64)abi_sizeof(p->abi, t);
+        }
+      }
+    }
+    parse_unary(p); /* operand goes through the normal expression parser ... */
+    {
+      const Type* ty = cg_top_type(p->cg); /* ... only its type is read ... */
+      i64 sz = (i64)abi_sizeof(p->abi, ty);
+      cg_drop(p->cg); /* ... and the pushed value is dropped again */
+      return sz;
+    }
+  }
+  if (accept_kw(p, KW_ALIGNOF)) { /* same shape as the sizeof case above */
+    if (is_punct(&p->cur, '(')) {
+      Tok n = peek1(p);
+      if (starts_type_name(p, &n)) {
+        advance(p); /* '(' */
+        {
+          const Type* t = parse_type_name(p);
+          expect_punct(p, ')', "')' after _Alignof type-name");
+          return (i64)abi_alignof(p->abi, t);
+        }
+      }
+    }
+    parse_unary(p);
+    {
+      const Type* ty = cg_top_type(p->cg);
+      i64 al = (i64)abi_alignof(p->abi, ty);
+      cg_drop(p->cg);
+      return al;
+    }
+  }
+  if (accept_punct(p, '(')) {
+    if (starts_type_name(p, &p->cur)) { /* cast: (type-name) unary */
+      const Type* t = parse_type_name(p);
+      expect_punct(p, ')', "')' after cast type-name");
+      {
+        i64 v = cexpr_unary(p, loc);
+        u32 sz = abi_sizeof(p->abi, t);
+        int is_signed = abi_type_info(p->abi, t).signed_;
+        if (sz < 8) { /* narrow to the target width, then sign- or zero-extend */
+          u64 mask = (1ull << (sz * 8u)) - 1ull;
+          u64 uv = (u64)v & mask;
+          if (is_signed) {
+            u64 sign = 1ull << (sz * 8u - 1u); /* NOTE(review): sz == 0 would wrap this shift count — confirm abi_sizeof cannot return 0 here */
+            v = (i64)((uv ^ sign) - sign); /* sign-extend from bit sz*8-1 */
+          } else {
+            v = (i64)uv;
+          }
+        }
+        return v;
+      }
+    }
+    { /* parenthesised sub-expression, restarted at the bitwise-or level */
+      i64 v = cexpr_bor(p, loc);
+      expect_punct(p, ')', "')' in constant expression");
+      return v;
+    }
+  }
+  if (p->cur.kind == TOK_NUM) {
+    i64 v = parse_int_literal(p, &p->cur); /* value only; the literal's type is not consulted */
+    advance(p);
+    return v;
+  }
+  if (p->cur.kind == TOK_CHR) {
+    i64 v = decode_char_literal(p, &p->cur);
+    advance(p);
+    return v;
+  }
+  if (p->cur.kind == TOK_IDENT) {
+    SymEntry* e = scope_lookup(p, p->cur.v.ident);
+    if (e && e->kind == SEK_ENUM_CST) { /* the only identifiers accepted are enum constants */
+      advance(p);
+      return e->v.enum_value;
+    }
+    compiler_panic(p->c, loc, "non-constant identifier in constant expression");
+  }
+  compiler_panic(p->c, loc, "expected constant expression");
+}
+
+i64 eval_const_int(Parser* p, SrcLoc loc) { return cexpr_bor(p, loc); } /* entry point: evaluation starts at the bitwise-or level */
+
+/* ============================================================
+ * to_rvalue
+ * ============================================================ */
+
+/* Turn the value on top of the codegen stack into an rvalue: arrays and
+ * functions are replaced by their address, structs/unions are left alone,
+ * and anything else (including a top with no type) is loaded. */
+void to_rvalue(Parser* p) {
+  const Type* top = cg_top_type(p->cg);
+  int is_array = top && top->kind == TY_ARRAY;
+  int is_func = top && top->kind == TY_FUNC;
+  if (is_array || is_func) {
+    cg_addr(p->cg);
+    /* An array is retagged as a pointer to its element type. */
+    if (is_array) cg_retag_top(p->cg, type_ptr(p->pool, top->arr.elem));
+    return;
+  }
+  if (top && (top->kind == TY_STRUCT || top->kind == TY_UNION)) return;
+  cg_load(p->cg);
+}
+
+/* ============================================================
+ * coerce_top_to_lvalue (used by assignment / initializers)
+ * ============================================================ */
+
+void coerce_top_to_lvalue(Parser* p) {
+  const Type* from = cg_top_type(p->cg);
+  const Type* to = cg_top2_type(p->cg);
+  /* Convert only between two distinct arithmetic types; otherwise do nothing. */
+  if (from && to && from != to && type_is_arith(from) && type_is_arith(to)) {
+    cg_convert(p->cg, to);
+  }
+}
+
+/* ============================================================
+ * Builtin call handling
+ * ============================================================ */
+
+/* Parse the member-designator of __builtin_offsetof starting at its first
+ * member name: a chain of `name`, `.name` and `[const-index]` steps. Adds the
+ * byte offset of the designated sub-object to *off and returns its type. */
+static const Type* offsetof_designator(Parser* p, const Type* base, u32* off) {
+  const Type* cur = base;
+  int want_member = 1; /* current token is a member name: at start and after '.', never after ']' */
+  if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) {
+    perr(p, "expected member name in __builtin_offsetof");
+  }
+  for (;;) {
+    if (want_member) {
+      Sym mname = p->cur.v.ident;
+      const Type* mty = NULL;
+      u32 moff = 0;
+      int found = 0;
+      if (cur->kind != TY_STRUCT && cur->kind != TY_UNION) {
+        perr(p, "__builtin_offsetof step into non-aggregate");
+      }
+      const ABIRecordLayout* L = abi_record_layout(p->abi, cur);
+      if (!L) perr(p, "no such member in __builtin_offsetof");
+      for (u16 i = 0; i < cur->rec.nfields; ++i) {
+        const Field* f = &cur->rec.fields[i];
+        if (f->name == mname && mname != 0) {
+          mty = f->type;
+          moff = L->fields[i].offset;
+          found = 1;
+          break;
+        }
+      }
+      if (!found) perr(p, "no such member in __builtin_offsetof");
+      advance(p);
+      *off += moff;
+      cur = mty;
+      want_member = 0;
+    }
+    if (is_punct(&p->cur, '.')) {
+      advance(p);
+      if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) {
+        perr(p, "expected member name after '.'");
+      }
+      want_member = 1;
+      continue;
+    }
+    if (is_punct(&p->cur, '[')) {
+      advance(p);
+      i64 idx = eval_const_int(p, p->cur.loc);
+      expect_punct(p, ']', "']' in __builtin_offsetof");
+      if (cur->kind != TY_ARRAY) {
+        perr(p, "__builtin_offsetof '[' on non-array");
+      }
+      *off += (u32)((i64)abi_sizeof(p->abi, cur->arr.elem) * idx);
+      cur = cur->arr.elem;
+      continue; /* next token may be '.', '[' or the end of the designator */
+    }
+    break;
+  }
+  return cur;
+}
+
+static int try_parse_builtin_call(Parser* p) { /* returns 1 after parsing a recognised builtin call, 0 (nothing consumed) otherwise */
+  Sym name = p->cur.v.ident;
+  SrcLoc loc = p->cur.loc;
+
+  if (name != p->sym_b_alloca && name != p->sym_b_ctz &&
+      name != p->sym_b_expect &&
+      name != p->sym_b_offsetof && name != p->sym_b_va_start &&
+      name != p->sym_b_va_arg && name != p->sym_b_va_end &&
+      name != p->sym_b_va_copy && name != p->sym_a_load_n &&
+      name != p->sym_a_store_n && name != p->sym_a_exchange_n &&
+      name != p->sym_a_fetch_add && name != p->sym_a_fetch_sub &&
+      name != p->sym_a_fetch_and && name != p->sym_a_fetch_or &&
+      name != p->sym_a_fetch_xor && name != p->sym_a_cas_n &&
+      name != p->sym_a_thread_fence && name != p->sym_a_signal_fence) {
+    return 0; /* not one of the builtin names interned on the Parser */
+  }
+  advance(p); /* IDENT */
+  expect_punct(p, '(', "'(' after builtin");
+
+  if (name == p->sym_b_offsetof) { /* folded to an integer constant of the ABI size type */
+    const Type* root = parse_type_name(p);
+    expect_punct(p, ',', "',' in __builtin_offsetof");
+    u32 off = 0;
+    (void)offsetof_designator(p, root, &off);
+    expect_punct(p, ')', "')' after __builtin_offsetof");
+    cg_push_int(p->cg, (i64)off, ty_size_t(p));
+    return 1;
+  }
+
+  if (name == p->sym_b_expect) { /* result is the first argument's rvalue */
+    parse_assign_expr(p);
+    to_rvalue(p);
+    expect_punct(p, ',', "',' in __builtin_expect");
+    parse_assign_expr(p);
+    cg_drop(p->cg); /* the second argument is parsed, then discarded */
+    expect_punct(p, ')', "')' after __builtin_expect");
+    return 1;
+  }
+
+  if (name == p->sym_b_alloca) {
+    parse_assign_expr(p);
+    to_rvalue(p);
+    expect_punct(p, ')', "')' after __builtin_alloca");
+    cg_set_loc(p->cg, loc);
+    cg_alloca(p->cg);
+    return 1;
+  }
+
+  if (name == p->sym_b_ctz) {
+    parse_assign_expr(p);
+    to_rvalue(p);
+    expect_punct(p, ')', "')' after __builtin_ctz");
+    cg_set_loc(p->cg, loc);
+    cg_intrinsic_unary_to_int(p->cg, INTRIN_CTZ);
+    return 1;
+  }
+
+  if (name == p->sym_b_va_start) {
+    parse_assign_expr(p);
+    cg_addr(p->cg); /* the va_list operand is passed by address */
+    expect_punct(p, ',', "',' in __builtin_va_start");
+    parse_assign_expr(p);
+    cg_drop(p->cg); /* the second argument is parsed, then discarded */
+    expect_punct(p, ')', "')' after __builtin_va_start");
+    cg_set_loc(p->cg, loc);
+    cg_va_start_(p->cg);
+    cg_push_int(p->cg, 0, ty_int(p)); /* int 0 pushed as the call's value */
+    return 1;
+  }
+
+  if (name == p->sym_b_va_end) {
+    parse_assign_expr(p);
+    cg_addr(p->cg);
+    expect_punct(p, ')', "')' after __builtin_va_end");
+    cg_set_loc(p->cg, loc);
+    cg_va_end_(p->cg);
+    cg_push_int(p->cg, 0, ty_int(p)); /* int 0 pushed as the call's value */
+    return 1;
+  }
+
+  if (name == p->sym_b_va_copy) {
+    parse_assign_expr(p);
+    cg_addr(p->cg);
+    expect_punct(p, ',', "',' in __builtin_va_copy");
+    parse_assign_expr(p);
+    cg_addr(p->cg);
+    expect_punct(p, ')', "')' after __builtin_va_copy");
+    cg_set_loc(p->cg, loc);
+    cg_va_copy_(p->cg);
+    cg_push_int(p->cg, 0, ty_int(p)); /* int 0 pushed as the call's value */
+    return 1;
+  }
+
+  if (name == p->sym_b_va_arg) { /* second operand is a type-name, not an expression */
+    parse_assign_expr(p);
+    cg_addr(p->cg);
+    expect_punct(p, ',', "',' in __builtin_va_arg");
+    const Type* ty = parse_type_name(p);
+    expect_punct(p, ')', "')' after __builtin_va_arg");
+    cg_set_loc(p->cg, loc);
+    cg_va_arg_(p->cg, ty);
+    return 1;
+  }
+
+  if (name == p->sym_a_load_n) {
+    parse_assign_expr(p);
+    to_rvalue(p);
+    expect_punct(p, ',', "',' in __atomic_load_n");
+    i64 ord = eval_const_int(p, p->cur.loc); /* memory order must be a constant expression */
+    expect_punct(p, ')', "')' after __atomic_load_n");
+    cg_set_loc(p->cg, loc);
+    cg_atomic_load(p->cg, (MemOrder)ord);
+    return 1;
+  }
+
+  if (name == p->sym_a_store_n) {
+    parse_assign_expr(p);
+    to_rvalue(p);
+    expect_punct(p, ',', "',' in __atomic_store_n");
+    parse_assign_expr(p);
+    to_rvalue(p);
+    expect_punct(p, ',', "',' in __atomic_store_n");
+    i64 ord = eval_const_int(p, p->cur.loc);
+    expect_punct(p, ')', "')' after __atomic_store_n");
+    cg_set_loc(p->cg, loc);
+    cg_atomic_store(p->cg, (MemOrder)ord);
+    cg_push_int(p->cg, 0, ty_int(p)); /* int 0 pushed as the call's value */
+    return 1;
+  }
+
+  if (name == p->sym_a_thread_fence || name == p->sym_a_signal_fence) { /* both fences go through the same cg_fence call */
+    i64 ord = eval_const_int(p, p->cur.loc);
+    expect_punct(p, ')', "')' after atomic fence");
+    cg_set_loc(p->cg, loc);
+    cg_fence(p->cg, (MemOrder)ord);
+    cg_push_int(p->cg, 0, ty_int(p));
+    return 1;
+  }
+
+  if (name == p->sym_a_cas_n) {
+    parse_assign_expr(p); to_rvalue(p);  /* ptr */
+    expect_punct(p, ',', "',' in __atomic_compare_exchange_n");
+
+    parse_assign_expr(p); to_rvalue(p);  /* &expected */
+    const Type* eptr_ty = cg_top_type(p->cg);
+    if (!eptr_ty || eptr_ty->kind != TY_PTR) {
+      perr(p, "__atomic_compare_exchange_n: arg 2 must be a pointer");
+    }
+    const Type* val_ty = eptr_ty->ptr.pointee;
+
+    FrameSlotDesc fsd; memset(&fsd, 0, sizeof fsd);
+    fsd.type = eptr_ty; fsd.size = 8; fsd.align = 8; fsd.kind = FS_LOCAL; /* NOTE(review): pointer size/align hard-coded to 8 — confirm against the ABI */
+    FrameSlot eslot = cg_local(p->cg, &fsd);
+    cg_push_local_typed(p->cg, eslot, eptr_ty);
+    cg_swap(p->cg);
+    cg_store(p->cg); cg_drop(p->cg); /* the `expected` pointer is kept in eslot for use after the CAS */
+
+    cg_push_local_typed(p->cg, eslot, eptr_ty);
+    cg_load(p->cg);
+    cg_deref(p->cg, val_ty);
+    cg_load(p->cg); /* pushes *expected */
+
+    expect_punct(p, ',', "',' in __atomic_compare_exchange_n");
+    parse_assign_expr(p); to_rvalue(p);  /* desired */
+    expect_punct(p, ',', "',' in __atomic_compare_exchange_n");
+
+    (void)eval_const_int(p, p->cur.loc);  /* weak */
+    expect_punct(p, ',', "',' in __atomic_compare_exchange_n");
+    i64 succ = eval_const_int(p, p->cur.loc);
+    expect_punct(p, ',', "',' in __atomic_compare_exchange_n");
+    i64 fail = eval_const_int(p, p->cur.loc);
+    expect_punct(p, ')', "')' after __atomic_compare_exchange_n");
+
+    cg_set_loc(p->cg, loc);
+    cg_atomic_cas(p->cg, (MemOrder)succ, (MemOrder)fail);
+
+    const Type* ok_ty = cg_top_type(p->cg); /* presumably the success flag is on top after the CAS — verify against cg_atomic_cas */
+    FrameSlotDesc okd; memset(&okd, 0, sizeof okd);
+    okd.type = ok_ty; okd.size = 4; okd.align = 4; okd.kind = FS_LOCAL; /* NOTE(review): assumes ok_ty is 4 bytes — confirm */
+    FrameSlot okslot = cg_local(p->cg, &okd);
+    cg_push_local_typed(p->cg, okslot, ok_ty);
+    cg_swap(p->cg); cg_store(p->cg); cg_drop(p->cg);
+
+    FrameSlotDesc pd; memset(&pd, 0, sizeof pd);
+    pd.type = val_ty;
+    pd.size = abi_sizeof(p->abi, val_ty);
+    pd.align = abi_alignof(p->abi, val_ty);
+    pd.kind = FS_LOCAL;
+    FrameSlot pslot = cg_local(p->cg, &pd);
+    cg_push_local_typed(p->cg, pslot, val_ty);
+    cg_swap(p->cg); cg_store(p->cg); cg_drop(p->cg); /* presumably the value the CAS observed — verify */
+
+    cg_push_local_typed(p->cg, okslot, ok_ty);
+    cg_load(p->cg);
+    CGLabel L_done = cg_label_new(p->cg);
+    cg_branch_true(p->cg, L_done); /* on success the write-back below is skipped */
+    cg_push_local_typed(p->cg, eslot, eptr_ty);
+    cg_load(p->cg);
+    cg_deref(p->cg, val_ty);
+    cg_push_local_typed(p->cg, pslot, val_ty);
+    cg_load(p->cg);
+    cg_store(p->cg); cg_drop(p->cg); /* *expected = value saved in pslot */
+    cg_label_place(p->cg, L_done);
+
+    cg_push_local_typed(p->cg, okslot, ok_ty);
+    cg_load(p->cg); /* the call's value is the saved flag */
+    return 1;
+  }
+
+  AtomicOp op; /* remaining names: exchange and the fetch_* read-modify-write family */
+  if (name == p->sym_a_exchange_n) op = AO_XCHG;
+  else if (name == p->sym_a_fetch_add) op = AO_ADD;
+  else if (name == p->sym_a_fetch_sub) op = AO_SUB;
+  else if (name == p->sym_a_fetch_and) op = AO_AND;
+  else if (name == p->sym_a_fetch_or) op = AO_OR;
+  else if (name == p->sym_a_fetch_xor) op = AO_XOR;
+  else { perr(p, "internal: unhandled builtin"); }
+
+  parse_assign_expr(p);
+  to_rvalue(p);
+  expect_punct(p, ',', "',' in atomic builtin");
+  parse_assign_expr(p);
+  to_rvalue(p);
+  expect_punct(p, ',', "',' in atomic builtin");
+  i64 ord = eval_const_int(p, p->cur.loc);
+  expect_punct(p, ')', "')' after atomic builtin");
+  cg_set_loc(p->cg, loc);
+  cg_atomic_rmw(p->cg, op, (MemOrder)ord);
+  return 1;
+}
+
+/* ============================================================
+ * parse_primary, parse_postfix, parse_unary
+ * ============================================================ */
+
+static void parse_primary(Parser* p) { /* primary-expression: literal, identifier, builtin call or parenthesised expression */
+  Tok t = p->cur;
+  if (t.kind == TOK_NUM) {
+    i64 v = parse_int_literal(p, &t);
+    const Type* lty = int_literal_type(p, &t); /* type comes from the suffix flags only */
+    advance(p);
+    cg_push_int(p->cg, v, lty);
+    return;
+  }
+  if (t.kind == TOK_FLT) {
+    double v = parse_float_literal(p, &t);
+    const Type* lty = float_literal_type(p, &t);
+    advance(p);
+    cg_push_float(p->cg, v, lty);
+    return;
+  }
+  if (is_punct(&t, '(')) {
+    advance(p);
+    parse_expr(p);
+    expect_punct(p, ')', "')'");
+    return;
+  }
+  if (t.kind == TOK_IDENT) {
+    SymEntry* e;
+    if (ident_kw(p, t.v.ident) != KW_NONE) {
+      perr(p, "unexpected keyword in expression");
+    }
+    {
+      Tok n = peek1(p);
+      if (is_punct(&n, '(') && try_parse_builtin_call(p)) return; /* builtins are tried only when '(' follows, before scope lookup */
+    }
+    e = scope_lookup(p, t.v.ident);
+    if (!e) {
+      size_t nlen = 0;
+      const char* nm = pool_str(p->pool, t.v.ident, &nlen);
+      compiler_panic(p->c, t.loc, "undeclared identifier '%.*s'", (int)nlen,
+                     nm ? nm : "?");
+    }
+    advance(p);
+    switch (e->kind) {
+      case SEK_LOCAL:
+        cg_push_local_typed(p->cg, e->v.slot, e->type);
+        if (e->vla_byte_slot != FRAME_SLOT_NONE) {
+          p->last_pushed_vla_slot = e->vla_byte_slot; /* recorded only for locals that carry a VLA byte slot */
+        }
+        return;
+      case SEK_GLOBAL:
+      case SEK_FUNC:
+        cg_push_global(p->cg, e->v.sym, e->type);
+        return;
+      case SEK_ENUM_CST:
+        cg_push_int(p->cg, e->v.enum_value, e->type);
+        return;
+      case SEK_TYPEDEF:
+      default:
+        perr(p, "identifier is not a value");
+    }
+  }
+  if (t.kind == TOK_CHR) {
+    i64 v = decode_char_literal(p, &t);
+    advance(p);
+    cg_push_int(p->cg, v, ty_int(p)); /* character constants are pushed with type int */
+    return;
+  }
+  if (t.kind == TOK_STR) {
+    size_t n = 0;
+    u8* bytes = decode_string_literal(p, &t, &n); /* n includes the terminating NUL */
+    ObjSymId sym = emit_string_to_rodata(p, bytes, n);
+    p->c->env->heap->free(p->c->env->heap, bytes, 0); /* bytes were copied into .rodata; release the temporary */
+    advance(p);
+    {
+      const Type* char_ty = type_prim(p->pool, TY_CHAR);
+      const Type* arr_ty = type_array(p->pool, char_ty, (u32)n, 0); /* typed char[n] whatever the prefix flags are */
+      cg_push_global(p->cg, sym, arr_ty);
+    }
+    return;
+  }
+  perr(p, "expected expression");
+}
+
+/* Find member `name` in the struct/union type `rec`.
+ *
+ * Fields are scanned in declaration order. A directly named field wins;
+ * otherwise an anonymous struct/union field is searched one level deep and
+ * the inner offset is added to the anonymous field's own offset.
+ *
+ * On success writes the member type and byte offset (relative to the start
+ * of `rec`) and returns 1. Returns 0 when no member matches. Reports
+ * "no such member" if the ABI has no layout for `rec`. */
+static int postfix_find_member(Parser* p, const Type* rec, Sym name,
+                               const Type** ty_out, u32* off_out) {
+  const ABIRecordLayout* L = abi_record_layout(p->abi, rec);
+  if (!L) perr(p, "no such member");
+  for (u16 i = 0; i < rec->rec.nfields; ++i) {
+    const Field* f = &rec->rec.fields[i];
+    if (f->name == name && name != 0) {
+      *ty_out = f->type;
+      *off_out = L->fields[i].offset;
+      return 1;
+    }
+    /* anonymous member flattening */
+    if (!(f->flags & FIELD_ANON)) continue;
+    if (f->type->kind != TY_STRUCT && f->type->kind != TY_UNION) continue;
+    const ABIRecordLayout* IL = abi_record_layout(p->abi, f->type);
+    if (!IL) continue;
+    for (u16 j = 0; j < f->type->rec.nfields; ++j) {
+      const Field* ff = &f->type->rec.fields[j];
+      if (ff->name == name && name != 0) {
+        *ty_out = ff->type;
+        *off_out = L->fields[i].offset + IL->fields[j].offset;
+        return 1;
+      }
+    }
+  }
+  return 0;
+}
+
+/* postfix-expression: a primary expression followed by any number of
+ * `++`, `--`, call `(...)`, subscript `[...]`, `.member`, `->member`.
+ * Each suffix consumes the current top of the cg stack and leaves its
+ * result there. */
+static void parse_postfix(Parser* p) {
+  parse_primary(p);
+  for (;;) {
+    Tok t = p->cur;
+    if (is_punct(&t, P_INC)) {
+      advance(p);
+      cg_inc_dec(p->cg, BO_IADD, /*post=*/1);
+      continue;
+    }
+    if (is_punct(&t, P_DEC)) {
+      advance(p);
+      cg_inc_dec(p->cg, BO_ISUB, /*post=*/1);
+      continue;
+    }
+    if (is_punct(&t, '(')) {
+      /* Callee is either a function designator or a pointer to function;
+       * the pointer is loaded before the arguments are evaluated. */
+      const Type* top = cg_top_type(p->cg);
+      const Type* fn_type;
+      if (top && top->kind == TY_FUNC) {
+        fn_type = top;
+      } else if (top && top->kind == TY_PTR && top->ptr.pointee &&
+                 top->ptr.pointee->kind == TY_FUNC) {
+        fn_type = top->ptr.pointee;
+        cg_load(p->cg);
+      } else {
+        perr(p, "called object is not a function");
+      }
+      advance(p); /* '(' */
+      u32 nargs = 0;
+      if (!is_punct(&p->cur, ')')) {
+        for (;;) {
+          parse_assign_expr(p);
+          to_rvalue(p);
+          ++nargs;
+          if (!accept_punct(p, ',')) break;
+        }
+      }
+      expect_punct(p, ')', "')' after argument list");
+      if (fn_type->fn.nparams != nargs && !fn_type->fn.variadic) {
+        perr(p, "wrong number of arguments");
+      }
+      if (fn_type->fn.variadic && nargs < fn_type->fn.nparams) {
+        perr(p, "too few arguments to variadic function");
+      }
+      cg_call(p->cg, nargs, fn_type);
+      /* A void call still gets a placeholder int pushed so that callers
+       * always find one entry on the stack. */
+      if (fn_type->fn.ret && fn_type->fn.ret->kind == TY_VOID) {
+        cg_push_int(p->cg, 0, ty_int(p));
+      }
+      continue;
+    }
+    if (is_punct(&t, '[')) {
+      const Type* lt0 = cg_top_type(p->cg);
+      advance(p); /* '[' */
+      /* Array operand decays to a pointer to its element; a pointer
+       * lvalue is loaded to obtain the pointer value. */
+      if (lt0 && lt0->kind == TY_ARRAY) {
+        cg_addr(p->cg);
+        cg_retag_top(p->cg, type_ptr(p->pool, lt0->arr.elem));
+      } else if (lt0 && lt0->kind == TY_PTR) {
+        cg_load(p->cg);
+      }
+      parse_expr(p);
+      {
+        const Type* it0 = cg_top_type(p->cg);
+        if (it0 && it0->kind == TY_ARRAY) {
+          cg_addr(p->cg);
+          cg_retag_top(p->cg, type_ptr(p->pool, it0->arr.elem));
+        } else {
+          to_rvalue(p);
+        }
+      }
+      expect_punct(p, ']', "']' after subscript");
+      {
+        const Type* lt = cg_top2_type(p->cg);
+        const Type* it = cg_top_type(p->cg);
+        const Type* elem;
+        if (lt && lt->kind == TY_PTR && type_is_int(it)) {
+          elem = lt->ptr.pointee;
+        } else if (it && it->kind == TY_PTR && type_is_int(lt)) {
+          /* `i[p]` form: swap so the pointer is below the index. */
+          cg_swap(p->cg);
+          elem = it->ptr.pointee;
+        } else {
+          perr(p, "invalid subscript: needs one pointer and one integer");
+        }
+        if (!elem) perr(p, "subscript on incomplete pointee");
+        /* Scale the index by the element size, then add and deref. */
+        u32 esz = abi_sizeof(p->abi, elem);
+        if (esz != 1) {
+          cg_push_int(p->cg, (i64)esz, ty_size_t(p));
+          cg_binop(p->cg, BO_IMUL);
+        }
+        cg_binop(p->cg, BO_IADD);
+        cg_deref(p->cg, elem);
+      }
+      continue;
+    }
+    if (is_punct(&t, '.')) {
+      const Type* lt = cg_top_type(p->cg);
+      Sym mname;
+      const Type* mty = NULL;
+      u32 moff = 0;
+      advance(p); /* '.' */
+      if (!lt || (lt->kind != TY_STRUCT && lt->kind != TY_UNION)) {
+        perr(p, "request for member in something that is not a struct or union");
+      }
+      if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) {
+        perr(p, "expected member name after '.'");
+      }
+      mname = p->cur.v.ident;
+      advance(p);
+      if (!postfix_find_member(p, lt, mname, &mty, &moff)) {
+        perr(p, "no such member");
+      }
+      /* Take the record's address, view it as a pointer to the member,
+       * add the byte offset, and deref to get the member lvalue. */
+      cg_addr(p->cg);
+      cg_retag_top(p->cg, type_ptr(p->pool, mty));
+      if (moff > 0) {
+        cg_push_int(p->cg, (i64)moff, ty_size_t(p));
+        cg_binop(p->cg, BO_IADD);
+      }
+      cg_deref(p->cg, mty);
+      continue;
+    }
+    if (is_punct(&t, P_ARROW)) {
+      const Type* lt0;
+      const Type* rec_ty;
+      Sym mname;
+      const Type* mty = NULL;
+      u32 moff = 0;
+      advance(p); /* `->` */
+      to_rvalue(p);
+      lt0 = cg_top_type(p->cg);
+      if (!lt0 || lt0->kind != TY_PTR) {
+        perr(p, "'->' requires a pointer operand");
+      }
+      rec_ty = lt0->ptr.pointee;
+      if (!rec_ty || (rec_ty->kind != TY_STRUCT && rec_ty->kind != TY_UNION)) {
+        perr(p, "'->' on pointer to non-struct/union");
+      }
+      if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) {
+        perr(p, "expected member name after '->'");
+      }
+      mname = p->cur.v.ident;
+      advance(p);
+      if (!postfix_find_member(p, rec_ty, mname, &mty, &moff)) {
+        perr(p, "no such member");
+      }
+      /* The pointer value is already on the stack: offset and deref. */
+      if (moff > 0) {
+        cg_push_int(p->cg, (i64)moff, ty_size_t(p));
+        cg_binop(p->cg, BO_IADD);
+      }
+      cg_deref(p->cg, mty);
+      continue;
+    }
+    break;
+  }
+}
+
+/* True when the current token is a `,` or `)` outside every bracket pair,
+ * i.e. the end of one _Generic association. */
+static int generic_at_assoc_end(Parser* p, int paren, int brack, int brace) {
+  if (paren != 0 || brack != 0 || brace != 0) return 0;
+  return is_punct(&p->cur, ',') || is_punct(&p->cur, ')');
+}
+
+/* Adjust the bracket nesting counters for the current token. */
+static void generic_track_nesting(Parser* p, int* paren, int* brack,
+                                  int* brace) {
+  if (is_punct(&p->cur, '(')) ++*paren;
+  else if (is_punct(&p->cur, ')')) --*paren;
+  else if (is_punct(&p->cur, '[')) ++*brack;
+  else if (is_punct(&p->cur, ']')) --*brack;
+  else if (is_punct(&p->cur, '{')) ++*brace;
+  else if (is_punct(&p->cur, '}')) --*brace;
+}
+
+/* Skip the tokens of an unselected _Generic association expression. */
+static void generic_skip_assoc(Parser* p) {
+  int paren = 0, brack = 0, brace = 0;
+  while (p->cur.kind != TOK_EOF &&
+         !generic_at_assoc_end(p, paren, brack, brace)) {
+    generic_track_nesting(p, &paren, &brack, &brace);
+    advance(p);
+  }
+}
+
+/* Allocate a token buffer of `new_cap` entries from the TU arena and copy
+ * the first `len` tokens of `buf` into it. */
+static Tok* generic_grow_buf(Parser* p, Tok* buf, u32 len, u32 new_cap) {
+  Tok* nv = arena_array(p->c->tu, Tok, new_cap);
+  if (!nv) perr(p, "out of memory recording _Generic default");
+  memcpy(nv, buf, len * sizeof(Tok));
+  return nv;
+}
+
+/* Record the tokens of the `default:` association expression so it can be
+ * replayed later if no typed association matches. A synthetic `,` token is
+ * appended as a terminator. Writes the token count (including the
+ * terminator) to *len_out and returns the arena-allocated buffer. */
+static Tok* generic_record_default(Parser* p, u32* len_out) {
+  u32 cap = 16;
+  u32 len = 0;
+  int paren = 0, brack = 0, brace = 0;
+  Tok* buf = arena_array(p->c->tu, Tok, cap);
+  /* The regrowth path checks for allocation failure; check here too. */
+  if (!buf) perr(p, "out of memory recording _Generic default");
+  while (p->cur.kind != TOK_EOF &&
+         !generic_at_assoc_end(p, paren, brack, brace)) {
+    if (len == cap) {
+      buf = generic_grow_buf(p, buf, len, cap * 2);
+      cap *= 2;
+    }
+    buf[len++] = p->cur;
+    generic_track_nesting(p, &paren, &brack, &brace);
+    advance(p);
+  }
+  if (len == cap) {
+    buf = generic_grow_buf(p, buf, len, cap + 1);
+  }
+  memset(&buf[len], 0, sizeof(Tok));
+  buf[len].kind = TOK_PUNCT;
+  buf[len].v.punct = ',';
+  ++len;
+  *len_out = len;
+  return buf;
+}
+
+/* Parse one assignment-expression out of the recorded token buffer by
+ * temporarily pointing the parser's replay state at it, then restore the
+ * saved parser state. */
+static void generic_replay_default(Parser* p, Tok* buf, u32 len) {
+  Tok* save_replay = p->replay;
+  u32 save_cap = p->replay_cap;
+  u32 save_len = p->replay_len;
+  u32 save_pos = p->replay_pos;
+  u8 save_active = p->replay_active;
+  Tok save_cur = p->cur;
+  int save_has_next = p->has_next;
+  p->replay = buf;
+  p->replay_cap = len;
+  p->replay_len = len;
+  p->replay_pos = 1;
+  p->replay_active = 1;
+  p->cur = buf[0];
+  p->has_next = 0;
+  parse_assign_expr(p);
+  p->replay = save_replay;
+  p->replay_cap = save_cap;
+  p->replay_len = save_len;
+  p->replay_pos = save_pos;
+  p->replay_active = save_active;
+  p->cur = save_cur;
+  p->has_next = save_has_next;
+}
+
+/* unary-expression: casts and compound literals, prefix operators
+ * (+ - ! ~ & * ++ --), sizeof, _Generic, _Alignof; anything else falls
+ * through to parse_postfix. Leaves one entry on the cg stack. */
+void parse_unary(Parser* p) {
+  Tok t = p->cur;
+  if (is_punct(&t, '(')) {
+    Tok n = peek1(p);
+    if (starts_type_name(p, &n)) {
+      const Type* dst;
+      const Type* src;
+      advance(p); /* '(' */
+      dst = parse_type_name(p);
+      expect_punct(p, ')', "')' after type-name");
+      if (is_punct(&p->cur, '{')) {
+        /* Compound literal: materialize it in a fresh local slot. */
+        FrameSlotDesc fsd;
+        FrameSlot slot;
+        const Type* lit_ty = dst;
+        if (lit_ty && lit_ty->kind == TY_ARRAY && lit_ty->arr.incomplete) {
+          lit_ty = complete_incomplete_array(p, lit_ty);
+        }
+        memset(&fsd, 0, sizeof fsd);
+        fsd.type = lit_ty;
+        fsd.size = abi_sizeof(p->abi, lit_ty);
+        fsd.align = abi_alignof(p->abi, lit_ty);
+        fsd.kind = FS_LOCAL;
+        fsd.flags = FSF_NONE;
+        slot = cg_local(p->cg, &fsd);
+        /* Aggregates and scalars take the same initializer path. */
+        init_at(p, slot, lit_ty, 0, lit_ty);
+        cg_push_local_typed(p->cg, slot, lit_ty);
+        return;
+      }
+      parse_unary(p);
+      to_rvalue(p);
+      if (dst && dst->kind == TY_VOID) {
+        /* (void)expr: discard the value, push a placeholder int. */
+        cg_drop(p->cg);
+        cg_push_int(p->cg, 0, ty_int(p));
+        return;
+      }
+      src = cg_top_type(p->cg);
+      /* Pointer-to-pointer casts only change the static type. */
+      if (src && src->kind == TY_PTR && dst && dst->kind == TY_PTR) {
+        cg_retag_top(p->cg, dst);
+        return;
+      }
+      cg_convert(p->cg, dst);
+      return;
+    }
+  }
+  if (is_punct(&t, '+')) {
+    advance(p);
+    parse_unary(p);
+    to_rvalue(p);
+    return;
+  }
+  if (is_punct(&t, '-')) {
+    advance(p);
+    parse_unary(p);
+    to_rvalue(p);
+    cg_unop(p->cg, UO_NEG);
+    return;
+  }
+  if (is_punct(&t, '!')) {
+    /* !x is emitted as (x == 0). */
+    advance(p);
+    parse_unary(p);
+    to_rvalue(p);
+    cg_push_int(p->cg, 0, ty_int(p));
+    cg_cmp(p->cg, CMP_EQ);
+    return;
+  }
+  if (is_punct(&t, '~')) {
+    advance(p);
+    parse_unary(p);
+    to_rvalue(p);
+    cg_unop(p->cg, UO_BNOT);
+    return;
+  }
+  if (is_punct(&t, '&')) {
+    advance(p);
+    parse_unary(p);
+    cg_addr(p->cg);
+    return;
+  }
+  if (is_punct(&t, '*')) {
+    const Type* pty;
+    const Type* pointee;
+    advance(p);
+    parse_unary(p);
+    to_rvalue(p);
+    pty = cg_top_type(p->cg);
+    if (!pty || pty->kind != TY_PTR) {
+      perr(p, "indirection requires pointer operand");
+    }
+    pointee = pty->ptr.pointee;
+    if (pointee && pointee->kind == TY_VOID) {
+      perr(p, "dereferencing pointer to incomplete type");
+    }
+    cg_deref(p->cg, pointee);
+    return;
+  }
+  if (is_punct(&t, P_INC) || is_punct(&t, P_DEC)) {
+    BinOp bop = is_punct(&t, P_INC) ? BO_IADD : BO_ISUB;
+    advance(p);
+    parse_unary(p);
+    cg_inc_dec(p->cg, bop, /*post=*/0);
+    return;
+  }
+  if (is_kw(p, &t, KW_SIZEOF)) {
+    const Type* ty = NULL;
+    FrameSlot vla_slot = FRAME_SLOT_NONE;
+    int operand_is_type = 0;
+    advance(p);
+    if (is_punct(&p->cur, '(')) {
+      Tok n = peek1(p);
+      operand_is_type = starts_type_name(p, &n);
+    }
+    if (operand_is_type) {
+      advance(p);
+      ty = parse_type_name(p);
+      expect_punct(p, ')', "')'");
+    } else {
+      /* Expression operand: parse it for its type, note whether the
+       * operand pushed a VLA local (parse_primary records its byte-size
+       * slot), then drop the value. */
+      p->last_pushed_vla_slot = FRAME_SLOT_NONE;
+      parse_unary(p);
+      ty = cg_top_type(p->cg);
+      vla_slot = p->last_pushed_vla_slot;
+      cg_drop(p->cg);
+    }
+    if (vla_slot != FRAME_SLOT_NONE) {
+      /* VLA size is only known at run time: load it from its slot. */
+      cg_push_local_typed(p->cg, vla_slot, ty_size_t(p));
+      cg_load(p->cg);
+    } else {
+      cg_push_int(p->cg, (i64)abi_sizeof(p->abi, ty), ty_size_t(p));
+    }
+    return;
+  }
+  if (is_kw(p, &t, KW_GENERIC)) {
+    advance(p);
+    expect_punct(p, '(', "'('");
+    parse_assign_expr(p);
+    to_rvalue(p);
+    const Type* ctl_ty = cg_top_type(p->cg);
+    cg_drop(p->cg);
+    expect_punct(p, ',', "','");
+    int emitted = 0;
+    Tok* default_buf = NULL;
+    u32 default_len = 0;
+    for (;;) {
+      const Type* assoc_ty = NULL;
+      int is_default = 0;
+      if (is_kw(p, &p->cur, KW_DEFAULT)) {
+        advance(p);
+        is_default = 1;
+      } else {
+        assoc_ty = parse_type_name(p);
+      }
+      expect_punct(p, ':', "':' in _Generic association");
+      /* NOTE: association matching compares type kinds only. */
+      if (!emitted && !is_default && ctl_ty && assoc_ty &&
+          ctl_ty->kind == assoc_ty->kind) {
+        parse_assign_expr(p);
+        emitted = 1;
+      } else if (is_default && !default_buf) {
+        default_buf = generic_record_default(p, &default_len);
+      } else {
+        generic_skip_assoc(p);
+      }
+      if (!accept_punct(p, ',')) break;
+    }
+    if (!emitted && default_buf) {
+      generic_replay_default(p, default_buf, default_len);
+      emitted = 1;
+    }
+    expect_punct(p, ')', "')' after _Generic");
+    if (!emitted) {
+      perr(p, "_Generic: no association matched and no default present");
+    }
+    return;
+  }
+  if (is_kw(p, &t, KW_ALIGNOF)) {
+    const Type* ty;
+    advance(p);
+    expect_punct(p, '(', "'('");
+    if (starts_type_name(p, &p->cur)) {
+      ty = parse_type_name(p);
+    } else {
+      parse_unary(p);
+      ty = cg_top_type(p->cg);
+      cg_drop(p->cg);
+    }
+    expect_punct(p, ')', "')'");
+    cg_push_int(p->cg, (i64)abi_alignof(p->abi, ty), ty_size_t(p));
+    return;
+  }
+  parse_postfix(p);
+}
+
+/* ============================================================
+ * Binary operator levels
+ * ============================================================ */
+
+/* Nonzero when `t` is non-NULL and is float, double or long double. */
+static int type_is_fp(const Type* t) {
+  if (!t) return 0;
+  switch (t->kind) {
+    case TY_FLOAT:
+    case TY_DOUBLE:
+    case TY_LDOUBLE:
+      return 1;
+    default:
+      return 0;
+  }
+}
+
+/* Pick the floating type both operands are converted to: NULL when
+ * neither operand is floating, otherwise the widest of long double,
+ * double, float that appears on either side. */
+static const Type* common_fp_type(Parser* p, const Type* a, const Type* b) {
+  int any_ldouble;
+  int any_double;
+  if (!type_is_fp(a) && !type_is_fp(b)) return NULL;
+  any_ldouble = (a && a->kind == TY_LDOUBLE) || (b && b->kind == TY_LDOUBLE);
+  if (any_ldouble) return type_prim(p->pool, TY_LDOUBLE);
+  any_double = (a && a->kind == TY_DOUBLE) || (b && b->kind == TY_DOUBLE);
+  if (any_double) return type_prim(p->pool, TY_DOUBLE);
+  return type_prim(p->pool, TY_FLOAT);
+}
+
+/* Emit a floating-point binary operation on the top two stack entries.
+ * Both operands are first converted to `common` where their type
+ * differs; the integer opcode `bop` is then mapped to its floating
+ * counterpart. Opcodes without one are reported as an error. */
+static void emit_fp_binop(Parser* p, BinOp bop, const Type* common) {
+  /* Two rounds of convert-then-swap touch each operand once and leave the
+   * operands in their original order. */
+  for (int side = 0; side < 2; ++side) {
+    if (cg_top_type(p->cg) != common) cg_convert(p->cg, common);
+    cg_swap(p->cg);
+  }
+  if (bop == BO_IADD) {
+    cg_binop(p->cg, BO_FADD);
+  } else if (bop == BO_ISUB) {
+    cg_binop(p->cg, BO_FSUB);
+  } else if (bop == BO_IMUL) {
+    cg_binop(p->cg, BO_FMUL);
+  } else if (bop == BO_SDIV) {
+    cg_binop(p->cg, BO_FDIV);
+  } else {
+    perr(p, "operator does not apply to floating types");
+  }
+}
+
+/* multiplicative-expression: unary { (* | / | %) unary }.
+ * Floating operands are routed through emit_fp_binop; everything else
+ * uses the integer opcode directly. */
+static void parse_mul(Parser* p) {
+  parse_unary(p);
+  for (;;) {
+    BinOp op;
+    const Type* lhs_ty;
+    const Type* rhs_ty;
+    const Type* fp_ty;
+    if (is_punct(&p->cur, '*')) op = BO_IMUL;
+    else if (is_punct(&p->cur, '/')) op = BO_SDIV;
+    else if (is_punct(&p->cur, '%')) op = BO_SREM;
+    else return;
+    advance(p);
+    to_rvalue(p);
+    parse_unary(p);
+    to_rvalue(p);
+    lhs_ty = cg_top2_type(p->cg);
+    rhs_ty = cg_top_type(p->cg);
+    fp_ty = common_fp_type(p, lhs_ty, rhs_ty);
+    if (!fp_ty) {
+      cg_binop(p->cg, op);
+      continue;
+    }
+    emit_fp_binop(p, op, fp_ty);
+  }
+}
+
+/* Multiply the integer on top of the stack by `esz` (an element size in
+ * bytes); nothing is emitted when the size is 1. */
+static void add_sub_scale_top(Parser* p, u32 esz) {
+  if (esz == 1) return;
+  cg_push_int(p->cg, (i64)esz, ty_size_t(p));
+  cg_binop(p->cg, BO_IMUL);
+}
+
+/* Emit `+` or `-` for the top two stack entries (left operand below
+ * right). Handles pointer +/- integer (index scaled by pointee size),
+ * integer + pointer, pointer - pointer (byte difference divided by
+ * pointee size), floating operands, and plain integer arithmetic. */
+static void emit_add_or_sub(Parser* p, BinOp bop) {
+  const Type* lhs = cg_top2_type(p->cg);
+  const Type* rhs = cg_top_type(p->cg);
+  int lhs_ptr = lhs && lhs->kind == TY_PTR;
+  int rhs_ptr = rhs && rhs->kind == TY_PTR;
+  const Type* fp_ty;
+
+  if (lhs_ptr && type_is_int(rhs)) {
+    /* ptr + n and ptr - n share the scaling step. */
+    add_sub_scale_top(p, abi_sizeof(p->abi, lhs->ptr.pointee));
+    cg_binop(p->cg, bop == BO_IADD ? BO_IADD : BO_ISUB);
+    return;
+  }
+  if (bop == BO_IADD) {
+    if (rhs_ptr && type_is_int(lhs)) {
+      /* n + ptr: bring the integer to the top before scaling it. */
+      cg_swap(p->cg);
+      add_sub_scale_top(p, abi_sizeof(p->abi, rhs->ptr.pointee));
+      cg_binop(p->cg, BO_IADD);
+      return;
+    }
+  } else if (lhs_ptr && rhs_ptr) {
+    u32 esz = abi_sizeof(p->abi, lhs->ptr.pointee);
+    cg_binop(p->cg, BO_ISUB);
+    if (esz != 1) {
+      cg_push_int(p->cg, (i64)esz, ty_size_t(p));
+      cg_binop(p->cg, BO_SDIV);
+    }
+    return;
+  }
+  fp_ty = common_fp_type(p, lhs, rhs);
+  if (fp_ty) {
+    emit_fp_binop(p, bop, fp_ty);
+    return;
+  }
+  cg_binop(p->cg, bop);
+}
+
+/* additive-expression: mul { (+ | -) mul }. Operand typing (pointer
+ * arithmetic, floating, integer) is decided in emit_add_or_sub. */
+static void parse_add(Parser* p) {
+  parse_mul(p);
+  while (is_punct(&p->cur, '+') || is_punct(&p->cur, '-')) {
+    BinOp op = is_punct(&p->cur, '+') ? BO_IADD : BO_ISUB;
+    advance(p);
+    to_rvalue(p);
+    parse_mul(p);
+    to_rvalue(p);
+    emit_add_or_sub(p, op);
+  }
+}
+
+/* shift-expression: add { (<< | >>) add }. Right shift is emitted with
+ * the BO_SHR_S opcode. */
+static void parse_shift(Parser* p) {
+  parse_add(p);
+  while (is_punct(&p->cur, P_SHL) || is_punct(&p->cur, P_SHR)) {
+    BinOp op = is_punct(&p->cur, P_SHL) ? BO_SHL : BO_SHR_S;
+    advance(p);
+    to_rvalue(p);
+    parse_add(p);
+    to_rvalue(p);
+    cg_binop(p->cg, op);
+  }
+}
+
+/* relational-expression: shift { (< | > | <= | >=) shift }.
+ * All four comparisons are emitted with the *_S compare opcodes. */
+static void parse_rel(Parser* p) {
+  parse_shift(p);
+  for (;;) {
+    CmpOp op;
+    if (is_punct(&p->cur, '<')) op = CMP_LT_S;
+    else if (is_punct(&p->cur, '>')) op = CMP_GT_S;
+    else if (is_punct(&p->cur, P_LE)) op = CMP_LE_S;
+    else if (is_punct(&p->cur, P_GE)) op = CMP_GE_S;
+    else return;
+    advance(p);
+    to_rvalue(p);
+    parse_shift(p);
+    to_rvalue(p);
+    cg_cmp(p->cg, op);
+  }
+}
+
+/* equality-expression: rel { (== | !=) rel }. */
+static void parse_eq(Parser* p) {
+  parse_rel(p);
+  while (is_punct(&p->cur, P_EQ) || is_punct(&p->cur, P_NE)) {
+    CmpOp op = is_punct(&p->cur, P_EQ) ? CMP_EQ : CMP_NE;
+    advance(p);
+    to_rvalue(p);
+    parse_rel(p);
+    to_rvalue(p);
+    cg_cmp(p->cg, op);
+  }
+}
+
+/* AND-expression: eq { & eq }. */
+static void parse_band(Parser* p) {
+  parse_eq(p);
+  for (;;) {
+    if (!is_punct(&p->cur, '&')) return;
+    advance(p);
+    to_rvalue(p);
+    parse_eq(p);
+    to_rvalue(p);
+    cg_binop(p->cg, BO_AND);
+  }
+}
+
+/* exclusive-OR-expression: band { ^ band }. */
+static void parse_bxor(Parser* p) {
+  parse_band(p);
+  for (;;) {
+    if (!is_punct(&p->cur, '^')) return;
+    advance(p);
+    to_rvalue(p);
+    parse_band(p);
+    to_rvalue(p);
+    cg_binop(p->cg, BO_XOR);
+  }
+}
+
+/* inclusive-OR-expression: bxor { | bxor }. */
+static void parse_bor(Parser* p) {
+  parse_bxor(p);
+  for (;;) {
+    if (!is_punct(&p->cur, '|')) return;
+    advance(p);
+    to_rvalue(p);
+    parse_bxor(p);
+    to_rvalue(p);
+    cg_binop(p->cg, BO_OR);
+  }
+}
+
+/* Allocate a fresh local frame slot sized and aligned for `ty`. */
+static FrameSlot ll_tmp_slot(Parser* p, const Type* ty) {
+  FrameSlotDesc desc;
+  memset(&desc, 0, sizeof desc);
+  desc.kind = FS_LOCAL;
+  desc.flags = FSF_NONE;
+  desc.type = ty;
+  desc.size = abi_sizeof(p->abi, ty);
+  desc.align = abi_alignof(p->abi, ty);
+  return cg_local(p->cg, &desc);
+}
+
+/* Emit `tmp = v`, where `tmp` is a local slot of type `ty`; the stack is
+ * left as it was found. */
+static void ll_store_const(Parser* p, FrameSlot tmp, const Type* ty, i64 v) {
+  CG* cg = p->cg;
+  cg_push_local_typed(cg, tmp, ty); /* destination lvalue */
+  cg_push_int(cg, v, ty);           /* constant to store */
+  cg_store(cg);
+  cg_drop(cg);
+}
+
+/* logical-AND-expression: bor { && bor }.
+ * Short-circuits: either operand being false jumps to the "false" label.
+ * The 0/1 result is written to an int temporary, which is pushed as the
+ * value of the expression. */
+static void parse_land(Parser* p) {
+  parse_bor(p);
+  for (;;) {
+    CGLabel on_false;
+    CGLabel done;
+    const Type* int_ty;
+    FrameSlot result;
+    if (!is_punct(&p->cur, P_AND)) break;
+    on_false = cg_label_new(p->cg);
+    done = cg_label_new(p->cg);
+    int_ty = ty_int(p);
+    result = ll_tmp_slot(p, int_ty);
+    advance(p);
+    /* left operand */
+    to_rvalue(p);
+    cg_branch_false(p->cg, on_false);
+    /* right operand */
+    parse_bor(p);
+    to_rvalue(p);
+    cg_branch_false(p->cg, on_false);
+    /* both true */
+    ll_store_const(p, result, int_ty, 1);
+    cg_jump(p->cg, done);
+    cg_label_place(p->cg, on_false);
+    ll_store_const(p, result, int_ty, 0);
+    cg_label_place(p->cg, done);
+    cg_push_local_typed(p->cg, result, int_ty);
+  }
+}
+
+/* logical-OR-expression: land { || land }.
+ * Short-circuits: either operand being true jumps to the "true" label.
+ * The 0/1 result is written to an int temporary, which is pushed as the
+ * value of the expression. */
+static void parse_lor(Parser* p) {
+  parse_land(p);
+  for (;;) {
+    CGLabel on_true;
+    CGLabel done;
+    const Type* int_ty;
+    FrameSlot result;
+    if (!is_punct(&p->cur, P_OR)) break;
+    on_true = cg_label_new(p->cg);
+    done = cg_label_new(p->cg);
+    int_ty = ty_int(p);
+    result = ll_tmp_slot(p, int_ty);
+    advance(p);
+    /* left operand */
+    to_rvalue(p);
+    cg_branch_true(p->cg, on_true);
+    /* right operand */
+    parse_land(p);
+    to_rvalue(p);
+    cg_branch_true(p->cg, on_true);
+    /* both false */
+    ll_store_const(p, result, int_ty, 0);
+    cg_jump(p->cg, done);
+    cg_label_place(p->cg, on_true);
+    ll_store_const(p, result, int_ty, 1);
+    cg_label_place(p->cg, done);
+    cg_push_local_typed(p->cg, result, int_ty);
+  }
+}
+
+/* conditional-expression: lor [ ? assign : assign ].
+ *
+ * Each arm stores its value into a shared temporary, which is pushed as
+ * the result. The result type is the type of the "then" arm (int if that
+ * arm reports no type); the "else" arm is converted to it when its type
+ * differs. */
+static void parse_ternary(Parser* p) {
+  parse_lor(p);
+  if (!is_punct(&p->cur, '?')) return;
+  CGLabel L_else = cg_label_new(p->cg);
+  CGLabel L_end = cg_label_new(p->cg);
+  const Type* result_ty;
+  FrameSlot tmp;
+  advance(p); /* '?' */
+  to_rvalue(p);
+  cg_branch_false(p->cg, L_else);
+
+  /* "then" arm: its type decides the temporary's type. */
+  parse_assign_expr(p);
+  to_rvalue(p);
+  result_ty = cg_top_type(p->cg);
+  if (!result_ty) result_ty = ty_int(p);
+  tmp = ll_tmp_slot(p, result_ty);
+  cg_push_local_typed(p->cg, tmp, result_ty);
+  cg_swap(p->cg); /* destination below value, as cg_store is used here */
+  cg_store(p->cg);
+  cg_drop(p->cg);
+  cg_jump(p->cg, L_end);
+
+  /* "else" arm */
+  cg_label_place(p->cg, L_else);
+  expect_punct(p, ':', "':' in ternary");
+  parse_assign_expr(p);
+  to_rvalue(p);
+  if (cg_top_type(p->cg) != result_ty) {
+    cg_convert(p->cg, result_ty);
+  }
+  cg_push_local_typed(p->cg, tmp, result_ty);
+  cg_swap(p->cg);
+  cg_store(p->cg);
+  cg_drop(p->cg);
+
+  cg_label_place(p->cg, L_end);
+  cg_push_local_typed(p->cg, tmp, result_ty);
+}
+
+/* assignment-expression: ternary [ (= | op=) assignment-expression ].
+ *
+ * Simple assignment stores the coerced right-hand value into the lvalue
+ * left on the stack by the left operand.
+ *
+ * Compound assignment `a op= b` is emitted the way `a = a op b` would be:
+ * the lvalue is duplicated and loaded, the operator is applied with the
+ * same operand handling the binary operators use (emit_add_or_sub for
+ * + and -, the floating-point routing of parse_mul for *, / and %), and
+ * the result is coerced to the lvalue's type before the store. */
+void parse_assign_expr(Parser* p) {
+  parse_ternary(p);
+  Tok t = p->cur;
+  BinOp compound = (BinOp)0;
+  int is_simple_assign = 0;
+  if (is_punct(&t, '=')) is_simple_assign = 1;
+  else if (is_punct(&t, P_ADD_ASSIGN)) compound = BO_IADD;
+  else if (is_punct(&t, P_SUB_ASSIGN)) compound = BO_ISUB;
+  else if (is_punct(&t, P_MUL_ASSIGN)) compound = BO_IMUL;
+  else if (is_punct(&t, P_DIV_ASSIGN)) compound = BO_SDIV;
+  else if (is_punct(&t, P_MOD_ASSIGN)) compound = BO_SREM;
+  else if (is_punct(&t, P_AND_ASSIGN)) compound = BO_AND;
+  else if (is_punct(&t, P_OR_ASSIGN)) compound = BO_OR;
+  else if (is_punct(&t, P_XOR_ASSIGN)) compound = BO_XOR;
+  else if (is_punct(&t, P_SHL_ASSIGN)) compound = BO_SHL;
+  else if (is_punct(&t, P_SHR_ASSIGN)) compound = BO_SHR_S;
+  else return; /* not an assignment: the ternary result stands */
+  advance(p);
+  if (is_simple_assign) {
+    parse_assign_expr(p);
+    to_rvalue(p);
+    coerce_top_to_lvalue(p);
+    cg_store(p->cg);
+    return;
+  }
+  /* Keep the lvalue for the store and load a copy as the left operand. */
+  cg_dup(p->cg);
+  cg_load(p->cg);
+  parse_assign_expr(p);
+  to_rvalue(p);
+  if (compound == BO_IADD || compound == BO_ISUB) {
+    emit_add_or_sub(p, compound);
+  } else if (compound == BO_IMUL || compound == BO_SDIV ||
+             compound == BO_SREM) {
+    /* Mirror parse_mul: floating operands need the FP opcodes. */
+    const Type* common =
+        common_fp_type(p, cg_top2_type(p->cg), cg_top_type(p->cg));
+    if (common) {
+      emit_fp_binop(p, compound, common);
+    } else {
+      cg_binop(p->cg, compound);
+    }
+  } else {
+    cg_binop(p->cg, compound);
+  }
+  /* Same coercion step simple assignment applies before its store. */
+  coerce_top_to_lvalue(p);
+  cg_store(p->cg);
+}
+
+/* expression: assign { , assign }. The value of every operand but the
+ * last is dropped, so the last operand is the result. */
+void parse_expr(Parser* p) {
+  parse_assign_expr(p);
+  for (;;) {
+    if (!is_punct(&p->cur, ',')) return;
+    advance(p);
+    cg_drop(p->cg);
+    parse_assign_expr(p);
+  }
+}
+
+/* Parse one conditional (ternary-level) expression; a thin public wrapper that forwards to parse_ternary. */
+void parse_cond_expr(Parser* p) {
+ parse_ternary(p);
+}
diff --git a/src/parse/parse_init.c b/src/parse/parse_init.c
@@ -0,0 +1,808 @@
+/* parse_init.c — runtime and static-storage initializers.
+ *
+ * Covers §6.7.9 (initializers):
+ * - Runtime aggregate/scalar initializers (init_at, init_elided,
+ * init_struct_fields, init_string_at, parse_designator_chain,
+ * push_subobject_lv, emit_copy_leaf, emit_walk_copy,
+ * emit_struct_copy_into_slot, zero_init_at)
+ * - Static-storage object definition (parse_static_init_at,
+ * parse_static_string_at, try_parse_addr_const, encode_int_le,
+ * pick_object_section, define_static_object, srl_push)
+ */
+
+#include "parse/parse_priv.h"
+
+/* ============================================================
+ * File-local helpers
+ * ============================================================ */
+
+/* The type the ABI reports as its size type; used here for byte-offset
+ * arithmetic. */
+static const Type* ty_size_t_init(Parser* p) {
+  const Type* size_ty = abi_size_type(p->abi, p->pool);
+  return size_ty;
+}
+
+/* Source location recorded on token `t`. */
+static SrcLoc tok_loc_init(const Tok* t) {
+  SrcLoc where = t->loc;
+  return where;
+}
+
+/* Keyword classification of `name`, forwarded to ident_kw_inline. */
+static CKw ident_kw_init(const Parser* p, Sym name) {
+  CKw kw = ident_kw_inline(p, name);
+  return kw;
+}
+
+/* Nonzero when `ty` is non-NULL and is one of the three character types
+ * (char, signed char, unsigned char). */
+int is_char_kind(const Type* ty) {
+  if (!ty) return 0;
+  switch (ty->kind) {
+    case TY_CHAR:
+    case TY_SCHAR:
+    case TY_UCHAR:
+      return 1;
+    default:
+      return 0;
+  }
+}
+
+/* Decode the string literal at p->cur without consuming the token.
+ * The returned buffer is heap-allocated and owned by the caller; the
+ * decoded length (terminating NUL included) is stored in *nlen_out.
+ * Reports an internal error if the current token is not a string. */
+static u8* peek_string_bytes(Parser* p, size_t* nlen_out) {
+  Tok str_tok = p->cur;
+  if (str_tok.kind == TOK_STR) {
+    return decode_string_literal(p, &str_tok, nlen_out);
+  }
+  perr(p, "internal: peek_string_bytes on non-string");
+  return decode_string_literal(p, &str_tok, nlen_out);
+}
+
+/* ============================================================
+ * Runtime initializers
+ * ============================================================ */
+
+/* Forward declarations: init_at and init_elided are declared ahead of their definitions so the initializer helpers below can call them. */
+void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset,
+ const Type* ty);
+static u32 init_elided(Parser* p, FrameSlot slot, const Type* arr_ty,
+ u32 offset, const Type* ty);
+
+/* Push an lvalue designating the sub-object of type `elem_ty` located
+ * `offset` bytes into local `slot` (whose own type is `arr_ty`).
+ *
+ * The slot's address is taken and retagged as a pointer to `elem_ty`, the
+ * byte offset is added when non-zero, and the pointer is dereferenced. */
+void push_subobject_lv(Parser* p, FrameSlot slot, const Type* arr_ty,
+                       u32 offset, const Type* elem_ty) {
+  const Type* elem_ptr_ty;
+  cg_push_local_typed(p->cg, slot, arr_ty);
+  cg_addr(p->cg);
+  elem_ptr_ty = type_ptr(p->pool, elem_ty);
+  cg_retag_top(p->cg, elem_ptr_ty);
+  if (offset != 0) {
+    cg_push_int(p->cg, (i64)offset, ty_size_t_init(p));
+    cg_binop(p->cg, BO_IADD);
+  }
+  cg_deref(p->cg, elem_ty);
+}
+
+/* Copy one scalar of type `leaf_ty`.
+ *
+ * Destination: `dst_off` bytes into local `dst_slot` (of type
+ * `dst_arr_ty`). Source: `src_off` bytes past the pointer value stored in
+ * local `src_ptr_slot` (of type `src_ptr_ty`). */
+static void emit_copy_leaf(Parser* p, FrameSlot dst_slot, const Type* dst_arr_ty,
+                           u32 dst_off, FrameSlot src_ptr_slot,
+                           const Type* src_ptr_ty, u32 src_off,
+                           const Type* leaf_ty) {
+  CG* cg = p->cg;
+
+  /* destination lvalue */
+  push_subobject_lv(p, dst_slot, dst_arr_ty, dst_off, leaf_ty);
+
+  /* source lvalue: load the saved pointer, view it as leaf_ty*, offset */
+  cg_push_local_typed(cg, src_ptr_slot, src_ptr_ty);
+  cg_load(cg);
+  cg_retag_top(cg, type_ptr(p->pool, leaf_ty));
+  if (src_off != 0) {
+    cg_push_int(cg, (i64)src_off, ty_size_t_init(p));
+    cg_binop(cg, BO_IADD);
+  }
+  cg_deref(cg, leaf_ty);
+
+  /* value copy */
+  cg_load(cg);
+  cg_store(cg);
+  cg_drop(cg);
+}
+
+/* Recursively copy an object of type `ty` from the source (addressed via
+ * the pointer held in `src_ptr_slot`, at `src_off`) into the destination
+ * slot at `dst_off`.
+ *
+ *  - struct: each field is copied at its layout offset; bit-field members
+ *    are skipped;
+ *  - array:  each element is copied in index order;
+ *  - union:  copied one unsigned char at a time over its full size;
+ *  - other:  copied as a single scalar leaf. */
+static void emit_walk_copy(Parser* p, FrameSlot dst_slot,
+                           const Type* dst_arr_ty, u32 dst_off,
+                           FrameSlot src_ptr_slot, const Type* src_ptr_ty,
+                           u32 src_off, const Type* ty) {
+  switch (ty->kind) {
+    case TY_STRUCT: {
+      const ABIRecordLayout* layout = abi_record_layout(p->abi, ty);
+      for (u16 fi = 0; fi < ty->rec.nfields; ++fi) {
+        const Field* fld = &ty->rec.fields[fi];
+        u32 rel;
+        if (fld->flags & FIELD_BITFIELD) continue;
+        rel = layout->fields[fi].offset;
+        emit_walk_copy(p, dst_slot, dst_arr_ty, dst_off + rel,
+                       src_ptr_slot, src_ptr_ty, src_off + rel, fld->type);
+      }
+      return;
+    }
+    case TY_ARRAY: {
+      u32 stride = abi_sizeof(p->abi, ty->arr.elem);
+      for (u32 k = 0; k < ty->arr.count; ++k) {
+        u32 rel = k * stride;
+        emit_walk_copy(p, dst_slot, dst_arr_ty, dst_off + rel,
+                       src_ptr_slot, src_ptr_ty, src_off + rel,
+                       ty->arr.elem);
+      }
+      return;
+    }
+    case TY_UNION: {
+      u32 nbytes = abi_sizeof(p->abi, ty);
+      const Type* byte_ty = type_prim(p->pool, TY_UCHAR);
+      for (u32 b = 0; b < nbytes; ++b) {
+        emit_copy_leaf(p, dst_slot, dst_arr_ty, dst_off + b,
+                       src_ptr_slot, src_ptr_ty, src_off + b, byte_ty);
+      }
+      return;
+    }
+    default:
+      emit_copy_leaf(p, dst_slot, dst_arr_ty, dst_off, src_ptr_slot,
+                     src_ptr_ty, src_off, ty);
+      return;
+  }
+}
+
+/* Copy the struct/union lvalue currently on top of the cg stack into the
+ * sub-object at `dst_off` of local `dst_slot`.
+ *
+ * The source's address is first saved in a fresh pointer-typed local so
+ * every leaf copy can re-derive it; emit_walk_copy then copies the object
+ * member by member. The source entry is consumed. */
+void emit_struct_copy_into_slot(Parser* p, FrameSlot dst_slot,
+                                const Type* dst_arr_ty, u32 dst_off,
+                                const Type* ty) {
+  const Type* src_ptr_ty = type_ptr(p->pool, ty);
+  FrameSlotDesc desc;
+  FrameSlot saved_src;
+
+  cg_addr(p->cg);
+
+  memset(&desc, 0, sizeof desc);
+  desc.kind = FS_LOCAL;
+  desc.flags = FSF_NONE;
+  desc.type = src_ptr_ty;
+  desc.size = abi_sizeof(p->abi, src_ptr_ty);
+  desc.align = abi_alignof(p->abi, src_ptr_ty);
+  saved_src = cg_local(p->cg, &desc);
+
+  /* saved_src = &source */
+  cg_push_local_typed(p->cg, saved_src, src_ptr_ty);
+  cg_swap(p->cg);
+  cg_store(p->cg);
+  cg_drop(p->cg);
+
+  emit_walk_copy(p, dst_slot, dst_arr_ty, dst_off, saved_src, src_ptr_ty, 0,
+                 ty);
+}
+
+/* Recursively zero-initialize the sub-object of type `ty` located
+ * `offset` bytes into local `slot` (whose own type is `arr_ty`).
+ *
+ *  - array:  every element is zeroed in turn;
+ *  - struct: every field is zeroed at its layout offset;
+ *  - union:  the first member is zeroed through its own type (unless it
+ *    is a bit-field), and every remaining byte of the union is then
+ *    zeroed as unsigned char so no part of the object is left
+ *    uninitialized;
+ *  - other:  a typed zero is stored. */
+static void zero_init_at(Parser* p, FrameSlot slot, const Type* arr_ty,
+                         u32 offset, const Type* ty) {
+  if (ty->kind == TY_ARRAY) {
+    u32 esz = abi_sizeof(p->abi, ty->arr.elem);
+    for (u32 i = 0; i < ty->arr.count; ++i) {
+      zero_init_at(p, slot, arr_ty, offset + i * esz, ty->arr.elem);
+    }
+    return;
+  }
+  if (ty->kind == TY_STRUCT) {
+    const ABIRecordLayout* L = abi_record_layout(p->abi, ty);
+    for (u16 i = 0; i < ty->rec.nfields; ++i) {
+      const Field* f = &ty->rec.fields[i];
+      zero_init_at(p, slot, arr_ty, offset + L->fields[i].offset, f->type);
+    }
+    return;
+  }
+  if (ty->kind == TY_UNION) {
+    u32 total = abi_sizeof(p->abi, ty);
+    u32 done = 0; /* bytes already covered by the first member */
+    const Type* uchar_ty = type_prim(p->pool, TY_UCHAR);
+    if (ty->rec.nfields > 0) {
+      const Field* f = &ty->rec.fields[0];
+      if (!(f->flags & FIELD_BITFIELD)) {
+        zero_init_at(p, slot, arr_ty, offset, f->type);
+        done = abi_sizeof(p->abi, f->type);
+      }
+    }
+    /* Clear whatever the first member did not cover (the whole union when
+     * there is no usable first member). */
+    for (u32 i = done; i < total; ++i) {
+      push_subobject_lv(p, slot, arr_ty, offset + i, uchar_ty);
+      cg_push_int(p->cg, 0, uchar_ty);
+      cg_store(p->cg);
+      cg_drop(p->cg);
+    }
+    return;
+  }
+  push_subobject_lv(p, slot, arr_ty, offset, ty);
+  cg_push_int(p->cg, 0, ty);
+  cg_store(p->cg);
+  cg_drop(p->cg);
+}
+
+/* Initialize a character-array sub-object of `count` elements from the
+ * string literal at p->cur, one store per element. Elements past the end
+ * of the decoded string are set to zero; a string longer than the array
+ * is cut off at `count`. Consumes the string token. */
+static void init_string_at(Parser* p, FrameSlot slot, const Type* arr_ty,
+                           u32 offset, const Type* elem_ty, u32 count) {
+  size_t decoded_len = 0;
+  u8* decoded = peek_string_bytes(p, &decoded_len);
+  size_t from_string = decoded_len > count ? (size_t)count : decoded_len;
+  for (size_t k = 0; k < count; ++k) {
+    push_subobject_lv(p, slot, arr_ty, offset + (u32)k, elem_ty);
+    if (k < from_string) {
+      cg_push_int(p->cg, (i64)decoded[k], elem_ty);
+    } else {
+      cg_push_int(p->cg, 0, elem_ty);
+    }
+    cg_store(p->cg);
+    cg_drop(p->cg);
+  }
+  p->c->env->heap->free(p->c->env->heap, decoded, 0);
+  advance(p); /* consume TOK_STR */
+}
+
+/* Parse a designator chain (`[const]` and `.ident` repeats) ending at `=`.
+ *
+ * Starting from `outer_ty` at byte offset `outer_offset`, each designator
+ * narrows the current sub-object. On return:
+ *   *sub_ty_out     type of the designated sub-object,
+ *   *sub_offset_out its byte offset (same base as `outer_offset`),
+ *   *top_index_out  element/field index chosen by the FIRST designator
+ *                   (for a member reached through an anonymous record,
+ *                   the index of that anonymous field).
+ * The trailing `=` is consumed. */
+static void parse_designator_chain(Parser* p, const Type* outer_ty,
+                                   u32 outer_offset, const Type** sub_ty_out,
+                                   u32* sub_offset_out, u32* top_index_out) {
+  const Type* cur_ty = outer_ty;
+  u32 cur_off = outer_offset;
+  int first = 1;
+  for (;;) {
+    if (is_punct(&p->cur, '[')) {
+      i64 idx;
+      u32 esz;
+      SrcLoc cloc = tok_loc_init(&p->cur);
+      advance(p);
+      idx = eval_const_int(p, cloc);
+      expect_punct(p, ']', "']' after designator index");
+      if (!cur_ty || cur_ty->kind != TY_ARRAY) {
+        perr(p, "array designator on non-array");
+      }
+      /* Compare in 64 bits: narrowing idx to u32 first would let an index
+       * such as 2^32 wrap to 0 and slip past the range check. */
+      if (idx < 0 || idx >= (i64)cur_ty->arr.count) {
+        perr(p, "array designator index out of range");
+      }
+      esz = abi_sizeof(p->abi, cur_ty->arr.elem);
+      cur_off += (u32)idx * esz;
+      cur_ty = cur_ty->arr.elem;
+      if (first) *top_index_out = (u32)idx;
+      first = 0;
+    } else if (is_punct(&p->cur, '.')) {
+      Sym fname;
+      const Type* fty;
+      u32 foff;
+      const Field* ff;
+      u16 fi;
+      advance(p);
+      if (p->cur.kind != TOK_IDENT || ident_kw_init(p, p->cur.v.ident) != KW_NONE) {
+        perr(p, "expected field name after '.'");
+      }
+      fname = p->cur.v.ident;
+      advance(p);
+      if (!cur_ty ||
+          (cur_ty->kind != TY_STRUCT && cur_ty->kind != TY_UNION)) {
+        perr(p, "field designator on non-record type");
+      }
+      if (!find_field(p->abi, cur_ty, fname, &fty, &foff, &ff)) {
+        perr(p, "no such field in designator");
+      }
+      cur_off += foff;
+      if (first) {
+        /* Work out which top-level field the name belongs to: either the
+         * field itself or the anonymous record that contains it. */
+        for (fi = 0; fi < cur_ty->rec.nfields; ++fi) {
+          const Field* g = &cur_ty->rec.fields[fi];
+          if (g->name == fname && fname != 0) {
+            *top_index_out = fi;
+            break;
+          }
+          if ((g->flags & FIELD_ANON) &&
+              (g->type->kind == TY_STRUCT || g->type->kind == TY_UNION)) {
+            const Type* tmp_ty;
+            u32 tmp_off;
+            const Field* tmp_f;
+            if (find_field(p->abi, g->type, fname, &tmp_ty, &tmp_off,
+                           &tmp_f)) {
+              *top_index_out = fi;
+              break;
+            }
+          }
+        }
+      }
+      cur_ty = fty;
+      first = 0;
+    } else {
+      break;
+    }
+  }
+  if (first) perr(p, "internal: empty designator chain");
+  expect_punct(p, '=', "'=' after designator");
+  *sub_ty_out = cur_ty;
+  *sub_offset_out = cur_off;
+}
+
+/* Initialize fields of struct `ty` (located at byte `offset`), starting at
+ * field index `start_field`.
+ *
+ * braced != 0: the caller has consumed `{`. Initializers are read until `}`,
+ *   EOF, a missing comma, or the last field; `.name` designators are
+ *   accepted, and every field from `zero_lo` up that was not reached is
+ *   zero-filled with zero_init_at(). The closing `}` is NOT consumed here.
+ * braced == 0: exactly one field (`start_field`) is initialized with
+ *   init_at() and the loop stops; nothing is zero-filled.
+ *
+ * Returns the loop index on exit, i.e. the index of the next field that
+ * would have been initialized. */
+static u32 init_struct_fields(Parser* p, FrameSlot slot, const Type* arr_ty,
+ u32 offset, const Type* ty, u32 start_field,
+ int braced) {
+ const ABIRecordLayout* L = abi_record_layout(p->abi, ty);
+ u32 i = start_field;
+ /* Lowest field index not yet known to be initialized or zeroed. */
+ u32 zero_lo = start_field;
+ for (; i < ty->rec.nfields; ++i) {
+ const Field* f = &ty->rec.fields[i];
+ u32 foff = offset + L->fields[i].offset;
+ if (braced && (is_punct(&p->cur, '}') || p->cur.kind == TOK_EOF)) break;
+ if (braced && is_punct(&p->cur, '.')) {
+ const Type* sub_ty;
+ u32 sub_off;
+ u32 top_idx = 0;
+ parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx);
+ /* Zero every field skipped over on the way to the designated one. */
+ while (zero_lo < top_idx) {
+ const Field* zf = &ty->rec.fields[zero_lo];
+ u32 zoff = offset + L->fields[zero_lo].offset;
+ zero_init_at(p, slot, arr_ty, zoff, zf->type);
+ ++zero_lo;
+ }
+ init_at(p, slot, arr_ty, sub_off, sub_ty);
+ /* Positional initialization resumes after the designated field: the
+ * loop's ++i (or the ++i before break below) steps past top_idx. */
+ i = top_idx;
+ if (zero_lo <= top_idx) zero_lo = top_idx + 1;
+ goto next_item_struct;
+ }
+ init_at(p, slot, arr_ty, foff, f->type);
+ if (zero_lo <= i) zero_lo = i + 1;
+ if (!braced) {
+ /* Unbraced: one field only. */
+ ++i;
+ break;
+ }
+ next_item_struct:
+ if (!accept_punct(p, ',')) {
+ ++i;
+ break;
+ }
+ /* A trailing comma directly before `}` ends the list. */
+ if (is_punct(&p->cur, '}')) {
+ ++i;
+ break;
+ }
+ }
+ if (braced) {
+ u32 j;
+ /* Fields never reached by an initializer are zero-filled. */
+ for (j = zero_lo; j < ty->rec.nfields; ++j) {
+ const Field* f = &ty->rec.fields[j];
+ u32 foff = offset + L->fields[j].offset;
+ zero_init_at(p, slot, arr_ty, foff, f->type);
+ }
+ }
+ return i;
+}
+
+/* Initialize a sub-object of type `ty` at `offset` when its enclosing array
+ * initializer was written without braces.
+ *
+ *   array  - forwards to init_at() for the element type at the same offset
+ *   struct - forwards to init_struct_fields() in unbraced mode
+ *   other  - optional `{`, one assignment-expression stored into the
+ *            sub-object, then optional `,` and the matching `}`
+ *
+ * Always returns 1. */
+static u32 init_elided(Parser* p, FrameSlot slot, const Type* arr_ty,
+                       u32 offset, const Type* ty) {
+  if (ty->kind == TY_ARRAY) {
+    init_at(p, slot, arr_ty, offset, ty->arr.elem);
+    return 1;
+  }
+  if (ty->kind == TY_STRUCT) {
+    init_struct_fields(p, slot, arr_ty, offset, ty, 0, /*braced=*/0);
+    return 1;
+  }
+  /* Scalar / pointer / union: consume one assignment-expr. */
+  int had_brace = accept_punct(p, '{');
+  push_subobject_lv(p, slot, arr_ty, offset, ty);
+  parse_assign_expr(p);
+  to_rvalue(p);
+  /* NOTE(review): init_at()'s scalar path calls coerce_top_to_lvalue() at
+   * this point and this path does not; confirm that is intentional. */
+  cg_store(p->cg);
+  cg_drop(p->cg);
+  if (had_brace) {
+    accept_punct(p, ',');
+    expect_punct(p, '}', "'}' after scalar initializer");
+  }
+  return 1;
+}
+
+/* Parse the initializer at the cursor and emit the stores that initialize
+ * the sub-object of type `ty` at byte `offset`. `slot`, `arr_ty` and `offset`
+ * are forwarded to push_subobject_lv() / zero_init_at() to address the
+ * target.
+ *
+ *   array  - char-kind arrays accept a string literal, bare or wrapped in
+ *            braces. Without `{` the work goes to init_elided() for the
+ *            element type. With `{`, items are positional or `[idx]`
+ *            designated; skipped elements and the unreached tail are
+ *            zero-filled.
+ *   struct - init_struct_fields(), braced or unbraced.
+ *   union  - the first field, or a `.name` designated member when braced.
+ *   scalar - optional braces around one assignment-expression, converted
+ *            with to_rvalue() and coerce_top_to_lvalue() before the store. */
+void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset,
+ const Type* ty) {
+ if (ty->kind == TY_ARRAY) {
+ const Type* elem_ty = ty->arr.elem;
+ u32 esz = abi_sizeof(p->abi, elem_ty);
+ if (is_char_kind(elem_ty)) {
+ if (p->cur.kind == TOK_STR) {
+ init_string_at(p, slot, arr_ty, offset, elem_ty, ty->arr.count);
+ return;
+ }
+ /* `{ "literal" }` form: braces around a single string literal. */
+ if (is_punct(&p->cur, '{') && peek1(p).kind == TOK_STR) {
+ advance(p);
+ init_string_at(p, slot, arr_ty, offset, elem_ty, ty->arr.count);
+ accept_punct(p, ',');
+ expect_punct(p, '}', "'}' after string initializer");
+ return;
+ }
+ }
+ if (!is_punct(&p->cur, '{')) {
+ init_elided(p, slot, arr_ty, offset, elem_ty);
+ return;
+ }
+ advance(p); /* '{' */
+ {
+ /* i: next positional element; zero_lo: lowest element index not yet
+ * initialized or zeroed. */
+ u32 i = 0;
+ u32 zero_lo = 0;
+ if (!is_punct(&p->cur, '}')) {
+ for (;;) {
+ if (is_punct(&p->cur, '[')) {
+ const Type* sub_ty;
+ u32 sub_off;
+ u32 top_idx = 0;
+ parse_designator_chain(p, ty, offset, &sub_ty, &sub_off,
+ &top_idx);
+ /* Zero the elements skipped on the way to the designated index. */
+ while (zero_lo < top_idx) {
+ zero_init_at(p, slot, arr_ty, offset + zero_lo * esz, elem_ty);
+ ++zero_lo;
+ }
+ init_at(p, slot, arr_ty, sub_off, sub_ty);
+ i = top_idx + 1;
+ if (zero_lo < i) zero_lo = i;
+ } else {
+ if (i >= ty->arr.count) {
+ perr(p, "too many initializers for array");
+ }
+ init_at(p, slot, arr_ty, offset + i * esz, elem_ty);
+ ++i;
+ if (zero_lo < i) zero_lo = i;
+ }
+ if (!accept_punct(p, ',')) break;
+ /* Trailing comma before `}`. */
+ if (is_punct(&p->cur, '}')) break;
+ }
+ }
+ expect_punct(p, '}', "'}' after array initializer");
+ {
+ u32 j;
+ /* Zero-fill every element past the highest one initialized. */
+ for (j = zero_lo; j < ty->arr.count; ++j) {
+ zero_init_at(p, slot, arr_ty, offset + j * esz, elem_ty);
+ }
+ }
+ }
+ return;
+ }
+ if (ty->kind == TY_STRUCT) {
+ if (!is_punct(&p->cur, '{')) {
+ init_struct_fields(p, slot, arr_ty, offset, ty, 0, /*braced=*/0);
+ return;
+ }
+ advance(p); /* '{' */
+ init_struct_fields(p, slot, arr_ty, offset, ty, 0, /*braced=*/1);
+ expect_punct(p, '}', "'}' after struct initializer");
+ return;
+ }
+ if (ty->kind == TY_UNION) {
+ int had_brace = accept_punct(p, '{');
+ /* A union with no fields has nothing to store; only close the braces. */
+ if (ty->rec.nfields == 0) {
+ if (had_brace) expect_punct(p, '}', "'}'");
+ return;
+ }
+ if (had_brace && is_punct(&p->cur, '.')) {
+ const Type* sub_ty;
+ u32 sub_off;
+ u32 top_idx = 0;
+ parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx);
+ init_at(p, slot, arr_ty, sub_off, sub_ty);
+ } else {
+ const Field* f = &ty->rec.fields[0];
+ /* NOTE(review): when the first field is a bit-field no initializer
+ * expression is parsed here at all. */
+ if (!(f->flags & FIELD_BITFIELD)) {
+ init_at(p, slot, arr_ty, offset, f->type);
+ }
+ }
+ if (had_brace) {
+ accept_punct(p, ',');
+ expect_punct(p, '}', "'}' after union initializer");
+ }
+ return;
+ }
+ /* Scalar (incl. pointer). */
+ int had_brace = accept_punct(p, '{');
+ push_subobject_lv(p, slot, arr_ty, offset, ty);
+ parse_assign_expr(p);
+ to_rvalue(p);
+ coerce_top_to_lvalue(p);
+ cg_store(p->cg);
+ cg_drop(p->cg);
+ if (had_brace) {
+ accept_punct(p, ',');
+ expect_punct(p, '}', "'}' after scalar initializer");
+ }
+}
+
+/* ============================================================
+ * Static-storage initializers
+ * ============================================================ */
+
+/* Store the low `size` bytes of `v` into `dst`, least-significant byte
+ * first.
+ *
+ * The shift is done on an unsigned copy so the result does not depend on how
+ * the compiler right-shifts negative values. Bytes past the eighth
+ * (size > 8) are filled with the sign extension of `v`, because shifting a
+ * 64-bit value by 64 bits or more is undefined behavior. */
+void encode_int_le(u8* dst, u32 size, i64 v) {
+  const unsigned long long bits = (unsigned long long)v;
+  const u8 sign_fill = (v < 0) ? (u8)0xffu : (u8)0x00u;
+  for (u32 i = 0; i < size; ++i) {
+    dst[i] = (i < 8u) ? (u8)((bits >> (8u * i)) & 0xffu) : sign_fill;
+  }
+}
+
+/* Copy the string literal at the cursor into `buf` at `offset`, for a
+ * char-array sub-object with `count` elements.
+ *
+ * At most `count` bytes are copied; bytes of the sub-object beyond the
+ * literal are left as they already are in `buf`. Reports an error through
+ * perr() if the copy would run past `buflen`. The decoded bytes are freed
+ * and the TOK_STR token is consumed. */
+static void parse_static_string_at(Parser* p, u8* buf, u32 buflen, u32 offset,
+                                   u32 count) {
+  size_t n = 0;
+  u8* bytes = peek_string_bytes(p, &n);
+  size_t copy = n;
+  if (copy > count) copy = count;
+  /* Subtraction form: `offset + copy` could wrap around in 32 bits and make
+   * an out-of-range write look in range. */
+  if (offset > buflen || copy > (size_t)(buflen - offset)) {
+    perr(p, "string initializer overflows object");
+  }
+  memcpy(buf + offset, bytes, copy);
+  p->c->env->heap->free(p->c->env->heap, bytes, 0);
+  advance(p);
+}
+
+/* Record one pending static relocation (offset, size, target, addend) at the
+ * end of the parser's list. When the list is full its capacity doubles (4
+ * entries the first time); the new storage comes from the TU arena and the
+ * existing entries are copied across. */
+void srl_push(Parser* p, u32 offset, u32 size, ObjSymId target, i64 addend) {
+  const u32 used = p->static_relocs_len;
+  if (used == p->static_relocs_cap) {
+    u32 grown = 4u;
+    if (p->static_relocs_cap) grown = p->static_relocs_cap * 2u;
+    void* fresh = arena_array(p->c->tu, char,
+                              grown * sizeof(*p->static_relocs));
+    if (!fresh) perr(p, "out of memory recording static relocs");
+    if (p->static_relocs && used) {
+      memcpy(fresh, p->static_relocs, used * sizeof(*p->static_relocs));
+    }
+    p->static_relocs = fresh;
+    p->static_relocs_cap = grown;
+  }
+  p->static_relocs[used].offset = offset;
+  p->static_relocs[used].size = size;
+  p->static_relocs[used].target = target;
+  p->static_relocs[used].addend = addend;
+  p->static_relocs_len = used + 1u;
+}
+
+/* Try to parse the current expression as an address constant.
+ *
+ * Accepted forms:
+ *   - a string literal: emitted via emit_string_to_rodata(); the reloc
+ *     targets that symbol with addend 0
+ *   - `&ident`, optionally followed by `[const]`
+ *   - `ident`
+ *   either identifier form may be followed by any number of `+ const` /
+ *   `- const` terms.
+ *
+ * On success a relocation covering `sz` bytes at `offset` is queued with
+ * srl_push() and 1 is returned. Returns 0, with no token consumed, when the
+ * current token starts none of these forms. An identifier that is not a
+ * global or function is an error (perr does not return).
+ * `ty` and `buf` are accepted but unused. */
+static int try_parse_addr_const(Parser* p, const Type* ty, u8* buf,
+ u32 offset, u32 sz) {
+ Tok t = p->cur;
+ Sym name = 0;
+ SrcLoc nloc = tok_loc_init(&p->cur);
+ int saw_amp = 0;
+ i64 element_addend = 0;
+ i64 byte_addend = 0;
+ SymEntry* e;
+ const Type* tgt_ty;
+ ObjSymId tgt;
+ if (t.kind == TOK_STR) {
+ size_t n = 0;
+ u8* bytes = decode_string_literal(p, &t, &n);
+ ObjSymId str_sym = emit_string_to_rodata(p, bytes, n);
+ p->c->env->heap->free(p->c->env->heap, bytes, 0);
+ advance(p);
+ (void)ty;
+ (void)buf;
+ srl_push(p, offset, sz, str_sym, 0);
+ return 1;
+ }
+ if (is_punct(&t, '&')) {
+ saw_amp = 1;
+ advance(p);
+ if (p->cur.kind != TOK_IDENT || ident_kw_init(p, p->cur.v.ident) != KW_NONE) {
+ perr(p, "expected identifier after '&' in static initializer");
+ }
+ name = p->cur.v.ident;
+ nloc = tok_loc_init(&p->cur);
+ advance(p);
+ } else if (t.kind == TOK_IDENT && ident_kw_init(p, t.v.ident) == KW_NONE) {
+ name = t.v.ident;
+ advance(p);
+ } else {
+ return 0;
+ }
+ e = scope_lookup(p, name);
+ if (!e || (e->kind != SEK_GLOBAL && e->kind != SEK_FUNC)) {
+ perr(p, "static initializer is not a constant address expression");
+ }
+ tgt = e->v.sym;
+ tgt_ty = e->type;
+ /* `&name[const]`: scaled by the element size when the target is an array,
+ * otherwise the subscript is added as a raw byte count. */
+ if (saw_amp && is_punct(&p->cur, '[')) {
+ SrcLoc cloc;
+ advance(p);
+ cloc = tok_loc_init(&p->cur);
+ element_addend = eval_const_int(p, cloc);
+ expect_punct(p, ']', "']' after array-subscript constant");
+ if (tgt_ty && tgt_ty->kind == TY_ARRAY) {
+ byte_addend +=
+ element_addend * (i64)abi_sizeof(p->abi, tgt_ty->arr.elem);
+ } else {
+ byte_addend += element_addend;
+ }
+ }
+ /* Trailing `+ const` / `- const` terms. The scale is chosen from the
+ * target's declared type: array -> element size, pointer -> pointee size,
+ * `&` on anything else -> size of the target, otherwise raw bytes. */
+ while (is_punct(&p->cur, '+') || is_punct(&p->cur, '-')) {
+ int neg = is_punct(&p->cur, '-');
+ SrcLoc cloc;
+ i64 v;
+ advance(p);
+ cloc = tok_loc_init(&p->cur);
+ v = eval_const_int(p, cloc);
+ if (neg) v = -v;
+ if (tgt_ty && tgt_ty->kind == TY_ARRAY) {
+ byte_addend += v * (i64)abi_sizeof(p->abi, tgt_ty->arr.elem);
+ } else if (tgt_ty && tgt_ty->kind == TY_PTR) {
+ byte_addend += v * (i64)abi_sizeof(p->abi, tgt_ty->ptr.pointee);
+ } else if (saw_amp) {
+ /* NOTE(review): the branches above guard against a NULL tgt_ty but
+ * this one passes it to abi_sizeof() unchecked; confirm e->type can
+ * never be NULL here. */
+ byte_addend += v * (i64)abi_sizeof(p->abi, tgt_ty);
+ } else {
+ byte_addend += v;
+ }
+ }
+ (void)nloc;
+ (void)ty;
+ (void)buf;
+ srl_push(p, offset, sz, tgt, byte_addend);
+ return 1;
+}
+
+/* Parse a static-storage initializer for a sub-object of type `ty` and
+ * encode it into `buf` (length `buflen`) at byte `offset`.
+ *
+ *   array  - char-kind arrays accept a string literal, bare or in braces;
+ *            any other array initializer must be braced. Items are
+ *            positional or `[idx]` designated.
+ *   struct - must be braced; items are positional or `.name` designated.
+ *   union  - rejected with an error.
+ *   scalar - optional braces. Pointers first try try_parse_addr_const(),
+ *            which queues a relocation instead of writing bytes; everything
+ *            else is evaluated with eval_const_int() and written
+ *            little-endian.
+ *
+ * Nothing here zero-fills: sub-objects without an initializer keep whatever
+ * `buf` already holds. All errors go through perr(), which does not return. */
+void parse_static_init_at(Parser* p, u8* buf, u32 buflen, u32 offset,
+ const Type* ty) {
+ if (ty->kind == TY_ARRAY) {
+ const Type* elem = ty->arr.elem;
+ u32 esz = abi_sizeof(p->abi, elem);
+ u32 i = 0;
+ int had_brace;
+ if (is_char_kind(elem)) {
+ if (p->cur.kind == TOK_STR) {
+ parse_static_string_at(p, buf, buflen, offset, ty->arr.count);
+ return;
+ }
+ /* `{ "literal" }` form. */
+ if (is_punct(&p->cur, '{') && peek1(p).kind == TOK_STR) {
+ advance(p);
+ parse_static_string_at(p, buf, buflen, offset, ty->arr.count);
+ accept_punct(p, ',');
+ expect_punct(p, '}', "'}' after string initializer");
+ return;
+ }
+ }
+ had_brace = accept_punct(p, '{');
+ if (!had_brace) {
+ perr(p, "expected '{' for static-storage array initializer");
+ }
+ if (!is_punct(&p->cur, '}')) {
+ for (;;) {
+ if (is_punct(&p->cur, '[')) {
+ const Type* sub_ty;
+ u32 sub_off;
+ u32 top_idx = 0;
+ parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx);
+ parse_static_init_at(p, buf, buflen, sub_off, sub_ty);
+ /* Positional items continue after the designated element. */
+ i = top_idx + 1;
+ } else {
+ if (i >= ty->arr.count) {
+ perr(p, "too many initializers for array");
+ }
+ parse_static_init_at(p, buf, buflen, offset + i * esz, elem);
+ ++i;
+ }
+ if (!accept_punct(p, ',')) break;
+ /* Trailing comma before `}`. */
+ if (is_punct(&p->cur, '}')) break;
+ }
+ }
+ expect_punct(p, '}', "'}' after array initializer");
+ return;
+ }
+ if (ty->kind == TY_STRUCT) {
+ int had_brace = accept_punct(p, '{');
+ const ABIRecordLayout* L = abi_record_layout(p->abi, ty);
+ u32 i = 0;
+ if (!had_brace) {
+ perr(p, "expected '{' for static-storage struct initializer");
+ }
+ while (i < ty->rec.nfields && !is_punct(&p->cur, '}')) {
+ const Field* f = &ty->rec.fields[i];
+ if (is_punct(&p->cur, '.')) {
+ const Type* sub_ty;
+ u32 sub_off;
+ u32 top_idx = 0;
+ parse_designator_chain(p, ty, offset, &sub_ty, &sub_off, &top_idx);
+ parse_static_init_at(p, buf, buflen, sub_off, sub_ty);
+ /* Positional items continue after the designated field. */
+ i = top_idx + 1;
+ if (!accept_punct(p, ',')) break;
+ continue;
+ }
+ parse_static_init_at(p, buf, buflen, offset + L->fields[i].offset,
+ f->type);
+ ++i;
+ if (!accept_punct(p, ',')) break;
+ }
+ expect_punct(p, '}', "'}' after struct initializer");
+ return;
+ }
+ if (ty->kind == TY_UNION) {
+ perr(p, "static-storage union initializer not supported in Phase 4");
+ }
+ /* Scalar / pointer. */
+ {
+ int had_brace = accept_punct(p, '{');
+ SrcLoc cloc = tok_loc_init(&p->cur);
+ u32 sz = abi_sizeof(p->abi, ty);
+ if (offset + sz > buflen) perr(p, "initializer overflows object");
+ if (ty->kind == TY_PTR && try_parse_addr_const(p, ty, buf, offset, sz)) {
+ /* Address constant recorded as a reloc. */
+ } else {
+ /* Integer constant expression (also reached by pointers whose
+ * initializer is not an address-constant form). */
+ i64 v = eval_const_int(p, cloc);
+ encode_int_le(buf + offset, sz, v);
+ }
+ if (had_brace) {
+ accept_punct(p, ',');
+ expect_punct(p, '}', "'}' after scalar initializer");
+ }
+ }
+}
+
+/* Decide whether a defining object declaration needs a specific section.
+ * A const-qualified object whose image has non-zero content is directed to
+ * `.rodata`; every other object yields OBJ_SEC_NONE so the caller applies
+ * its own placement. */
+static ObjSecId pick_object_section(Parser* p, u16 quals, int has_nonzero) {
+  const int is_const = (quals & Q_CONST) != 0;
+  if (!is_const || !has_nonzero) return OBJ_SEC_NONE;
+  Sym rodata_name = pool_intern_cstr(p->pool, ".rodata");
+  return obj_section(decl_obj(p->decls), rodata_name, SEC_RODATA, SF_ALLOC,
+                     1u);
+}
+
+/* Emit every pending static relocation into `sec`, rebased by `base`, then
+ * clear the pending list. A slot recorded with size 8 becomes R_ABS64; any
+ * other size becomes R_ABS32. */
+static void sobj_flush_relocs(Parser* p, ObjBuilder* ob, ObjSecId sec,
+                              u32 base) {
+  for (u32 i = 0; i < p->static_relocs_len; ++i) {
+    RelocKind rk = (p->static_relocs[i].size == 8) ? R_ABS64 : R_ABS32;
+    obj_reloc(ob, sec, base + p->static_relocs[i].offset, rk,
+              p->static_relocs[i].target, p->static_relocs[i].addend);
+  }
+  p->static_relocs_len = 0;
+}
+
+/* Place an initialized object in `sec`: align, reserve `size` bytes, copy
+ * `image` into them, define `sym` there, and emit the pending relocations. */
+static void sobj_place_image(Parser* p, ObjBuilder* ob, ObjSymId sym,
+                             ObjSecId sec, u32 align, const u8* image,
+                             u32 size) {
+  u32 base = obj_align_to(ob, sec, align);
+  u8* dst = obj_reserve(ob, sec, size);
+  if (dst && image) memcpy(dst, image, size);
+  obj_symbol_define(ob, sym, sec, base, size);
+  sobj_flush_relocs(p, ob, sec, base);
+}
+
+/* Place a zero-initialized object in the no-bits section `sec` and define
+ * `sym` at the reserved position. */
+static void sobj_place_nobits(ObjBuilder* ob, ObjSymId sym, ObjSecId sec,
+                              u32 align, u32 size) {
+  u32 base = obj_align_to(ob, sec, align);
+  obj_reserve_bss(ob, sec, base + size, align);
+  obj_symbol_define(ob, sym, sec, base, size);
+}
+
+/* Define a static-storage object.
+ *
+ * When `has_init` is set, the initializer at the cursor is parsed into a
+ * zeroed image of the object's size and any address constants are collected
+ * as pending relocations. The object is then placed as follows:
+ *   - thread-local symbol (SK_TLS): the tbss section when there is no
+ *     initializer or the image is all zero with no relocations, otherwise
+ *     the tdata section
+ *   - const-qualified with non-zero content: `.rodata`
+ *   - no initializer, or an all-zero image without relocations: `.bss`
+ *   - everything else: `.data`
+ *
+ * `align_override` raises the ABI alignment when it is larger. `loc` is
+ * currently unused. */
+void define_static_object(Parser* p, ObjSymId sym, const Type* var_ty,
+                          u16 quals, int has_init, SrcLoc loc,
+                          u32 align_override) {
+  ObjBuilder* ob = decl_obj(p->decls);
+  u32 size = abi_sizeof(p->abi, var_ty);
+  u32 align = abi_alignof(p->abi, var_ty);
+  u8* buf = NULL;
+  int has_nonzero = 0;
+  ObjSecId override_sec;
+  const ObjSym* os = obj_symbol_get(ob, sym);
+  int is_tls = (os && os->kind == SK_TLS);
+  u32 a;
+  (void)loc;
+
+  if (align_override > align) align = align_override;
+  a = align ? align : 1u; /* alignment actually used, never 0 */
+
+  if (has_init) {
+    buf = (u8*)arena_array(p->c->tu, u8, size ? size : 1u);
+    /* The arena can fail (srl_push checks the same allocator); stop before
+     * memset writes through a NULL pointer. */
+    if (!buf) perr(p, "out of memory allocating static initializer");
+    memset(buf, 0, size);
+    p->static_relocs_len = 0;
+    parse_static_init_at(p, buf, size, 0, var_ty);
+    for (u32 i = 0; i < size; ++i) {
+      if (buf[i]) { has_nonzero = 1; break; }
+    }
+    /* A relocated slot is filled in later, so it counts as non-zero. */
+    if (p->static_relocs_len) has_nonzero = 1;
+  }
+
+  if (is_tls) {
+    Sym sname;
+    ObjSecId sec;
+    if (!has_init || !has_nonzero) {
+      sname = obj_secname_tbss(p->c);
+      sec = obj_section_ex(ob, sname, SEC_BSS, SSEM_NOBITS,
+                           SF_ALLOC | SF_WRITE | SF_TLS, a, 0, OBJ_SEC_NONE, 0);
+      sobj_place_nobits(ob, sym, sec, a, size);
+      return;
+    }
+    sname = obj_secname_tdata(p->c);
+    sec = obj_section(ob, sname, SEC_DATA, SF_ALLOC | SF_WRITE | SF_TLS, a);
+    sobj_place_image(p, ob, sym, sec, a, buf, size);
+    return;
+  }
+
+  override_sec = pick_object_section(p, quals, has_nonzero);
+  if (override_sec != OBJ_SEC_NONE) {
+    sobj_place_image(p, ob, sym, override_sec, a, buf, size);
+    return;
+  }
+
+  if (!has_init || !has_nonzero) {
+    Sym sname = pool_intern_cstr(p->pool, ".bss");
+    ObjSecId sec = obj_section_ex(ob, sname, SEC_BSS, SSEM_NOBITS,
+                                  SF_ALLOC | SF_WRITE, a, 0, OBJ_SEC_NONE, 0);
+    sobj_place_nobits(ob, sym, sec, a, size);
+    return;
+  }
+
+  /* .data path. */
+  {
+    Sym sname = pool_intern_cstr(p->pool, ".data");
+    ObjSecId sec = obj_section(ob, sname, SEC_DATA, SF_ALLOC | SF_WRITE, a);
+    sobj_place_image(p, ob, sym, sec, a, buf, size);
+  }
+}
diff --git a/src/parse/parse_priv.h b/src/parse/parse_priv.h
@@ -0,0 +1,431 @@
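+/* parse_priv.h — private header shared across parse_*.c modules.
+ *
+ * Declares: the Parser struct; shared types (CKw, SymEntry, TagEntry, Scope,
+ * CaseEntry, SwitchCtx, GotoLabel, DeclSpecs, TypeSpecAccum, ParamInfo,
+ * DeclSuffix); forward decls of cross-module functions; and inline/shared
+ * helpers. TagDeclKind is used by the tag API below but is not defined in
+ * this header; it comes from one of the headers included here. */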
+
+#pragma once
+
+#include "parse/parse.h"
+
+#include <stdarg.h>
+#include <string.h>
+
+#include "abi/abi.h"
+#include "arch/arch.h"
+#include "cg/cg.h"
+#include "core/arena.h"
+#include "core/core.h"
+#include "core/heap.h"
+#include "core/pool.h"
+#include "debug/debug.h"
+#include "decl/decl.h"
+#include "decl/decl_attrs.h"
+#include "lex/lex.h"
+#include "obj/obj.h"
+#include "parse/attr.h"
+#include "pp/pp.h"
+#include "type/type.h"
+
+/* Type-aware push for locals — exposed by cg.c, not in cg.h. */
+extern void cg_push_local_typed(CG*, FrameSlot, const Type*);
+/* Pop pointer rvalue, push INDIRECT lvalue of given pointee. */
+extern void cg_deref(CG*, const Type* pointee);
+/* Read SValue.type at top of stack without popping. */
+extern const Type* cg_top_type(CG*);
+/* Read SValue.type at second-from-top. */
+extern const Type* cg_top2_type(CG*);
+/* Replace the type tag on the top SValue without emitting code. */
+extern void cg_retag_top(CG*, const Type*);
+
+/* ============================================================
+ * Keywords
+ * ============================================================ */
+/* C keyword identifiers. Values index Parser.kw_sym[], and ident_kw_inline()
+ * scans indices 1 .. KW_COUNT-1, so KW_NONE must remain 0 and KW_COUNT must
+ * remain the last enumerator. */
+typedef enum CKw {
+ KW_NONE = 0,
+ KW_AUTO,
+ KW_BREAK,
+ KW_CASE,
+ KW_CHAR,
+ KW_CONST,
+ KW_CONTINUE,
+ KW_DEFAULT,
+ KW_DO,
+ KW_DOUBLE,
+ KW_ELSE,
+ KW_ENUM,
+ KW_EXTERN,
+ KW_FLOAT,
+ KW_FOR,
+ KW_GOTO,
+ KW_IF,
+ KW_INLINE,
+ KW_INT,
+ KW_LONG,
+ KW_REGISTER,
+ KW_RESTRICT,
+ KW_RETURN,
+ KW_SHORT,
+ KW_SIGNED,
+ KW_SIZEOF,
+ KW_STATIC,
+ KW_STRUCT,
+ KW_SWITCH,
+ KW_TYPEDEF,
+ KW_UNION,
+ KW_UNSIGNED,
+ KW_VOID,
+ KW_VOLATILE,
+ KW_WHILE,
+ KW_BOOL, /* _Bool */
+ KW_COMPLEX, /* _Complex */
+ KW_IMAGINARY, /* _Imaginary */
+ KW_ALIGNAS, /* _Alignas */
+ KW_ALIGNOF, /* _Alignof */
+ KW_ATOMIC, /* _Atomic */
+ KW_GENERIC, /* _Generic */
+ KW_NORETURN, /* _Noreturn */
+ KW_STATIC_ASSERT, /* _Static_assert */
+ KW_THREAD_LOCAL, /* _Thread_local */
+ KW_ASM, /* GNU `asm` */
+ KW_BUILTIN_ASM, /* GNU `__asm__` */
+ KW_COUNT
+} CKw;
+
+/* ============================================================
+ * Scope stack types
+ * ============================================================ */
+
+/* What an ordinary identifier names. Stored in SymEntry.kind (as a u8). */
+typedef enum SymEntryKind {
+ SEK_LOCAL, /* local variable, OPK_LOCAL via FrameSlot */
+ SEK_GLOBAL, /* global var, OPK_GLOBAL via ObjSymId */
+ SEK_FUNC, /* function decl, OPK_GLOBAL via ObjSymId */
+ SEK_TYPEDEF, /* typedef name */
+ SEK_ENUM_CST, /* enumeration constant */
+} SymEntryKind;
+
+/* One ordinary-identifier binding; entries are chained through `next`. */
+typedef struct SymEntry SymEntry;
+struct SymEntry {
+ Sym name;
+ u8 kind; /* SymEntryKind */
+ u8 pad[3];
+ const Type* type;
+ /* Payload selected by `kind`: `sym` is what the static-initializer code
+ * reads for SEK_GLOBAL / SEK_FUNC; `slot` and `enum_value` presumably
+ * serve SEK_LOCAL and SEK_ENUM_CST (confirm against scope_define callers). */
+ union {
+ FrameSlot slot;
+ ObjSymId sym;
+ i64 enum_value;
+ } v;
+ FrameSlot vla_byte_slot; /* presumably the VLA byte-size slot; TODO confirm */
+ struct Attr* attrs;
+ SymEntry* next;
+};
+
+/* One struct/union/enum tag binding; entries are chained through `next`.
+ * The fields mirror the parameters of tag_define(). */
+typedef struct TagEntry TagEntry;
+struct TagEntry {
+ Sym name;
+ u8 kind; /* TagDeclKind */
+ u8 complete; /* mirrors tag_define's `complete` argument */
+ u16 pad;
+ Type* type;
+ struct Attr* attrs;
+ TagEntry* next;
+};
+
+/* One lexical scope: separate lists for ordinary identifiers and for tags,
+ * plus a link to the enclosing scope. */
+typedef struct Scope Scope;
+struct Scope {
+ SymEntry* entries; /* LIFO */
+ TagEntry* tags; /* LIFO */
+ Scope* parent;
+};
+
+/* ============================================================
+ * Switch/goto control-flow types
+ * ============================================================ */
+
+/* Linked-list node pairing a constant value with a code label; SwitchCtx
+ * holds the list head (presumably one node per `case` label). */
+typedef struct CaseEntry CaseEntry;
+struct CaseEntry {
+ i64 value;
+ CGLabel label;
+ CaseEntry* next;
+};
+
+/* State for one switch statement; `parent` links to the enclosing switch
+ * context, so nested switches form a stack. */
+typedef struct SwitchCtx SwitchCtx;
+struct SwitchCtx {
+ CaseEntry* cases;
+ CGLabel default_label;
+ FrameSlot value_slot; /* presumably holds the controlling value; TODO confirm */
+ const Type* value_type;
+ SwitchCtx* parent;
+};
+
+/* One named statement label, chained through `next` (Parser.goto_labels is
+ * the list head). */
+typedef struct GotoLabel GotoLabel;
+struct GotoLabel {
+ Sym name;
+ CGLabel label;
+ u8 placed; /* presumably non-zero once the label has been defined */
+ u8 pad[3];
+ SrcLoc first_use;
+ GotoLabel* next;
+};
+
+/* ============================================================
+ * Parser context
+ * ============================================================ */
+
+/* All parser state shared by the parse_*.c modules. */
+typedef struct Parser {
+ /* Collaborating subsystems. */
+ Compiler* c;
+ Pp* pp;
+ DeclTable* decls;
+ CG* cg;
+ Debug* debug;
+ TargetABI* abi;
+ Pool* pool;
+
+ /* `cur` is the token every parse routine inspects; `next`/`has_next`
+ * presumably back the one-token lookahead of peek1(). */
+ Tok cur;
+ Tok next;
+ int has_next;
+
+ Tok pending;
+ int has_pending;
+
+ /* Interned keyword spellings, indexed by CKw (see ident_kw_inline). */
+ Sym kw_sym[KW_COUNT];
+
+ /* Interned names; judging by the identifiers these are builtins,
+ * attribute/keyword aliases and atomic intrinsics (TODO confirm). */
+ Sym sym_b_alloca;
+ Sym sym_b_ctz;
+ Sym sym_b_expect;
+ Sym sym_b_offsetof;
+ Sym sym_b_va_list;
+ Sym sym_b_va_start;
+ Sym sym_b_va_arg;
+ Sym sym_b_va_end;
+ Sym sym_b_va_copy;
+ Sym sym_attribute;
+ Sym sym_volatile_alias;
+ Sym sym_alignof_alias; /* alternate spelling accepted as KW_ALIGNOF */
+ Sym sym_a_load_n;
+ Sym sym_a_store_n;
+ Sym sym_a_exchange_n;
+ Sym sym_a_fetch_add;
+ Sym sym_a_fetch_sub;
+ Sym sym_a_fetch_and;
+ Sym sym_a_fetch_or;
+ Sym sym_a_fetch_xor;
+ Sym sym_a_cas_n;
+ Sym sym_a_thread_fence;
+ Sym sym_a_signal_fence;
+
+ Scope* scope;
+
+ ObjSecId text_sec;
+
+ /* Labels for the innermost enclosing loop; loop parsers save, replace
+ * and restore them around the loop body. */
+ CGLabel cur_break;
+ CGLabel cur_continue;
+
+ SwitchCtx* cur_switch;
+
+ GotoLabel* goto_labels;
+
+ u8 vla_pending;
+ FrameSlot vla_pending_count_slot;
+
+ FrameSlot last_pushed_vla_slot;
+
+ u8 in_param_decl;
+
+ u32 static_local_counter;
+
+ u32 compound_literal_counter;
+
+ /* Recorded-token buffer (see record_braced_block / replay_rewind). */
+ Tok* replay;
+ u32 replay_cap;
+ u32 replay_len;
+ u32 replay_pos;
+ u8 replay_active;
+
+ /* Relocations collected while parsing a static initializer; appended by
+ * srl_push(): `offset`/`size` locate the slot, `target` + `addend` give
+ * the address to store there. */
+ struct {
+ u32 offset;
+ u32 size;
+ ObjSymId target;
+ i64 addend;
+ } *static_relocs;
+ u32 static_relocs_len;
+ u32 static_relocs_cap;
+} Parser;
+
+/* ============================================================
+ * DeclSpecs and TypeSpecAccum
+ * ============================================================ */
+
+/* Result of parse_decl_specs(): the base type plus storage class, flags,
+ * qualifiers, alignment and attributes that accompany a declaration. */
+typedef struct DeclSpecs {
+ const Type* type;
+ DeclStorage storage;
+ u32 flags; /* DeclFlag */
+ u16 quals;
+ u32 align;
+ FrameSlot vla_byte_slot;
+ Attr* attrs;
+} DeclSpecs;
+
+/* Tally of type-specifier keywords seen so far; resolve_type_specs() turns
+ * the tally into a Type. `long_count` is a count rather than a flag
+ * (presumably because `long` may appear twice). */
+typedef struct TypeSpecAccum {
+ u8 saw_void;
+ u8 saw_char;
+ u8 saw_int;
+ u8 saw_short;
+ u8 long_count;
+ u8 saw_signed;
+ u8 saw_unsigned;
+ u8 saw_bool;
+ u8 saw_float;
+ u8 saw_double;
+ u8 saw_explicit_type;
+} TypeSpecAccum;
+
+/* ============================================================
+ * Shared token/diagnostic helpers (defined in parse.c)
+ * ============================================================ */
+
+_Noreturn void perr(Parser* p, const char* fmt, ...);
+void advance(Parser* p);
+Tok peek1(Parser* p);
+void expect_punct(Parser* p, u32 punct, const char* what);
+int accept_punct(Parser* p, u32 punct);
+
+/* ============================================================
+ * Scope/tag ops (defined in parse.c)
+ * ============================================================ */
+
+Scope* scope_new(Parser* p, Scope* parent);
+void scope_push(Parser* p);
+void scope_pop(Parser* p);
+SymEntry* scope_define(Parser* p, Sym name, SymEntryKind kind, const Type* type);
+SymEntry* scope_lookup(Parser* p, Sym name);
+TagEntry* tag_define(Parser* p, Sym name, TagDeclKind kind, Type* type, int complete);
+TagEntry* tag_lookup(Parser* p, Sym name);
+TagEntry* tag_lookup_local(Parser* p, Sym name);
+
+/* ============================================================
+ * Token predicate helpers (defined in parse.c — file-scope static,
+ * exposed here as inline equivalents; each .c file sees its own copy)
+ * ============================================================ */
+
+/* Non-zero when `t` is a punctuator token whose code equals `punct`. */
+static inline int is_punct(const Tok* t, u32 punct) {
+  if (t->kind != TOK_PUNCT) return 0;
+  return t->v.punct == punct;
+}
+
+/* Non-zero when `t` is a TOK_PP_HASH token. */
+static inline int is_pp_hash(const Tok* t) {
+  const int hit = (t->kind == TOK_PP_HASH);
+  return hit;
+}
+
+/* Map an interned identifier to its keyword, or KW_NONE when it is not one.
+ * The keyword table is searched first; the `_Alignof` alias spelling is
+ * checked afterwards. */
+static inline CKw ident_kw_inline(const Parser* p, Sym name) {
+  for (int k = 1; k < (int)KW_COUNT; ++k) {
+    if (p->kw_sym[k] == name) return (CKw)k;
+  }
+  return (name == p->sym_alignof_alias) ? KW_ALIGNOF : KW_NONE;
+}
+
+/* Returns 1 when `t` is an identifier token spelling keyword `k` (for
+ * KW_ALIGNOF the alias spelling also counts), otherwise 0. */
+static inline int is_kw(const Parser* p, const Tok* t, CKw k) {
+  if (t->kind != TOK_IDENT) return 0;
+  const Sym id = t->v.ident;
+  return id == p->kw_sym[k] ||
+         (k == KW_ALIGNOF && id == p->sym_alignof_alias);
+}
+
+/* ============================================================
+ * Shared types (needed across multiple modules)
+ * ============================================================ */
+
+/* One function parameter as produced by parse_param_list(): its name, type
+ * and source location. */
+typedef struct ParamInfo {
+ Sym name;
+ const Type* type;
+ SrcLoc loc;
+} ParamInfo;
+
+/* ============================================================
+ * Declarator suffix types (defined in parse_type.c, shared here)
+ * ============================================================ */
+
+/* A declarator suffix is either an array suffix or a function suffix.
+ * parse_decl_suffix() fills a DeclSuffix and apply_decl_suffix() builds the
+ * resulting type from it. Going by the field names, count / incomplete /
+ * vla / vla_count_slot describe DS_ARRAY and params / nparams / variadic
+ * describe DS_FUNC. */
+typedef enum DSuffKind { DS_ARRAY, DS_FUNC } DSuffKind;
+typedef struct DeclSuffix {
+ u8 kind; /* DSuffKind */
+ u32 count; /* element count; meaningful when !vla and !incomplete */
+ u8 incomplete; /* true for `[]` (no size given) */
+ u8 vla; /* true for `[expr]` with a non-constant size */
+ FrameSlot vla_count_slot;
+ ParamInfo* params;
+ u16 nparams;
+ u8 variadic;
+} DeclSuffix;
+
+/* ============================================================
+ * Cross-module forward declarations
+ * ============================================================ */
+
+/* parse_type.c */
+
+int parse_decl_specs(Parser* p, DeclSpecs* out);
+const Type* parse_struct_or_union(Parser* p, TypeKind kind, Attr** anon_attrs_out);
+const Type* parse_enum(Parser* p, Attr** anon_attrs_out);
+const Type* resolve_type_specs(Parser* p, const TypeSpecAccum* a, SrcLoc loc);
+const Type* parse_type_name(Parser* p);
+const Type* parse_pointer_layer(Parser* p, const Type* base);
+const Type* parse_declarator_full(Parser* p, const Type* base, int allow_abstract,
+ Sym* name_out, SrcLoc* loc_out);
+const Type* parse_declarator_full_ex(Parser* p, const Type* base, int allow_abstract,
+ Sym* name_out, SrcLoc* loc_out, Attr** attrs_out);
+const Type* parse_declarator(Parser* p, const Type* base, Sym* name_out, SrcLoc* loc_out);
+const Type* complete_incomplete_array(Parser* p, const Type* ty);
+int starts_type_name(const Parser* p, const Tok* t);
+int starts_attr(const Parser* p);
+Attr* parse_attribute_spec_list(Parser* p);
+void parse_and_discard_attributes(Parser* p);
+int find_field(TargetABI* abi, const Type* rec, Sym name,
+ const Type** out_type, u32* out_offset, const Field** out_field);
+u32 attrs_pick_aligned(const Attr* a);
+void attr_list_append(Attr** head, Attr* add);
+void parse_attrs_into(Parser* p, Attr** sink);
+int parse_decl_suffix(Parser* p, DeclSuffix* out);
+const Type* apply_decl_suffix(Parser* p, const Type* base, const DeclSuffix* s);
+
+/* parse_expr.c */
+void parse_expr(Parser* p);
+void parse_assign_expr(Parser* p);
+void parse_cond_expr(Parser* p);
+void parse_unary(Parser* p);
+i64 eval_const_int(Parser* p, SrcLoc loc);
+i64 parse_int_literal(Parser* p, const Tok* t);
+i64 decode_char_literal(Parser* p, const Tok* t);
+u8* decode_string_literal(Parser* p, const Tok* t, size_t* nlen_out);
+void to_rvalue(Parser* p);
+void coerce_top_to_lvalue(Parser* p);
+ObjSymId emit_string_to_rodata(Parser* p, const u8* bytes, size_t n);
+
+/* parse_init.c */
+void init_at(Parser* p, FrameSlot slot, const Type* arr_ty, u32 offset, const Type* ty);
+void parse_static_init_at(Parser* p, u8* buf, u32 buflen, u32 offset, const Type* ty);
+void define_static_object(Parser* p, ObjSymId sym, const Type* var_ty, u16 quals,
+ int has_init, SrcLoc loc, u32 align_override);
+void srl_push(Parser* p, u32 offset, u32 size, ObjSymId target, i64 addend);
+void encode_int_le(u8* dst, u32 size, i64 v);
+void push_subobject_lv(Parser* p, FrameSlot slot, const Type* arr_ty,
+ u32 offset, const Type* elem_ty);
+void emit_struct_copy_into_slot(Parser* p, FrameSlot dst_slot,
+ const Type* dst_arr_ty, u32 dst_off,
+ const Type* ty);
+int is_char_kind(const Type* ty);
+
+/* parse_stmt.c */
+void parse_stmt(Parser* p);
+void parse_compound_stmt(Parser* p);
+void parse_static_assert(Parser* p);
+
+/* parse.c (residual — TU driver) */
+void parse_param_list(Parser* p, ParamInfo** infos_out, u16* nparams_out,
+ u8* variadic_out);
+void parse_local_decl(Parser* p, const DeclSpecs* specs);
+FrameSlot make_local(Parser* p, Sym name, const Type* type, SrcLoc loc);
+FrameSlot make_local_aligned(Parser* p, Sym name, const Type* type, SrcLoc loc,
+ u32 align_override);
+Sym mint_static_local_sym(Parser* p, Sym orig);
+void record_braced_block(Parser* p);
+void replay_rewind(Parser* p);
+u32 count_recorded_top_level_items(const Tok* vec, u32 len);
diff --git a/src/parse/parse_stmt.c b/src/parse/parse_stmt.c
@@ -0,0 +1,689 @@
+/* parse_stmt.c — statement parsers.
+ *
+ * Covers §6.8: if, while, for, do-while, return, break, continue, goto,
+ * labeled, case, default, switch, _Static_assert, asm, compound,
+ * and the top-level parse_stmt dispatcher.
+ */
+
+#include "parse/parse_priv.h"
+
+/* ============================================================
+ * File-local helpers
+ * ============================================================ */
+
+static CKw ident_kw_stmt(const Parser* p, Sym name) { /* file-local alias for ident_kw_inline */
+ return ident_kw_inline(p, name); /* callers compare the result against KW_NONE */
+}
+
+static SrcLoc tok_loc_stmt(const Tok* t) { return t->loc; } /* source location stored on token `t` */
+
+static int accept_kw_stmt(Parser* p, CKw k) {
+  /* Consume the current token iff it is keyword `k`; report 1 on a match. */
+  if (is_kw(p, &p->cur, k)) { advance(p); return 1; }
+  return 0;
+}
+
+/* ============================================================
+ * Statement parsers
+ * ============================================================ */
+
+/* if-statement (`if` already consumed); an else-arm makes the then-arm jump past it. */
+static void parse_if_stmt(Parser* p) {
+  CGLabel else_lbl = cg_label_new(p->cg);
+  CGLabel end_lbl = cg_label_new(p->cg);
+  int has_else;
+  expect_punct(p, '(', "'('");
+  parse_expr(p);
+  to_rvalue(p);
+  expect_punct(p, ')', "')'");
+  cg_branch_false(p->cg, else_lbl);
+  parse_stmt(p);
+  has_else = accept_kw_stmt(p, KW_ELSE);
+  if (has_else) cg_jump(p->cg, end_lbl);
+  cg_label_place(p->cg, else_lbl);
+  if (!has_else) return;
+  parse_stmt(p);
+  cg_label_place(p->cg, end_lbl);
+}
+
+static void parse_while_stmt(Parser* p) { /* `while` keyword already consumed */
+  CGLabel head = cg_label_new(p->cg); /* condition test; `continue` lands here */
+  CGLabel done = cg_label_new(p->cg); /* loop exit; `break` lands here */
+  CGLabel outer_break = p->cur_break;
+  CGLabel outer_continue = p->cur_continue;
+  expect_punct(p, '(', "'('");
+  cg_label_place(p->cg, head);
+  parse_expr(p);
+  to_rvalue(p);
+  expect_punct(p, ')', "')'");
+  cg_branch_false(p->cg, done);
+  p->cur_continue = head;
+  p->cur_break = done;
+  parse_stmt(p);
+  p->cur_continue = outer_continue; /* restore the enclosing loop's targets */
+  p->cur_break = outer_break;
+  cg_jump(p->cg, head);
+  cg_label_place(p->cg, done);
+}
+
+static void parse_for_stmt(Parser* p) { /* `for` keyword already consumed by parse_stmt */
+ CGLabel L_top = cg_label_new(p->cg); /* condition test */
+ CGLabel L_step = cg_label_new(p->cg); /* step expression; also the `continue` target */
+ CGLabel L_end = cg_label_new(p->cg); /* loop exit; also the `break` target */
+ CGLabel saved_break = p->cur_break;
+ CGLabel saved_continue = p->cur_continue;
+
+ scope_push(p); /* scope covers the init clause and the body; popped at the end */
+ expect_punct(p, '(', "'('");
+
+ /* init: declaration | expr | ; */
+ if (!accept_punct(p, ';')) {
+ DeclSpecs specs;
+ if (parse_decl_specs(p, &specs)) {
+ parse_local_decl(p, &specs);
+ } else {
+ parse_expr(p);
+ cg_drop(p->cg); /* init expression is evaluated for side effects only */
+ expect_punct(p, ';', "';'");
+ }
+ }
+
+ cg_label_place(p->cg, L_top);
+ if (!is_punct(&p->cur, ';')) { /* an omitted condition emits no exit branch */
+ parse_expr(p);
+ to_rvalue(p);
+ cg_branch_false(p->cg, L_end);
+ }
+ expect_punct(p, ';', "';'");
+
+ {
+ CGLabel L_body = cg_label_new(p->cg);
+ cg_jump(p->cg, L_body); /* the step code is emitted before the body, so hop over it */
+ cg_label_place(p->cg, L_step);
+ if (!is_punct(&p->cur, ')')) {
+ parse_expr(p);
+ cg_drop(p->cg);
+ }
+ cg_jump(p->cg, L_top); /* after the step, re-test the condition */
+ expect_punct(p, ')', "')'");
+ cg_label_place(p->cg, L_body);
+
+ p->cur_break = L_end;
+ p->cur_continue = L_step;
+ parse_stmt(p);
+ p->cur_break = saved_break;
+ p->cur_continue = saved_continue;
+
+ cg_jump(p->cg, L_step); /* end of body runs the step, which loops back to L_top */
+ cg_label_place(p->cg, L_end);
+ }
+ scope_pop(p);
+}
+
+static void parse_return_stmt(Parser* p) { /* `return` keyword already consumed */
+  int has_value = !accept_punct(p, ';');
+  if (has_value) {
+    parse_expr(p);
+    to_rvalue(p);
+    expect_punct(p, ';', "';' after return value");
+  }
+  /* cg_ret's flag is 1 exactly when a return expression was parsed. */
+  cg_ret(p->cg, has_value);
+}
+
+static void parse_break_stmt(Parser* p) { /* `break` keyword already consumed */
+ if (p->cur_break == 0) perr(p, "'break' outside of loop or switch"); /* 0 = no enclosing break target */
+ cg_jump(p->cg, p->cur_break); /* target was installed by the enclosing loop/switch parser */
+ expect_punct(p, ';', "';' after break");
+}
+
+static void parse_continue_stmt(Parser* p) { /* `continue` keyword already consumed */
+ if (p->cur_continue == 0) perr(p, "'continue' outside of loop"); /* 0 = no enclosing loop */
+ cg_jump(p->cg, p->cur_continue); /* target was installed by the enclosing loop parser */
+ expect_punct(p, ';', "';' after continue");
+}
+
+/* do-while (`do` consumed): body, then the test (the `continue` target). */
+static void parse_do_stmt(Parser* p) {
+  CGLabel head = cg_label_new(p->cg);
+  CGLabel test = cg_label_new(p->cg);
+  CGLabel done = cg_label_new(p->cg);
+  CGLabel outer_break = p->cur_break;
+  CGLabel outer_continue = p->cur_continue;
+  cg_label_place(p->cg, head);
+  p->cur_continue = test;
+  p->cur_break = done;
+  parse_stmt(p);
+  p->cur_continue = outer_continue;
+  p->cur_break = outer_break;
+  cg_label_place(p->cg, test);
+  if (!accept_kw_stmt(p, KW_WHILE)) perr(p, "expected 'while' after do-body");
+  expect_punct(p, '(', "'('");
+  parse_expr(p);
+  to_rvalue(p);
+  expect_punct(p, ')', "')' after do-while condition");
+  expect_punct(p, ';', "';' after do-while");
+  cg_branch_true(p->cg, head);
+  cg_label_place(p->cg, done);
+}
+
+/* Look up goto label `name` in p->goto_labels; when absent, push a new,
+ * not-yet-placed entry (first_use = loc) with a fresh codegen label. */
+static GotoLabel* label_get_or_create(Parser* p, Sym name, SrcLoc loc) {
+  GotoLabel* it = p->goto_labels;
+  while (it && it->name != name) it = it->next;
+  if (it) return it;
+  it = arena_new(p->c->tu, GotoLabel);
+  if (!it) perr(p, "out of memory in label_get_or_create");
+  memset(it, 0, sizeof *it); /* also leaves `placed` cleared */
+  it->name = name;
+  it->label = cg_label_new(p->cg);
+  it->first_use = loc;
+  it->next = p->goto_labels;
+  p->goto_labels = it;
+  return it;
+}
+
+/* goto (`goto` already consumed): expects a non-keyword identifier and ';',
+ * then jumps to that label, creating its entry on first mention. */
+static void parse_goto_stmt(Parser* p) {
+  Sym target;
+  SrcLoc where;
+  int is_plain_ident = p->cur.kind == TOK_IDENT &&
+                       ident_kw_stmt(p, p->cur.v.ident) == KW_NONE;
+  if (!is_plain_ident) perr(p, "expected label name after 'goto'");
+  target = p->cur.v.ident;
+  where = tok_loc_stmt(&p->cur);
+  advance(p);
+  expect_punct(p, ';', "';' after goto");
+  cg_jump(p->cg, label_get_or_create(p, target, where)->label);
+}
+
+/* `name:` labeled statement; parse_stmt has already seen IDENT ':' ahead. */
+static void parse_label_stmt(Parser* p) {
+  Sym label_name = p->cur.v.ident;
+  SrcLoc where = tok_loc_stmt(&p->cur);
+  advance(p); /* IDENT */
+  advance(p); /* ':' */
+  GotoLabel* entry = label_get_or_create(p, label_name, where);
+  if (entry->placed) perr(p, "duplicate label");
+  entry->placed = 1;
+  cg_label_place(p->cg, entry->label);
+  parse_stmt(p);
+}
+
+/* case label (`case` consumed): record value+label on the enclosing switch,
+ * rejecting a repeated value (C11 6.8.4.2p3), then parse the labeled stmt. */
+static void parse_case_stmt(Parser* p) {
+  CaseEntry* ce;
+  if (!p->cur_switch) perr(p, "'case' label not in switch statement");
+  i64 v = eval_const_int(p, tok_loc_stmt(&p->cur));
+  expect_punct(p, ':', "':' after case constant");
+  for (ce = p->cur_switch->cases; ce; ce = ce->next)
+    if (ce->value == v) perr(p, "duplicate case value");
+  ce = arena_new(p->c->tu, CaseEntry);
+  if (!ce) perr(p, "out of memory in parse_case_stmt");
+  ce->value = v;
+  ce->label = cg_label_new(p->cg);
+  cg_label_place(p->cg, ce->label);
+  ce->next = p->cur_switch->cases; /* prepend to the switch's case list */
+  p->cur_switch->cases = ce;
+  parse_stmt(p);
+}
+
+static void parse_default_stmt(Parser* p) { /* `default` keyword already consumed */
+  SwitchCtx* sw = p->cur_switch;
+  if (!sw) perr(p, "'default' label not in switch statement");
+  expect_punct(p, ':', "':' after default");
+  if (sw->default_label != 0) perr(p, "duplicate 'default' label");
+  /* Create and place the label here; the switch parser jumps to it later. */
+  sw->default_label = cg_label_new(p->cg);
+  cg_label_place(p->cg, sw->default_label);
+  parse_stmt(p);
+}
+
+static void parse_switch_stmt(Parser* p) { /* `switch` keyword already consumed by parse_stmt */
+ CGLabel L_dispatch = cg_label_new(p->cg); /* compare chain, emitted after the body */
+ CGLabel L_end = cg_label_new(p->cg); /* exit; also the `break` target */
+ CGLabel saved_break = p->cur_break;
+ SwitchCtx ctx;
+ SwitchCtx* saved_switch = p->cur_switch;
+ FrameSlotDesc fsd;
+ const Type* vty;
+ CaseEntry* it;
+ CaseEntry* prev;
+ CaseEntry* head;
+
+ expect_punct(p, '(', "'('");
+ parse_expr(p);
+ to_rvalue(p);
+ vty = cg_top_type(p->cg);
+ if (!vty) vty = type_prim(p->pool, TY_INT); /* fall back to int when no type is reported */
+ expect_punct(p, ')', "')' after switch expression");
+
+ memset(&ctx, 0, sizeof ctx);
+ memset(&fsd, 0, sizeof fsd);
+ fsd.type = vty;
+ fsd.size = abi_sizeof(p->abi, vty);
+ fsd.align = abi_alignof(p->abi, vty);
+ fsd.kind = FS_LOCAL;
+ ctx.value_slot = cg_local(p->cg, &fsd); /* local that keeps the controlling value for the compares */
+ ctx.value_type = vty;
+ ctx.parent = saved_switch;
+
+ cg_push_local_typed(p->cg, ctx.value_slot, vty); /* these four calls store the evaluated value into value_slot */
+ cg_swap(p->cg);
+ cg_store(p->cg);
+ cg_drop(p->cg);
+
+ cg_jump(p->cg, L_dispatch); /* body is emitted first; control enters via the dispatch code */
+
+ p->cur_switch = &ctx; /* case/default labels in the body register themselves on ctx */
+ p->cur_break = L_end;
+ parse_stmt(p);
+ p->cur_break = saved_break;
+ p->cur_switch = saved_switch;
+
+ cg_jump(p->cg, L_end); /* falling off the end of the body skips the dispatch code */
+
+ cg_label_place(p->cg, L_dispatch);
+ prev = NULL; /* the case list is built by prepending; reverse it so compares follow source order */
+ head = ctx.cases;
+ while (head) {
+ CaseEntry* nxt = head->next;
+ head->next = prev;
+ prev = head;
+ head = nxt;
+ }
+ for (it = prev; it; it = it->next) { /* one equality compare + branch per case */
+ cg_push_local_typed(p->cg, ctx.value_slot, vty);
+ cg_load(p->cg);
+ cg_push_int(p->cg, it->value, vty);
+ cg_cmp(p->cg, CMP_EQ);
+ cg_branch_true(p->cg, it->label);
+ }
+ if (ctx.default_label) {
+ cg_jump(p->cg, ctx.default_label);
+ }
+ cg_label_place(p->cg, L_end); /* with no match and no default, control arrives here */
+}
+
+/* _Static_assert(const-expr, "message");
+ * A zero condition is reported through compiler_panic at the keyword's
+ * location, quoting the message token's spelling. */
+void parse_static_assert(Parser* p) {
+  SrcLoc kw_loc = tok_loc_stmt(&p->cur);
+  Tok msg_tok;
+  i64 cond;
+  size_t msg_len = 0;
+  const char* msg_text;
+  if (!accept_kw_stmt(p, KW_STATIC_ASSERT)) perr(p, "expected _Static_assert");
+  expect_punct(p, '(', "'(' after _Static_assert");
+  cond = eval_const_int(p, tok_loc_stmt(&p->cur));
+  expect_punct(p, ',', "',' separating _Static_assert args");
+  if (p->cur.kind != TOK_STR) {
+    perr(p, "expected string literal as _Static_assert message");
+  }
+  msg_tok = p->cur;
+  advance(p);
+  expect_punct(p, ')', "')' after _Static_assert");
+  expect_punct(p, ';', "';' after _Static_assert");
+  if (cond) return;
+  msg_text = pool_str(p->pool, msg_tok.spelling, &msg_len);
+  compiler_panic(p->c, kw_loc, "static assertion failed: %.*s",
+                 (int)msg_len, msg_text ? msg_text : "");
+}
+
+/* GNU inline-asm statement. The leading 'asm'/'__asm__' keyword has
+ * already been consumed by parse_stmt. */
+typedef struct AsmOutLValue { /* per-output bookkeeping for the post-asm write-back */
+ FrameSlot addr_slot; /* local slot holding the output lvalue's address */
+ const Type* ptr_ty; /* type of that address (top-of-stack type after cg_addr) */
+ const Type* val_ty; /* type of the output lvalue itself */
+} AsmOutLValue;
+
+/* Optional `[name]` prefix of an asm operand; yields 0 when it is absent. */
+static Sym parse_asm_operand_name(Parser* p) {
+  Sym operand;
+  if (!accept_punct(p, '[')) return 0;
+  if (p->cur.kind != TOK_IDENT) {
+    perr(p, "expected identifier inside '[name]' on asm operand");
+  }
+  operand = p->cur.v.ident;
+  advance(p);
+  expect_punct(p, ']', "']' after asm operand name");
+  return operand;
+}
+
+/* Consume one string-literal token (`what` names it in the diagnostic) and
+ * return its decoded text as a string owned by p->pool. */
+static const char* parse_asm_str(Parser* p, const char* what) {
+  Tok lit;
+  u8* raw;
+  size_t raw_len = 0;
+  Sym interned;
+  if (p->cur.kind != TOK_STR) perr(p, "expected string literal in %s", what);
+  lit = p->cur;
+  advance(p);
+  raw = decode_string_literal(p, &lit, &raw_len);
+  /* The last decoded byte is left out of the interned text. */
+  interned = pool_intern(p->pool, (const char*)raw, raw_len ? raw_len - 1 : 0);
+  p->c->env->heap->free(p->c->env->heap, raw, 0);
+  return pool_str(p->pool, interned, NULL);
+}
+
+static void parse_asm_stmt(Parser* p) { /* asm ( template : outputs : inputs : clobbers : labels ); keyword already consumed */
+ const char* tmpl;
+ AsmConstraint* outs = NULL;
+ AsmConstraint* ins = NULL;
+ Sym* clobbers = NULL;
+ AsmOutLValue* out_lvs = NULL; /* parallel to outs[]: where each output is written back */
+ u32 nout = 0, nin = 0, nclob = 0;
+ u32 cap_out = 0, cap_in = 0, cap_clob = 0;
+ int saw_goto = 0;
+ SrcLoc loc = tok_loc_stmt(&p->cur);
+
+ for (;;) { /* skip any run of `volatile` qualifiers (keyword or the alias identifier) */
+ if (accept_kw_stmt(p, KW_VOLATILE)) continue;
+ if (p->cur.kind == TOK_IDENT && p->cur.v.ident == p->sym_volatile_alias) {
+ advance(p);
+ continue;
+ }
+ break;
+ }
+ if (accept_kw_stmt(p, KW_GOTO)) saw_goto = 1;
+
+ expect_punct(p, '(', "'(' after asm");
+ tmpl = parse_asm_str(p, "asm template");
+
+ if (accept_punct(p, ':')) { /* section 1: output operands */
+ if (!is_punct(&p->cur, ':') && !is_punct(&p->cur, ')')) {
+ cap_out = 4;
+ outs = (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, cap_out);
+ out_lvs = (AsmOutLValue*)arena_array(p->c->tu, AsmOutLValue, cap_out);
+ for (;;) {
+ AsmConstraint c;
+ AsmOutLValue lv;
+ const Type* val_ty;
+ const Type* ptr_ty;
+ FrameSlotDesc fsd;
+ FrameSlot slot;
+ memset(&c, 0, sizeof c);
+ memset(&lv, 0, sizeof lv);
+ c.name = parse_asm_operand_name(p);
+ c.str = parse_asm_str(p, "asm output constraint");
+ if (c.str && c.str[0] == '+') c.dir = ASM_INOUT; /* leading '+' marks a read-write operand */
+ else c.dir = ASM_OUT;
+ expect_punct(p, '(', "'(' before asm output lvalue");
+ parse_assign_expr(p);
+ val_ty = cg_top_type(p->cg);
+ if (!val_ty) perr(p, "asm output: cannot determine lvalue type");
+ c.type = val_ty;
+ cg_addr(p->cg);
+ ptr_ty = cg_top_type(p->cg);
+ if (!ptr_ty) perr(p, "asm output: cannot take address");
+ memset(&fsd, 0, sizeof fsd);
+ fsd.type = ptr_ty;
+ fsd.size = 8; /* NOTE(review): hard-coded pointer size/alignment; parse_switch_stmt asks the ABI instead */
+ fsd.align = 8;
+ fsd.kind = FS_LOCAL;
+ slot = cg_local(p->cg, &fsd);
+ cg_push_local_typed(p->cg, slot, ptr_ty); /* save the lvalue's address in the slot for later use */
+ cg_swap(p->cg);
+ cg_store(p->cg);
+ cg_drop(p->cg);
+ lv.addr_slot = slot;
+ lv.ptr_ty = ptr_ty;
+ lv.val_ty = val_ty;
+ expect_punct(p, ')', "')' after asm output lvalue");
+ if (nout == cap_out) { /* grow both parallel arrays by doubling; old arena storage is abandoned */
+ u32 nc = cap_out * 2;
+ AsmConstraint* nb =
+ (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, nc);
+ AsmOutLValue* nlv =
+ (AsmOutLValue*)arena_array(p->c->tu, AsmOutLValue, nc);
+ memcpy(nb, outs, sizeof(AsmConstraint) * nout);
+ memcpy(nlv, out_lvs, sizeof(AsmOutLValue) * nout);
+ outs = nb;
+ out_lvs = nlv;
+ cap_out = nc;
+ }
+ outs[nout] = c;
+ out_lvs[nout] = lv;
+ nout++;
+ if (!accept_punct(p, ',')) break;
+ }
+ }
+
+ if (accept_punct(p, ':')) { /* section 2: input operands */
+ if (!is_punct(&p->cur, ':') && !is_punct(&p->cur, ')')) {
+ cap_in = 4;
+ ins = (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, cap_in);
+ for (;;) {
+ AsmConstraint c;
+ memset(&c, 0, sizeof c);
+ c.name = parse_asm_operand_name(p);
+ c.str = parse_asm_str(p, "asm input constraint");
+ c.dir = ASM_IN;
+ expect_punct(p, '(', "'(' before asm input expression");
+ parse_assign_expr(p);
+ to_rvalue(p); /* the input value is not dropped here (no cg_drop follows) */
+ c.type = cg_top_type(p->cg);
+ expect_punct(p, ')', "')' after asm input expression");
+ if (nin == cap_in) {
+ u32 nc = cap_in * 2;
+ AsmConstraint* nb =
+ (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, nc);
+ memcpy(nb, ins, sizeof(AsmConstraint) * nin);
+ ins = nb;
+ cap_in = nc;
+ }
+ ins[nin++] = c;
+ if (!accept_punct(p, ',')) break;
+ }
+ }
+
+ if (accept_punct(p, ':')) { /* section 3: clobber strings */
+ if (!is_punct(&p->cur, ':') && !is_punct(&p->cur, ')')) {
+ cap_clob = 4;
+ clobbers = (Sym*)arena_array(p->c->tu, Sym, cap_clob);
+ for (;;) {
+ const char* cstr;
+ Sym cs;
+ cstr = parse_asm_str(p, "asm clobber");
+ cs = pool_intern_cstr(p->pool, cstr);
+ if (nclob == cap_clob) {
+ u32 nc = cap_clob * 2;
+ Sym* nb = (Sym*)arena_array(p->c->tu, Sym, nc);
+ memcpy(nb, clobbers, sizeof(Sym) * nclob);
+ clobbers = nb;
+ cap_clob = nc;
+ }
+ clobbers[nclob++] = cs;
+ if (!accept_punct(p, ',')) break;
+ }
+ }
+
+ if (accept_punct(p, ':')) { /* section 4: asm-goto labels are syntax-checked and then discarded */
+ if (!is_punct(&p->cur, ')')) {
+ for (;;) {
+ if (p->cur.kind != TOK_IDENT) {
+ perr(p, "expected label identifier in asm-goto label list");
+ }
+ advance(p);
+ if (!accept_punct(p, ',')) break;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ expect_punct(p, ')', "')' to close asm");
+ expect_punct(p, ';', "';' after asm statement");
+
+ (void)saw_goto; /* the `goto` qualifier is recorded but has no effect here */
+
+ u32 ninout = 0; /* number of '+' (read-write) outputs */
+ for (u32 i = 0; i < nout; ++i) {
+ if (outs[i].dir == ASM_INOUT) ninout++;
+ }
+ if (ninout > 0) { /* each '+' output also gets an input whose constraint is the output's index digit */
+ static const char* const k_match_strs[10] = {
+ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9"};
+ u32 need = nin + ninout;
+ if (need > cap_in) {
+ u32 nc = cap_in ? cap_in : 4;
+ while (nc < need) nc *= 2;
+ AsmConstraint* nb =
+ (AsmConstraint*)arena_array(p->c->tu, AsmConstraint, nc);
+ if (nin) memcpy(nb, ins, sizeof(AsmConstraint) * nin);
+ ins = nb;
+ cap_in = nc;
+ }
+ for (u32 i = 0; i < nout; ++i) {
+ if (outs[i].dir != ASM_INOUT) continue;
+ if (i >= 10) { /* k_match_strs only covers single-digit output indices */
+ perr(p, "asm: '+r' constraint at output index >9 exceeds "
+ "matching-digit syntax");
+ }
+ AsmOutLValue* lv = &out_lvs[i];
+ cg_push_local_typed(p->cg, lv->addr_slot, lv->ptr_ty); /* reload the saved address, then load the lvalue's current value */
+ cg_load(p->cg);
+ cg_deref(p->cg, lv->val_ty);
+ cg_load(p->cg);
+ AsmConstraint mc;
+ memset(&mc, 0, sizeof mc);
+ mc.str = k_match_strs[i];
+ mc.dir = ASM_IN;
+ mc.type = lv->val_ty;
+ ins[nin++] = mc;
+ }
+ }
+
+ cg_set_loc(p->cg, loc);
+ cg_inline_asm(p->cg, tmpl, outs, nout, ins, nin, clobbers, nclob);
+
+ if (nout > 0) { /* NOTE(review): presumably cg_inline_asm leaves one result per output on the stack -- confirm */
+ u32 i;
+ for (i = nout; i-- > 0;) { /* write back from the last output to the first */
+ AsmOutLValue* lv = &out_lvs[i];
+ cg_push_local_typed(p->cg, lv->addr_slot, lv->ptr_ty);
+ cg_load(p->cg);
+ cg_deref(p->cg, lv->val_ty);
+ cg_swap(p->cg);
+ cg_store(p->cg);
+ cg_drop(p->cg);
+ }
+ }
+}
+
+/* Compound statement: '{' block-item* '}'.
+ * The items are parsed inside a scope of their own. Stray newline / '#'
+ * tokens are skipped; every other item is a _Static_assert, a declaration
+ * (when decl-specs parse), or a statement. Stops at '}' or EOF. */
+void parse_compound_stmt(Parser* p) {
+  expect_punct(p, '{', "'{'");
+  scope_push(p);
+  while (!is_punct(&p->cur, '}') && p->cur.kind != TOK_EOF) {
+    DeclSpecs specs;
+    if (p->cur.kind == TOK_NEWLINE || is_pp_hash(&p->cur)) {
+      advance(p);
+      continue;
+    }
+    if (is_kw(p, &p->cur, KW_STATIC_ASSERT)) {
+      parse_static_assert(p);
+      continue;
+    }
+    if (parse_decl_specs(p, &specs)) {
+      parse_local_decl(p, &specs);
+    } else {
+      parse_stmt(p);
+    }
+  }
+  expect_punct(p, '}', "'}'"); /* at EOF this reports the missing brace */
+  scope_pop(p);
+}
+
+void parse_stmt(Parser* p) { /* statement dispatcher: picks a parser from the current token */
+ cg_set_loc(p->cg, tok_loc_stmt(&p->cur)); /* tag following codegen with this statement's location */
+ if (p->cur.kind == TOK_IDENT && ident_kw_stmt(p, p->cur.v.ident) == KW_NONE) {
+ Tok n = peek1(p); /* one-token lookahead: `ident :` is a label, anything else falls through */
+ if (is_punct(&n, ':')) {
+ parse_label_stmt(p);
+ return;
+ }
+ }
+ if (is_punct(&p->cur, '{')) {
+ parse_compound_stmt(p);
+ return;
+ }
+ if (is_punct(&p->cur, ';')) { /* empty statement */
+ advance(p);
+ return;
+ }
+ if (is_kw(p, &p->cur, KW_IF)) { /* every keyword branch consumes the keyword before calling its parser */
+ advance(p);
+ parse_if_stmt(p);
+ return;
+ }
+ if (is_kw(p, &p->cur, KW_WHILE)) {
+ advance(p);
+ parse_while_stmt(p);
+ return;
+ }
+ if (is_kw(p, &p->cur, KW_FOR)) {
+ advance(p);
+ parse_for_stmt(p);
+ return;
+ }
+ if (is_kw(p, &p->cur, KW_DO)) {
+ advance(p);
+ parse_do_stmt(p);
+ return;
+ }
+ if (is_kw(p, &p->cur, KW_RETURN)) {
+ advance(p);
+ parse_return_stmt(p);
+ return;
+ }
+ if (is_kw(p, &p->cur, KW_BREAK)) {
+ advance(p);
+ parse_break_stmt(p);
+ return;
+ }
+ if (is_kw(p, &p->cur, KW_CONTINUE)) {
+ advance(p);
+ parse_continue_stmt(p);
+ return;
+ }
+ if (is_kw(p, &p->cur, KW_GOTO)) {
+ advance(p);
+ parse_goto_stmt(p);
+ return;
+ }
+ if (is_kw(p, &p->cur, KW_SWITCH)) {
+ advance(p);
+ parse_switch_stmt(p);
+ return;
+ }
+ if (is_kw(p, &p->cur, KW_CASE)) {
+ advance(p);
+ parse_case_stmt(p);
+ return;
+ }
+ if (is_kw(p, &p->cur, KW_DEFAULT)) {
+ advance(p);
+ parse_default_stmt(p);
+ return;
+ }
+ if (is_kw(p, &p->cur, KW_ASM) || is_kw(p, &p->cur, KW_BUILTIN_ASM)) { /* both asm keywords share one parser */
+ advance(p);
+ parse_asm_stmt(p);
+ return;
+ }
+ /* Expression statement. */
+ parse_expr(p);
+ cg_drop(p->cg); /* the expression's value is discarded */
+ expect_punct(p, ';', "';' after expression");
+}
diff --git a/src/parse/parse_type.c b/src/parse/parse_type.c
@@ -0,0 +1,1121 @@
+/* parse_type.c — decl-specs, struct/union/enum, declarators,
+ * __attribute__ parsing. */
+
+#include "parse/parse_priv.h"
+
+/* ============================================================
+ * Type helpers
+ * ============================================================ */
+
+static const Type* ty_int(Parser* p) { return type_prim(p->pool, TY_INT); } /* shorthand for the `int` primitive type */
+static const Type* ty_size_t(Parser* p) { /* the size type as reported by the target ABI */
+ return abi_size_type(p->abi, p->pool);
+}
+
+/* ============================================================
+ * GNU __attribute__ (Phase 1 — parse + carry; no semantic wire-up)
+ * ============================================================ */
+
+static const struct { /* recognized attribute names; searched linearly by classify_attr */
+ const char* name; /* spelling without the optional `__` wrapping */
+ AttrKind kind; /* kind stored on the parsed Attr */
+ AttrArgShape shape; /* argument syntax that parse_attr_args enforces */
+} kAttrTable[] = {
+ {"packed", ATTR_PACKED, AS_NONE},
+ {"aligned", ATTR_ALIGNED, AS_INT_OPT}, /* parenthesized integer is optional */
+ {"section", ATTR_SECTION, AS_STRING},
+ {"used", ATTR_USED, AS_NONE},
+ {"noreturn", ATTR_NORETURN, AS_NONE},
+ {"alias", ATTR_ALIAS, AS_STRING},
+ {"weak", ATTR_WEAK, AS_NONE},
+ {"visibility", ATTR_VISIBILITY, AS_STRING},
+ {"always_inline", ATTR_ALWAYS_INLINE, AS_NONE},
+ {"noinline", ATTR_NOINLINE, AS_NONE},
+ {"unused", ATTR_UNUSED, AS_NONE},
+ {"deprecated", ATTR_DEPRECATED, AS_OPAQUE}, /* AS_OPAQUE: arguments, if any, are skipped unparsed */
+ {"warn_unused_result", ATTR_WARN_UNUSED_RESULT, AS_NONE},
+ {"format", ATTR_FORMAT, AS_FORMAT}, /* (archetype, string-index, first-to-check) */
+ {"nonnull", ATTR_NONNULL, AS_OPAQUE},
+ {"returns_nonnull", ATTR_RETURNS_NONNULL, AS_NONE},
+ {"pure", ATTR_PURE, AS_NONE},
+ {"const", ATTR_CONST, AS_NONE},
+ {"malloc", ATTR_MALLOC, AS_OPAQUE},
+ {"nothrow", ATTR_NOTHROW, AS_NONE},
+ {"leaf", ATTR_LEAF, AS_NONE},
+ {"cold", ATTR_COLD, AS_NONE},
+ {"hot", ATTR_HOT, AS_NONE},
+ {"constructor", ATTR_CONSTRUCTOR, AS_INT_OPT},
+ {"destructor", ATTR_DESTRUCTOR, AS_INT_OPT},
+ {"cleanup", ATTR_CLEANUP, AS_IDENT},
+ {"mode", ATTR_MODE, AS_IDENT},
+ {"vector_size", ATTR_VECTOR_SIZE, AS_INT}, /* integer argument is mandatory */
+ {"transparent_union", ATTR_TRANSPARENT_UNION, AS_NONE},
+ {"gnu_inline", ATTR_GNU_INLINE, AS_NONE},
+ {"fallthrough", ATTR_FALLTHROUGH, AS_NONE},
+ {"sentinel", ATTR_SENTINEL, AS_OPAQUE},
+ {"no_instrument_function", ATTR_NO_INSTRUMENT_FUNCTION, AS_NONE},
+ {"no_sanitize", ATTR_NO_SANITIZE, AS_OPAQUE},
+};
+
+static SrcLoc tok_loc(const Tok* t) { return t->loc; } /* source location stored on token `t` */
+
+static int accept_kw(Parser* p, CKw k) {
+  /* Consume the current token only when it is keyword `k`.
+   * Returns 1 if a token was consumed, 0 if the stream is untouched. */
+  if (!is_kw(p, &p->cur, k)) return 0;
+  advance(p);
+  return 1;
+}
+
+static CKw ident_kw(const Parser* p, Sym name) { /* file-local alias for ident_kw_inline */
+ return ident_kw_inline(p, name); /* callers compare the result against KW_NONE */
+}
+
+int starts_attr(const Parser* p) { /* nonzero when the current token is the identifier p->sym_attribute */
+ return p->cur.kind == TOK_IDENT && p->cur.v.ident == p->sym_attribute;
+}
+
+/* Canonicalize an attribute spelling: `__name__` and `name` denote the same
+ * attribute, so one leading and one trailing "__" are trimmed together. The
+ * result is a sub-range of `s` (no copy is made, no NUL is added). */
+static void attr_canon_range(const char* s, size_t len,
+                             const char** out_p, size_t* out_len) {
+  /* Both ends must carry the underscores, which needs at least 4 chars. */
+  const int wrapped = len >= 4 && s[0] == '_' && s[1] == '_' &&
+                      s[len - 2] == '_' && s[len - 1] == '_';
+  *out_p = wrapped ? s + 2 : s;
+  *out_len = wrapped ? len - 4 : len;
+}
+
+/* Map attribute name `name` (optionally spelled `__name__`) to its kind and
+ * argument shape via kAttrTable. A name that is unknown, or whose spelling
+ * cannot be fetched, yields ATTR_UNKNOWN with shape AS_OPAQUE. */
+static AttrKind classify_attr(Parser* p, Sym name, AttrArgShape* shape_out) {
+  const size_t n_known = sizeof(kAttrTable) / sizeof(kAttrTable[0]);
+  size_t raw_len = 0;
+  size_t key_len = 0;
+  size_t idx;
+  const char* key = NULL;
+  const char* raw = pool_str(p->pool, name, &raw_len);
+  *shape_out = AS_OPAQUE; /* default for every miss below */
+  if (!raw) return ATTR_UNKNOWN;
+  attr_canon_range(raw, raw_len, &key, &key_len);
+  for (idx = 0; idx < n_known; ++idx) {
+    const char* known = kAttrTable[idx].name;
+    if (strlen(known) != key_len) continue;
+    if (memcmp(known, key, key_len) != 0) continue;
+    *shape_out = kAttrTable[idx].shape;
+    return kAttrTable[idx].kind;
+  }
+  return ATTR_UNKNOWN;
+}
+
+/* Skip a parenthesized token run, nested parens included. The current token
+ * must be '('; on return the matching ')' has been consumed. */
+static void skip_balanced_parens(Parser* p) {
+  int open = 0;
+  if (!is_punct(&p->cur, '(')) perr(p, "internal: skip_balanced_parens");
+  do {
+    if (p->cur.kind == TOK_EOF) {
+      perr(p, "unexpected EOF inside attribute arguments");
+    }
+    if (is_punct(&p->cur, '(')) {
+      ++open;
+    } else if (is_punct(&p->cur, ')')) {
+      --open;
+    }
+    advance(p);
+  } while (open > 0);
+}
+
+static void parse_attr_args(Parser* p, Attr* a, AttrArgShape shape, /* parse one attribute's argument list according to `shape` */
+ const char* attr_diag_name) { /* spelling used only in diagnostics */
+ if (!is_punct(&p->cur, '(')) { /* no argument list: fine only for shapes whose arguments are optional */
+ if (shape == AS_NONE || shape == AS_OPTIONAL || shape == AS_INT_OPT ||
+ shape == AS_OPAQUE) {
+ return;
+ }
+ perr(p, "attribute '%s' expects '(' arguments", attr_diag_name);
+ }
+ switch (shape) {
+ case AS_NONE: { /* only an empty `()` is accepted */
+ advance(p); /* '(' */
+ if (!accept_punct(p, ')')) {
+ perr(p, "attribute '%s' takes no arguments", attr_diag_name);
+ }
+ return;
+ }
+ case AS_OPTIONAL: { /* arguments are skipped without being interpreted */
+ skip_balanced_parens(p);
+ return;
+ }
+ case AS_INT:
+ case AS_INT_OPT: { /* one constant integer; `()` is allowed only for AS_INT_OPT */
+ SrcLoc loc;
+ advance(p); /* '(' */
+ if (is_punct(&p->cur, ')')) {
+ if (shape == AS_INT) {
+ perr(p, "attribute '%s' expects an integer argument",
+ attr_diag_name);
+ }
+ advance(p);
+ return; /* nargs stays as the caller left it */
+ }
+ loc = tok_loc(&p->cur);
+ a->v.i = eval_const_int(p, loc);
+ a->nargs = 1;
+ expect_punct(p, ')', "')' after attribute integer argument");
+ return;
+ }
+ case AS_STRING: { /* one string literal, stored as an interned symbol */
+ advance(p); /* '(' */
+ if (p->cur.kind != TOK_STR) {
+ perr(p, "attribute '%s' expects a string literal", attr_diag_name);
+ }
+ {
+ Tok t = p->cur;
+ size_t nlen = 0;
+ u8* bytes = decode_string_literal(p, &t, &nlen);
+ u32 ilen = (nlen > 0) ? (u32)(nlen - 1) : 0; /* the last decoded byte is not interned */
+ a->v.sym = pool_intern(p->c->global, (const char*)bytes, ilen); /* NOTE(review): interns into p->c->global, whereas parse_asm_str uses p->pool -- confirm intended */
+ p->c->env->heap->free(p->c->env->heap, bytes, 0);
+ }
+ a->nargs = 1;
+ advance(p); /* the string token, which was decoded from a copy above */
+ expect_punct(p, ')', "')' after attribute string argument");
+ return;
+ }
+ case AS_IDENT: { /* one identifier, stored as-is */
+ advance(p); /* '(' */
+ if (p->cur.kind != TOK_IDENT) {
+ perr(p, "attribute '%s' expects an identifier", attr_diag_name);
+ }
+ a->v.sym = p->cur.v.ident;
+ a->nargs = 1;
+ advance(p);
+ expect_punct(p, ')', "')' after attribute identifier argument");
+ return;
+ }
+ case AS_FORMAT: { /* (archetype, m, n): the archetype identifier is consumed but not stored */
+ SrcLoc mloc, nloc;
+ i64 mv, nv;
+ advance(p); /* '(' */
+ if (p->cur.kind != TOK_IDENT) {
+ perr(p, "attribute 'format' expects (archetype, m, n)");
+ }
+ advance(p);
+ expect_punct(p, ',', "',' after format archetype");
+ mloc = tok_loc(&p->cur);
+ mv = eval_const_int(p, mloc);
+ expect_punct(p, ',', "',' after format string-index");
+ nloc = tok_loc(&p->cur);
+ nv = eval_const_int(p, nloc);
+ if (mv < 0 || mv > 0xFFFF || nv < 0 || nv > 0xFFFF) { /* both indices are stored as u16 below */
+ perr(p, "attribute 'format' indices out of range");
+ }
+ a->v.format.fmt_idx = (u16)mv;
+ a->v.format.first = (u16)nv;
+ a->nargs = 3;
+ expect_punct(p, ')', "')' after format arguments");
+ return;
+ }
+ case AS_OPAQUE:
+ default: { /* unknown shapes are handled like AS_OPAQUE: skip the arguments */
+ skip_balanced_parens(p);
+ return;
+ }
+ }
+}
+
+/* Parse zero or more consecutive `__attribute__((a, b(args), ...))` groups.
+ * Returns the attributes in source order, linked through Attr.next (NULL
+ * when none were parsed). Each Attr comes from arena_new on p->c->tu and is
+ * zeroed before loc/name/kind are set; the shape from classify_attr drives
+ * parse_attr_args. Empty slots left by stray commas are tolerated. */
+Attr* parse_attribute_spec_list(Parser* p) {
+  Attr* head = NULL;
+  Attr* tail = NULL;
+  while (starts_attr(p)) {
+    advance(p); /* __attribute__ */
+    expect_punct(p, '(', "'(' after __attribute__");
+    expect_punct(p, '(', "'((' after __attribute__");
+    for (;;) {
+      Sym aname;
+      AttrArgShape shape;
+      Attr* a;
+      const char* diag_name;
+      while (accept_punct(p, ',')) { /* skip empty slots */ }
+      if (is_punct(&p->cur, ')')) break;
+      if (p->cur.kind != TOK_IDENT) {
+        perr(p, "expected attribute name");
+      }
+      aname = p->cur.v.ident;
+      a = arena_new(p->c->tu, Attr);
+      if (!a) perr(p, "out of memory in parse_attribute_spec_list");
+      memset(a, 0, sizeof *a);
+      a->loc = tok_loc(&p->cur);
+      a->name = aname;
+      a->kind = (u16)classify_attr(p, aname, &shape);
+      advance(p);
+      /* Only the spelling is needed, for diagnostics; it may be NULL, so it
+       * must not be handed to attr_canon_range (which dereferences it). */
+      diag_name = pool_str(p->pool, aname, NULL);
+      parse_attr_args(p, a, shape, diag_name ? diag_name : "<unknown>");
+      if (tail) tail->next = a; else head = a;
+      tail = a;
+      if (!accept_punct(p, ',')) break;
+    }
+    expect_punct(p, ')', "')' after attribute list");
+    expect_punct(p, ')', "'))' after attribute list");
+  }
+  return head;
+}
+
+void parse_and_discard_attributes(Parser* p) { /* consume any attribute groups at the current position */
+ (void)parse_attribute_spec_list(p); /* the parsed list is intentionally ignored */
+}
+
+/* Splice list `add` onto the tail of `*head` (lists link via Attr.next);
+ * a NULL `add` leaves `*head` unchanged. */
+void attr_list_append(Attr** head, Attr* add) {
+  Attr** link = head;
+  if (!add) return;
+  while (*link) link = &(*link)->next;
+  *link = add;
+}
+
+/* When starts_attr(p) holds, parse the attribute groups and append them to the end of `*sink`; otherwise do nothing. */
+void parse_attrs_into(Parser* p, Attr** sink) {
+ if (starts_attr(p)) attr_list_append(sink, parse_attribute_spec_list(p));
+}
+
+#define PARSE_ATTR_ALIGNED_DEFAULT 16u
+
+/* Apply the `packed` / `aligned` attributes in list `a` to record options. */
+static void attrs_to_record_opts(const Attr* a, TypeRecordOpts* opts) {
+  for (const Attr* it = a; it; it = it->next) {
+    if (it->kind == ATTR_PACKED) { opts->packed = 1; continue; }
+    if (it->kind != ATTR_ALIGNED) continue;
+    /* A bare `aligned` requests the default; the largest request wins. */
+    u32 want = it->nargs ? (u32)it->v.i : PARSE_ATTR_ALIGNED_DEFAULT;
+    if (want > opts->align_override) opts->align_override = (u16)want;
+  }
+}
+
+/* Apply the `packed` / `aligned` attributes in list `a` to field `f`. */
+static void attrs_to_field(const Attr* a, Field* f) {
+  for (const Attr* it = a; it; it = it->next) {
+    if (it->kind == ATTR_PACKED) { f->packed = 1; continue; }
+    if (it->kind != ATTR_ALIGNED) continue;
+    /* A bare `aligned` requests the default; the largest request wins. */
+    u32 want = it->nargs ? (u32)it->v.i : PARSE_ATTR_ALIGNED_DEFAULT;
+    if (want > f->align_override) f->align_override = (u16)want;
+  }
+}
+
+/* Largest `aligned` request in list `a` (bare `aligned` = default), or 0. */
+u32 attrs_pick_aligned(const Attr* a) {
+  u32 widest = 0;
+  for (; a; a = a->next) {
+    if (a->kind != ATTR_ALIGNED) continue;
+    u32 want = a->nargs ? (u32)a->v.i : PARSE_ATTR_ALIGNED_DEFAULT;
+    if (widest < want) widest = want;
+  }
+  return widest;
+}
+
+/* ============================================================
+ * resolve_type_specs
+ * ============================================================ */
+
+/* Resolve basic type specifiers to a primitive Type; NULL if none were seen.
+ * `void` mixed with others, or more than two `long`s, panics at `loc`. */
+const Type* resolve_type_specs(Parser* p, const TypeSpecAccum* a, SrcLoc loc) {
+  if (!a->saw_explicit_type) return NULL;
+  if (a->saw_void) {
+    if (a->saw_char || a->saw_int || a->saw_short || a->long_count ||
+        a->saw_signed || a->saw_unsigned || a->saw_bool || a->saw_float ||
+        a->saw_double) {
+      compiler_panic(p->c, loc, "conflicting type specifiers (void mixed)");
+    }
+    return type_void(p->pool);
+  }
+  if (a->long_count > 2) {
+    compiler_panic(p->c, loc, "conflicting type specifiers (too many 'long')");
+  }
+  if (a->saw_bool) return type_prim(p->pool, TY_BOOL);
+  if (a->saw_char) {
+    return type_prim(p->pool, a->saw_unsigned ? TY_UCHAR
+                              : a->saw_signed ? TY_SCHAR : TY_CHAR);
+  }
+  if (a->saw_float) return type_prim(p->pool, TY_FLOAT);
+  if (a->saw_double) {
+    return type_prim(p->pool, a->long_count ? TY_LDOUBLE : TY_DOUBLE);
+  }
+  if (a->saw_short) {
+    return type_prim(p->pool, a->saw_unsigned ? TY_USHORT : TY_SHORT);
+  }
+  if (a->long_count == 2) {
+    return type_prim(p->pool, a->saw_unsigned ? TY_ULLONG : TY_LLONG);
+  }
+  if (a->long_count == 1) {
+    return type_prim(p->pool, a->saw_unsigned ? TY_ULONG : TY_LONG);
+  }
+  return type_prim(p->pool, a->saw_unsigned ? TY_UINT : TY_INT);
+}
+
+/* ============================================================
+ * parse_decl_specs
+ * ============================================================ */
+
+int parse_decl_specs(Parser* p, DeclSpecs* out) { /* returns nonzero iff at least one specifier/qualifier/attribute was consumed */
+ TypeSpecAccum acc; /* flags for the basic type keywords seen so far */
+ SrcLoc loc;
+ int seen = 0;
+ const Type* tagged_ty = NULL; /* complete type from struct/union/enum, _Atomic(T), va_list or a typedef */
+ memset(&acc, 0, sizeof acc);
+ out->type = NULL;
+ out->storage = DS_AUTO;
+ out->flags = DF_NONE;
+ out->quals = 0;
+ out->align = 0;
+ out->vla_byte_slot = FRAME_SLOT_NONE;
+ out->attrs = NULL;
+ loc = tok_loc(&p->cur);
+ for (;;) {
+ Tok t = p->cur;
+ if (starts_attr(p)) {
+ Attr* a = parse_attribute_spec_list(p);
+ if (a) { /* the new group is linked in front of attributes collected earlier */
+ Attr* tail = a;
+ while (tail->next) tail = tail->next;
+ tail->next = out->attrs;
+ out->attrs = a;
+ }
+ seen = 1;
+ continue;
+ }
+ if (is_kw(p, &t, KW_STRUCT) || is_kw(p, &t, KW_UNION)) {
+ TypeKind kind = is_kw(p, &t, KW_STRUCT) ? TY_STRUCT : TY_UNION;
+ Attr* anon_attrs = NULL;
+ if (tagged_ty || acc.saw_explicit_type) {
+ perr(p, "conflicting type specifiers (struct/union mixed)");
+ }
+ advance(p);
+ tagged_ty = parse_struct_or_union(p, kind, &anon_attrs);
+ attr_list_append(&out->attrs, anon_attrs); /* unlike the branch above, these go at the end */
+ acc.saw_explicit_type = 1;
+ seen = 1;
+ continue;
+ }
+ if (is_kw(p, &t, KW_ENUM)) {
+ Attr* anon_attrs = NULL;
+ if (tagged_ty || acc.saw_explicit_type) {
+ perr(p, "conflicting type specifiers (enum mixed)");
+ }
+ advance(p);
+ tagged_ty = parse_enum(p, &anon_attrs);
+ attr_list_append(&out->attrs, anon_attrs);
+ acc.saw_explicit_type = 1;
+ seen = 1;
+ continue;
+ }
+ if (is_kw(p, &t, KW_VOID)) { /* basic type keywords only set flags; resolve_type_specs combines them after the loop */
+ acc.saw_void = 1; acc.saw_explicit_type = 1; advance(p); seen = 1;
+ } else if (is_kw(p, &t, KW_CHAR)) {
+ acc.saw_char = 1; acc.saw_explicit_type = 1; advance(p); seen = 1;
+ } else if (is_kw(p, &t, KW_INT)) {
+ acc.saw_int = 1; acc.saw_explicit_type = 1; advance(p); seen = 1;
+ } else if (is_kw(p, &t, KW_SHORT)) {
+ acc.saw_short = 1; acc.saw_explicit_type = 1; advance(p); seen = 1;
+ } else if (is_kw(p, &t, KW_LONG)) {
+ acc.long_count++; acc.saw_explicit_type = 1; advance(p); seen = 1;
+ } else if (is_kw(p, &t, KW_SIGNED)) {
+ acc.saw_signed = 1; acc.saw_explicit_type = 1; advance(p); seen = 1;
+ } else if (is_kw(p, &t, KW_UNSIGNED)) {
+ acc.saw_unsigned = 1; acc.saw_explicit_type = 1; advance(p); seen = 1;
+ } else if (is_kw(p, &t, KW_BOOL)) {
+ acc.saw_bool = 1; acc.saw_explicit_type = 1; advance(p); seen = 1;
+ } else if (is_kw(p, &t, KW_FLOAT)) {
+ acc.saw_float = 1; acc.saw_explicit_type = 1; advance(p); seen = 1;
+ } else if (is_kw(p, &t, KW_DOUBLE)) {
+ acc.saw_double = 1; acc.saw_explicit_type = 1; advance(p); seen = 1;
+ } else if (is_kw(p, &t, KW_STATIC)) { /* a later storage-class keyword overwrites an earlier one */
+ out->storage = DS_STATIC; advance(p); seen = 1;
+ } else if (is_kw(p, &t, KW_EXTERN)) {
+ out->storage = DS_EXTERN; advance(p); seen = 1;
+ } else if (is_kw(p, &t, KW_CONST)) {
+ out->quals |= Q_CONST; advance(p); seen = 1;
+ } else if (is_kw(p, &t, KW_VOLATILE)) {
+ out->quals |= Q_VOLATILE; advance(p); seen = 1;
+ } else if (is_kw(p, &t, KW_RESTRICT)) {
+ out->quals |= Q_RESTRICT; advance(p); seen = 1;
+ } else if (is_kw(p, &t, KW_ATOMIC)) {
+ Tok n = peek1(p); /* `_Atomic(` is the type-specifier form; a bare `_Atomic` is a qualifier */
+ if (is_punct(&n, '(')) {
+ const Type* inner;
+ if (tagged_ty || acc.saw_explicit_type) {
+ perr(p, "conflicting type specifiers (_Atomic(T) mixed)");
+ }
+ advance(p); /* `_Atomic` */
+ advance(p); /* `(` */
+ inner = parse_type_name(p);
+ expect_punct(p, ')', "')' after _Atomic type");
+ tagged_ty = type_qualified(p->pool, inner, Q_ATOMIC);
+ acc.saw_explicit_type = 1;
+ seen = 1;
+ continue;
+ }
+ out->quals |= Q_ATOMIC; advance(p); seen = 1;
+ } else if (is_kw(p, &t, KW_TYPEDEF)) {
+ out->storage = DS_TYPEDEF; advance(p); seen = 1;
+ } else if (is_kw(p, &t, KW_ALIGNAS)) {
+ u32 a = 0;
+ advance(p); /* `_Alignas` */
+ expect_punct(p, '(', "'(' after _Alignas");
+ if (starts_type_name(p, &p->cur)) { /* _Alignas(type-name): use that type's ABI alignment */
+ const Type* tn = parse_type_name(p);
+ a = abi_alignof(p->abi, tn);
+ } else {
+ i64 v = eval_const_int(p, tok_loc(&p->cur));
+ if (v < 0) perr(p, "_Alignas requires a non-negative alignment");
+ a = (u32)v;
+ }
+ expect_punct(p, ')', "')' after _Alignas argument");
+ if (a > out->align) out->align = a; /* the strictest of several _Alignas wins */
+ seen = 1;
+ } else if (is_kw(p, &t, KW_INLINE)) {
+ out->flags |= DF_INLINE; advance(p); seen = 1;
+ } else if (is_kw(p, &t, KW_THREAD_LOCAL)) {
+ out->flags |= DF_THREAD; advance(p); seen = 1;
+ } else if (is_kw(p, &t, KW_NORETURN) || is_kw(p, &t, KW_REGISTER) ||
+ is_kw(p, &t, KW_AUTO)) { /* accepted, but nothing is recorded for these */
+ advance(p); seen = 1;
+ } else if (!acc.saw_explicit_type && !tagged_ty &&
+ t.kind == TOK_IDENT && ident_kw(p, t.v.ident) == KW_NONE) { /* an identifier can name a type only while no type has been seen */
+ if (t.v.ident == p->sym_b_va_list) { /* this identifier maps to the ABI's va_list type */
+ tagged_ty = abi_va_list_type(p->abi, p->pool);
+ acc.saw_explicit_type = 1;
+ advance(p);
+ seen = 1;
+ continue;
+ }
+ SymEntry* e = scope_lookup(p, t.v.ident);
+ if (e && e->kind == SEK_TYPEDEF) {
+ tagged_ty = e->type;
+ if (e->vla_byte_slot != FRAME_SLOT_NONE) {
+ out->vla_byte_slot = e->vla_byte_slot;
+ }
+ acc.saw_explicit_type = 1;
+ advance(p);
+ seen = 1;
+ continue;
+ }
+ break; /* not a typedef name: the specifier run ends here, token left unconsumed */
+ } else {
+ break;
+ }
+ }
+ if (seen) {
+ if (tagged_ty) {
+ out->type = tagged_ty;
+ } else {
+ out->type = resolve_type_specs(p, &acc, loc);
+ if (!out->type) { /* only storage/qualifier/attribute tokens were seen: default to int */
+ out->type = ty_int(p);
+ }
+ }
+ }
+ return seen;
+}
+
+/* ============================================================
+ * struct / union / enum
+ * ============================================================ */
+
+/* Resolve member `name` inside the struct/union type `rec`.
+ *
+ * Direct members are matched by name (the null Sym never matches). Members
+ * flagged FIELD_ANON whose type is itself a struct or union are searched
+ * recursively, and the offset found inside them is added to the anonymous
+ * member's own layout offset, so *out_offset is always relative to `rec`.
+ *
+ * Returns 1 and fills *out_type / *out_offset / *out_field on success.
+ * Returns 0 (outputs untouched) when `rec` is NULL, is not a struct/union,
+ * has no ABI layout, or holds no such member. */
+int find_field(TargetABI* abi, const Type* rec, Sym name,
+               const Type** out_type, u32* out_offset,
+               const Field** out_field) {
+  const ABIRecordLayout* layout;
+  u16 idx;
+
+  if (!rec) return 0;
+  if (rec->kind != TY_STRUCT && rec->kind != TY_UNION) return 0;
+
+  layout = abi_record_layout(abi, rec);
+  if (!layout) return 0;
+
+  for (idx = 0; idx < rec->rec.nfields; ++idx) {
+    const Field* fld = &rec->rec.fields[idx];
+    const Type* sub_ty = NULL;
+    u32 sub_off = 0;
+    const Field* sub_fld = NULL;
+
+    /* Direct hit on a named member. */
+    if (name != 0 && fld->name == name) {
+      *out_type = fld->type;
+      *out_offset = layout->fields[idx].offset;
+      *out_field = fld;
+      return 1;
+    }
+
+    /* Only anonymous aggregates are transparent to member lookup. */
+    if (!(fld->flags & FIELD_ANON)) continue;
+    if (fld->type->kind != TY_STRUCT && fld->type->kind != TY_UNION) continue;
+    if (!find_field(abi, fld->type, name, &sub_ty, &sub_off, &sub_fld)) {
+      continue;
+    }
+
+    *out_type = sub_ty;
+    *out_offset = layout->fields[idx].offset + sub_off;
+    *out_field = sub_fld;
+    return 1;
+  }
+  return 0;
+}
+
+/* Evaluate the constant expression that follows the ':' of a bit-field and
+ * return it as the u16 stored in Field.bitfield_width.
+ *
+ * A negative width, or one that does not fit in 16 bits, is diagnosed here
+ * instead of being silently wrapped by the narrowing cast. `loc` is the
+ * location handed to eval_const_int. */
+static u16 parse_bitfield_width(Parser* p, SrcLoc loc) {
+  i64 w = eval_const_int(p, loc);
+  if (w < 0) perr(p, "bit-field width must be non-negative");
+  if (w > 0xFFFF) perr(p, "bit-field width too large");
+  return (u16)w;
+}
+
+/* Parse the member declarations of a struct/union body and add each member
+ * to record builder `b`. Stops (without consuming) at '}' or end of input.
+ *
+ * Handled forms, per declaration:
+ *   - `struct {...};` / `union {...};` with no declarator: added as an
+ *     unnamed FIELD_ANON member. Any other declarator-less declaration is
+ *     an error.
+ *   - `: width` with no declarator: unnamed bit-field of the specifier type.
+ *   - `declarator` optionally followed by `: width`: ordinary member or
+ *     named bit-field.
+ * A width of 0 additionally sets FIELD_ZERO_WIDTH. Attributes from the
+ * specifiers, from the declarator, and trailing the declarator are all
+ * folded into the field via attrs_to_field. */
+static void parse_member_decls(Parser* p, TypeRecordBuilder* b) {
+  while (!is_punct(&p->cur, '}') && p->cur.kind != TOK_EOF) {
+    DeclSpecs specs;
+    if (!parse_decl_specs(p, &specs)) {
+      perr(p, "expected member declaration");
+    }
+    if (is_punct(&p->cur, ';')) {
+      if (specs.type && (specs.type->kind == TY_STRUCT ||
+                         specs.type->kind == TY_UNION)) {
+        Field f;
+        memset(&f, 0, sizeof f);
+        f.name = 0;
+        f.type = specs.type;
+        f.flags = FIELD_ANON;
+        type_record_field(b, f);
+        advance(p);
+        continue;
+      }
+      perr(p, "declaration without declarator must be anonymous aggregate");
+    }
+    /* One iteration per comma-separated member declarator. */
+    for (;;) {
+      Sym mname = 0;
+      SrcLoc mloc = tok_loc(&p->cur);
+      const Type* mty;
+      Field f;
+      memset(&f, 0, sizeof f);
+      if (is_punct(&p->cur, ':')) {
+        /* Unnamed bit-field: no declarator precedes the ':'. */
+        advance(p);
+        f.name = 0;
+        f.type = specs.type;
+        f.bitfield_width = parse_bitfield_width(p, mloc);
+        f.flags = FIELD_BITFIELD;
+        if (f.bitfield_width == 0) f.flags |= FIELD_ZERO_WIDTH;
+        attrs_to_field(specs.attrs, &f);
+        type_record_field(b, f);
+        if (!accept_punct(p, ',')) break;
+        continue;
+      }
+      Attr* mattrs = NULL;
+      mty = parse_declarator_full_ex(p, specs.type, /*allow_abstract=*/0,
+                                     &mname, &mloc, &mattrs);
+      f.name = mname;
+      f.type = mty;
+      if (accept_punct(p, ':')) {
+        f.bitfield_width = parse_bitfield_width(p, mloc);
+        f.flags = FIELD_BITFIELD;
+        if (f.bitfield_width == 0) f.flags |= FIELD_ZERO_WIDTH;
+      } else {
+        f.flags = FIELD_NONE;
+      }
+      attrs_to_field(specs.attrs, &f);
+      attrs_to_field(mattrs, &f);
+      {
+        Attr* trailing = NULL;
+        parse_attrs_into(p, &trailing);
+        attrs_to_field(trailing, &f);
+      }
+      type_record_field(b, f);
+      if (!accept_punct(p, ',')) break;
+    }
+    expect_punct(p, ';', "';' after struct member declaration");
+  }
+}
+
+/* Parse what follows the `struct` / `union` keyword: optional attributes,
+ * an optional tag, and an optional `{ ... }` body (followed by optional
+ * trailing attributes). `kind` is TY_STRUCT or TY_UNION.
+ *
+ * Without a body: returns the type of the visible tag of that name (any
+ * scope), or forward-declares a new incomplete tag in the current scope.
+ * With a body: completes a same-scope forward declaration, or creates a new
+ * record type (registered under the tag when one was given), installs the
+ * parsed fields into it and applies packed / alignment attributes.
+ *
+ * Attributes are appended to the tag entry when there is one; for an
+ * untagged definition they are appended to *anon_attrs_out if that pointer
+ * is non-NULL.
+ *
+ * Errors (via perr): neither tag nor body, tag reused with the other kind,
+ * redefinition of an already complete tag. */
+const Type* parse_struct_or_union(Parser* p, TypeKind kind,
+                                  Attr** anon_attrs_out) {
+  Sym tag_name = 0;
+  SrcLoc tag_loc;
+  TagDeclKind tdk = (kind == TY_STRUCT) ? TAG_STRUCT : TAG_UNION;
+  Attr* rec_attrs = NULL;
+  parse_attrs_into(p, &rec_attrs);
+  tag_loc = tok_loc(&p->cur);
+  if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) {
+    tag_name = p->cur.v.ident;
+    advance(p);
+  }
+  int has_body = is_punct(&p->cur, '{');
+  if (!has_body && tag_name == 0) {
+    perr(p, "expected tag name or '{' after struct/union");
+  }
+  if (!has_body) {
+    /* Reference or forward declaration. */
+    TagEntry* e = tag_lookup(p, tag_name);
+    if (e) {
+      if (e->kind != tdk) {
+        perr(p, "use of tag with wrong kind (struct vs union)");
+      }
+      attr_list_append(&e->attrs, rec_attrs);
+      return e->type;
+    }
+    {
+      TagId tid = type_tag_new(p->pool, tdk, tag_name, tag_loc);
+      Type* t = type_record_forward(p->pool, kind, tid, tag_name);
+      TagEntry* te = tag_define(p, tag_name, tdk, t, /*complete=*/0);
+      attr_list_append(&te->attrs, rec_attrs);
+      return t;
+    }
+  }
+  /* Definition. Only a tag in the *current* scope can be completed; a tag
+   * from an outer scope is shadowed by a fresh one. */
+  Type* target = NULL;
+  TagEntry* existing = tag_name ? tag_lookup_local(p, tag_name) : NULL;
+  TagEntry* te = NULL;
+  if (existing) {
+    if (existing->kind != tdk) {
+      perr(p, "tag redeclared with wrong kind");
+    }
+    if (existing->complete) {
+      perr(p, "redefinition of tag");
+    }
+    target = existing->type;
+    te = existing;
+  } else {
+    TagId tid = type_tag_new(p->pool, tdk, tag_name, tag_loc);
+    target = type_record_forward(p->pool, kind, tid, tag_name);
+    if (tag_name) {
+      /* Registered as incomplete before the body is parsed, so the tag is
+       * already defined while its own members are being parsed. */
+      te = tag_define(p, tag_name, tdk, target, /*complete=*/0);
+    }
+  }
+  expect_punct(p, '{', "'{' to start aggregate body");
+  TypeRecordBuilder* b =
+      type_record_begin(p->pool, kind, target->rec.tag_id, tag_name);
+  parse_member_decls(p, b);
+  expect_punct(p, '}', "'}' after aggregate body");
+  parse_attrs_into(p, &rec_attrs);
+  if (te) {
+    attr_list_append(&te->attrs, rec_attrs);
+  } else if (anon_attrs_out) {
+    attr_list_append(anon_attrs_out, rec_attrs);
+  }
+  {
+    /* Build the field list separately, then install it into `target`. */
+    const Type* fresh = type_record_end(p->pool, b);
+    type_record_install(target, (Field*)fresh->rec.fields,
+                        fresh->rec.nfields);
+  }
+  {
+    TypeRecordOpts opts;
+    memset(&opts, 0, sizeof opts);
+    attrs_to_record_opts(rec_attrs, &opts);
+    if (opts.packed) target->rec.packed = 1;
+    if (opts.align_override > target->rec.align_override)
+      target->rec.align_override = opts.align_override;
+  }
+  /* Mark the tag complete whether it was forward-declared earlier or first
+   * defined here; otherwise a later same-scope `struct T { ... }` would slip
+   * past the "redefinition of tag" check above. */
+  if (te) {
+    te->complete = 1;
+  }
+  return target;
+}
+
+/* Parse what follows the `enum` keyword: optional attributes, an optional
+ * tag, and an optional `{ enumerator-list }` body with optional trailing
+ * attributes.
+ *
+ * Without a body, an already visible enum tag of that name is returned;
+ * otherwise a new, incomplete enum tag is defined. With a body a new enum
+ * type over `int` is created, every enumerator is entered into the current
+ * scope as SEK_ENUM_CST (values count up from 0 unless `= constant` resets
+ * them), and the tag, if any, is marked or defined as complete.
+ *
+ * Attributes go to the tag entry when there is a tag, else to
+ * *anon_attrs_out when that pointer is non-NULL. */
+const Type* parse_enum(Parser* p, Attr** anon_attrs_out) {
+  Sym tag = 0;
+  SrcLoc where;
+  Attr* attrs = NULL;
+  int defines;
+  TagId id;
+  const Type* enum_ty;
+  i64 counter = 0;
+
+  parse_attrs_into(p, &attrs);
+  where = tok_loc(&p->cur);
+  if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) {
+    tag = p->cur.v.ident;
+    advance(p);
+  }
+
+  defines = is_punct(&p->cur, '{');
+  if (!defines && tag == 0) {
+    perr(p, "expected tag name or '{' after enum");
+  }
+
+  if (!defines) {
+    /* Plain reference: reuse a visible enum tag when there is one. */
+    TagEntry* known = tag_lookup(p, tag);
+    if (known && known->kind == TAG_ENUM) {
+      attr_list_append(&known->attrs, attrs);
+      return known->type;
+    }
+  }
+
+  id = type_tag_new(p->pool, TAG_ENUM, tag, where);
+  enum_ty = type_enum(p->pool, id, tag, ty_int(p));
+
+  if (!defines) {
+    TagEntry* fwd = tag_define(p, tag, TAG_ENUM, (Type*)enum_ty,
+                               /*complete=*/0);
+    attr_list_append(&fwd->attrs, attrs);
+    return enum_ty;
+  }
+
+  expect_punct(p, '{', "'{'");
+  /* Enumerator list; a trailing comma before '}' is accepted. */
+  do {
+    SrcLoc item_loc = tok_loc(&p->cur);
+    Sym item;
+    SymEntry* cst;
+    i64 value;
+
+    if (p->cur.kind != TOK_IDENT || ident_kw(p, p->cur.v.ident) != KW_NONE) {
+      perr(p, "expected enumerator name");
+    }
+    item = p->cur.v.ident;
+    advance(p);
+
+    value = counter;
+    if (accept_punct(p, '=')) {
+      value = eval_const_int(p, item_loc);
+    }
+    cst = scope_define(p, item, SEK_ENUM_CST, enum_ty);
+    cst->v.enum_value = value;
+    counter = value + 1;
+  } while (accept_punct(p, ',') && !is_punct(&p->cur, '}'));
+  expect_punct(p, '}', "'}' after enumerator list");
+  parse_attrs_into(p, &attrs);
+
+  if (!tag) {
+    if (anon_attrs_out) {
+      attr_list_append(anon_attrs_out, attrs);
+    }
+    return enum_ty;
+  }
+
+  {
+    TagEntry* prior = tag_lookup_local(p, tag);
+    if (!prior) {
+      TagEntry* fresh = tag_define(p, tag, TAG_ENUM, (Type*)enum_ty,
+                                   /*complete=*/1);
+      attr_list_append(&fresh->attrs, attrs);
+    } else {
+      if (prior->kind != TAG_ENUM) {
+        perr(p, "tag redeclared with wrong kind");
+      }
+      prior->complete = 1;
+      attr_list_append(&prior->attrs, attrs);
+    }
+  }
+  return enum_ty;
+}
+
+/* ============================================================
+ * starts_type_name, parse_pointer_layer, parse_type_name
+ * ============================================================ */
+
+/* Report whether token `t` can begin a declaration-specifier sequence.
+ *
+ * Returns 1 for the type-specifier, type-qualifier, storage-class,
+ * function-specifier and alignment keywords listed below, for the builtin
+ * va_list identifier, and for any identifier that currently resolves to a
+ * typedef. Returns 0 for everything else, including non-identifier tokens. */
+int starts_type_name(const Parser* p, const Tok* t) {
+  CKw kw;
+
+  if (t->kind != TOK_IDENT) return 0;
+
+  kw = ident_kw(p, t->v.ident);
+  if (kw == KW_NONE) {
+    /* Ordinary identifier: a type only if it names one. */
+    SymEntry* sym;
+    if (t->v.ident == p->sym_b_va_list) return 1;
+    sym = scope_lookup((Parser*)p, t->v.ident);
+    return sym && sym->kind == SEK_TYPEDEF;
+  }
+
+  switch (kw) {
+    /* type specifiers */
+    case KW_VOID: case KW_CHAR: case KW_SHORT: case KW_INT: case KW_LONG:
+    case KW_FLOAT: case KW_DOUBLE: case KW_SIGNED: case KW_UNSIGNED:
+    case KW_BOOL: case KW_STRUCT: case KW_UNION: case KW_ENUM:
+    /* type qualifiers */
+    case KW_CONST: case KW_VOLATILE: case KW_RESTRICT: case KW_ATOMIC:
+    /* storage class, function specifiers, alignment */
+    case KW_STATIC: case KW_EXTERN: case KW_INLINE: case KW_NORETURN:
+    case KW_REGISTER: case KW_AUTO: case KW_TYPEDEF: case KW_ALIGNAS:
+    case KW_THREAD_LOCAL:
+      return 1;
+    default:
+      return 0;
+  }
+}
+
+/* Consume zero or more `*` declarator prefixes, each optionally followed by
+ * const / volatile / restrict / _Atomic qualifiers and attributes (the
+ * attributes are parsed and discarded).
+ *
+ * Every `*` wraps the running type in a pointer; that pointer is then
+ * qualified with the qualifiers that followed it. Returns the resulting
+ * type, or `base` unchanged when the current token is not `*`. */
+const Type* parse_pointer_layer(Parser* p, const Type* base) {
+  const Type* ty = base;
+
+  while (accept_punct(p, '*')) {
+    u16 quals = 0;
+    ty = type_ptr(p->pool, ty);
+    for (;;) {
+      if (accept_kw(p, KW_CONST)) {
+        quals |= Q_CONST;
+      } else if (accept_kw(p, KW_VOLATILE)) {
+        quals |= Q_VOLATILE;
+      } else if (accept_kw(p, KW_RESTRICT)) {
+        quals |= Q_RESTRICT;
+      } else if (accept_kw(p, KW_ATOMIC)) {
+        quals |= Q_ATOMIC;
+      } else if (starts_attr(p)) {
+        parse_and_discard_attributes(p);
+      } else {
+        break;
+      }
+    }
+    if (quals != 0) {
+      ty = type_qualified(p->pool, ty, quals);
+    }
+  }
+  return ty;
+}
+
+/* Parse a type-name: declaration specifiers followed by an optional
+ * abstract declarator, as used by `_Atomic(T)` and `_Alignas(T)` in this
+ * file. Reports "expected type-name" when no specifier is present. Any
+ * identifier or location the declarator yields is discarded. */
+const Type* parse_type_name(Parser* p) {
+  DeclSpecs ds;
+  Sym ignored_name = 0;
+  SrcLoc ignored_loc = {0, 0, 0};
+  int have_specs = parse_decl_specs(p, &ds);
+
+  if (!have_specs) {
+    perr(p, "expected type-name");
+  }
+  return parse_declarator_full(p, ds.type, /*allow_abstract=*/1,
+                               &ignored_name, &ignored_loc);
+}
+
+/* ============================================================
+ * Declarator suffix helpers
+ * (DeclSuffix / DSuffKind defined in parse_priv.h)
+ * ============================================================ */
+
+/* Try to parse ONE declarator suffix at the current token and describe it
+ * in *out.
+ *
+ *   `[ ... ]`  -> out->kind = DS_ARRAY, with count / incomplete / vla set
+ *   `( ... )`  -> out->kind = DS_FUNC, with params / nparams / variadic set
+ *
+ * Returns 1 when a suffix was consumed, 0 (consuming nothing) when the
+ * current token is neither '[' nor '('. */
+int parse_decl_suffix(Parser* p, DeclSuffix* out) {
+ if (accept_punct(p, '[')) {
+ out->kind = DS_ARRAY;
+ out->count = 0;
+ out->incomplete = 0;
+ out->vla = 0;
+ /* `static` and type qualifiers inside the brackets are accepted and
+ * dropped; nothing is recorded for them. */
+ for (;;) {
+ if (accept_kw(p, KW_STATIC) || accept_kw(p, KW_CONST) ||
+ accept_kw(p, KW_VOLATILE) || accept_kw(p, KW_RESTRICT) ||
+ accept_kw(p, KW_ATOMIC)) {
+ continue;
+ }
+ break;
+ }
+ /* `[]`: array of unknown bound. */
+ if (accept_punct(p, ']')) {
+ out->incomplete = 1;
+ return 1;
+ }
+ /* In a parameter declaration the bound is not evaluated at all: tokens
+ * are skipped up to the matching ']' (nested brackets are tracked) and
+ * the array is recorded as incomplete. */
+ if (p->in_param_decl) {
+ int depth = 1;
+ while (depth > 0) {
+ if (p->cur.kind == TOK_EOF) {
+ perr(p, "unexpected EOF in parameter array bound");
+ }
+ if (is_punct(&p->cur, '[')) ++depth;
+ else if (is_punct(&p->cur, ']')) {
+ --depth;
+ if (depth == 0) break;
+ }
+ advance(p);
+ }
+ out->incomplete = 1;
+ expect_punct(p, ']', "']' after array size");
+ return 1;
+ }
+ {
+ /* Decide from the FIRST token of the bound only: a number, character
+ * literal, enum constant, or sizeof/alignof keyword selects the
+ * constant path; anything else is treated as a VLA bound. */
+ Tok t = p->cur;
+ int is_const_start = (t.kind == TOK_NUM || t.kind == TOK_CHR);
+ if (!is_const_start && t.kind == TOK_IDENT) {
+ SymEntry* e = scope_lookup(p, t.v.ident);
+ if (e && e->kind == SEK_ENUM_CST) is_const_start = 1;
+ if (!is_const_start) {
+ CKw k = ident_kw(p, t.v.ident);
+ if (k == KW_SIZEOF || k == KW_ALIGNOF) is_const_start = 1;
+ }
+ }
+ if (is_const_start) {
+ SrcLoc cloc = tok_loc(&p->cur);
+ i64 v = eval_const_int(p, cloc);
+ if (v < 0) perr(p, "negative array size");
+ out->count = (u32)v;
+ } else {
+ FrameSlotDesc fsd;
+ /* Only one VLA dimension may be pending at a time. */
+ if (p->vla_pending) {
+ perr(p, "v1 supports only one VLA dimension per declarator");
+ }
+ out->vla = 1;
+ /* Reserve a size_t-typed local slot for the element count. */
+ memset(&fsd, 0, sizeof fsd);
+ fsd.type = ty_size_t(p);
+ fsd.size = abi_sizeof(p->abi, fsd.type);
+ fsd.align = abi_alignof(p->abi, fsd.type);
+ fsd.kind = FS_LOCAL;
+ out->vla_count_slot = cg_local(p->cg, &fsd);
+ /* Parse the bound expression, then emit the push/swap/store/drop
+ * sequence below -- presumably this stores the bound's value into the
+ * count slot; confirm against the cg_* stack contract. */
+ parse_assign_expr(p);
+ to_rvalue(p);
+ cg_push_local_typed(p->cg, out->vla_count_slot, fsd.type);
+ cg_swap(p->cg);
+ cg_store(p->cg);
+ cg_drop(p->cg);
+ /* Record the pending VLA and its count slot on the parser. */
+ p->vla_pending = 1;
+ p->vla_pending_count_slot = out->vla_count_slot;
+ }
+ }
+ expect_punct(p, ']', "']' after array size");
+ return 1;
+ }
+ if (accept_punct(p, '(')) {
+ /* Function suffix: the parameter list is parsed by parse_param_list. */
+ out->kind = DS_FUNC;
+ out->params = NULL;
+ out->nparams = 0;
+ out->variadic = 0;
+ parse_param_list(p, &out->params, &out->nparams, &out->variadic);
+ expect_punct(p, ')', "')' after parameter list");
+ return 1;
+ }
+ return 0;
+}
+
+/* Wrap `base` in the derived type described by suffix `s`.
+ *
+ * DS_ARRAY yields an array of `base` with s->count elements, flagged
+ * incomplete when the suffix was `[]` or a VLA. Any other kind is treated
+ * as a function suffix and yields a function returning `base`; the
+ * parameter types are copied out of s->params into an array allocated from
+ * the translation-unit arena (no array is allocated for zero parameters). */
+const Type* apply_decl_suffix(Parser* p, const Type* base,
+                              const DeclSuffix* s) {
+  const Type** param_types = NULL;
+  u16 k;
+
+  if (s->kind == DS_ARRAY) {
+    int unsized = s->incomplete || s->vla;
+    return type_array(p->pool, base, s->count, unsized);
+  }
+
+  if (s->nparams) {
+    param_types =
+        (const Type**)arena_array(p->c->tu, const Type*, s->nparams);
+    for (k = 0; k < s->nparams; ++k) {
+      param_types[k] = s->params[k].type;
+    }
+  }
+  return type_func(p->pool, base, param_types, s->nparams, (int)s->variadic);
+}
+
+/* ============================================================
+ * parse_declarator_full, parse_declarator_full_ex, parse_declarator
+ * ============================================================ */
+
+/* Convenience form of parse_declarator_full_ex for callers that do not
+ * collect declarator attributes: a NULL attribute sink is passed, so any
+ * attributes met are parsed and discarded. */
+const Type* parse_declarator_full(Parser* p, const Type* base,
+                                  int allow_abstract, Sym* name_out,
+                                  SrcLoc* loc_out) {
+  Attr** no_attr_sink = NULL;
+  return parse_declarator_full_ex(p, base, allow_abstract, name_out, loc_out,
+                                  no_attr_sink);
+}
+
+/* Parse a (possibly abstract) declarator and return the declared type.
+ *
+ * Grammar handled:
+ *     pointer* ( '(' pointer* name? suffix* ')' | name? ) suffix*
+ * where a suffix is `[...]` or `(...)` (see parse_decl_suffix). At most one
+ * level of grouping parentheses is recognised, with up to 8 pointer layers
+ * inside it and up to 8 suffixes at each level.
+ *
+ * Parameters:
+ *   base            type produced by the declaration specifiers
+ *   allow_abstract  non-zero if the identifier may be omitted
+ *   name_out        receives the declared name (0 if none); may be NULL
+ *   loc_out         receives the name's location ({0,0,0} if none); may be
+ *                   NULL
+ *   attrs_out       if non-NULL, attributes after the name and after each
+ *                   outer suffix are collected here; otherwise discarded.
+ *                   Attributes on pointer layers or inside the parentheses
+ *                   are always discarded.
+ *
+ * Type construction follows C declarator binding (inside-out):
+ *   1. leading pointers wrap `base`;
+ *   2. outer suffixes apply, rightmost first;
+ *   3. pointers inside the parentheses apply in source order, so the
+ *      leftmost `*` binds closest to the type built so far;
+ *   4. suffixes inside the parentheses apply, rightmost first.
+ * Example: `int (*const *pp)[3]` declares pp as pointer to const pointer to
+ * array 3 of int. */
+const Type* parse_declarator_full_ex(Parser* p, const Type* base,
+                                     int allow_abstract, Sym* name_out,
+                                     SrcLoc* loc_out,
+                                     Attr** attrs_out) {
+  base = parse_pointer_layer(p, base);
+
+  Sym name = 0;
+  SrcLoc nloc = {0, 0, 0};
+  u8 nptrs_inner = 0;
+  u16 inner_quals[8];
+  int has_inner_parens = 0;
+  DeclSuffix inner_suffs[8];
+  int n_inner_suffs = 0;
+
+  if (is_punct(&p->cur, '(')) {
+    /* A '(' here is either a grouping parenthesis or the parameter list of
+     * an abstract function declarator. Treat it as grouping when it is
+     * followed by '*' or by an identifier that is not a typedef name. */
+    Tok n = peek1(p);
+    int is_inner = 0;
+    if (is_punct(&n, '*')) {
+      is_inner = 1;
+    } else if (n.kind == TOK_IDENT && ident_kw(p, n.v.ident) == KW_NONE) {
+      SymEntry* e = scope_lookup(p, n.v.ident);
+      if (!(e && e->kind == SEK_TYPEDEF)) is_inner = 1;
+    }
+    if (is_inner) {
+      has_inner_parens = 1;
+      advance(p); /* '(' */
+      /* The pointee type is not known until the outer suffixes have been
+       * parsed, so only the qualifiers of each inner `*` are recorded. */
+      while (accept_punct(p, '*')) {
+        u16 q = 0;
+        if (nptrs_inner >= 8) perr(p, "too many pointer levels");
+        for (;;) {
+          if (accept_kw(p, KW_CONST)) { q |= Q_CONST; continue; }
+          if (accept_kw(p, KW_VOLATILE)) { q |= Q_VOLATILE; continue; }
+          if (accept_kw(p, KW_RESTRICT)) { q |= Q_RESTRICT; continue; }
+          if (accept_kw(p, KW_ATOMIC)) { q |= Q_ATOMIC; continue; }
+          if (starts_attr(p)) { parse_and_discard_attributes(p); continue; }
+          break;
+        }
+        inner_quals[nptrs_inner++] = q;
+      }
+      if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) {
+        name = p->cur.v.ident;
+        nloc = tok_loc(&p->cur);
+        advance(p);
+      } else if (!allow_abstract) {
+        perr(p, "expected declarator name");
+      }
+      if (starts_attr(p)) parse_and_discard_attributes(p);
+      while (n_inner_suffs < 8) {
+        if (!parse_decl_suffix(p, &inner_suffs[n_inner_suffs])) break;
+        ++n_inner_suffs;
+        if (starts_attr(p)) parse_and_discard_attributes(p);
+      }
+      expect_punct(p, ')', "')' after inner declarator");
+    }
+  }
+
+  if (!has_inner_parens) {
+    if (p->cur.kind == TOK_IDENT && ident_kw(p, p->cur.v.ident) == KW_NONE) {
+      name = p->cur.v.ident;
+      nloc = tok_loc(&p->cur);
+      advance(p);
+    } else if (!allow_abstract) {
+      perr(p, "expected declarator name");
+    }
+  }
+
+  if (starts_attr(p)) {
+    if (attrs_out) parse_attrs_into(p, attrs_out);
+    else parse_and_discard_attributes(p);
+  }
+
+  DeclSuffix suffs[8];
+  int nsuffs = 0;
+  while (nsuffs < 8) {
+    if (!parse_decl_suffix(p, &suffs[nsuffs])) break;
+    ++nsuffs;
+    if (starts_attr(p)) {
+      if (attrs_out) parse_attrs_into(p, attrs_out);
+      else parse_and_discard_attributes(p);
+    }
+  }
+  if (nsuffs == 8 && (is_punct(&p->cur, '[') || is_punct(&p->cur, '('))) {
+    perr(p, "too many declarator suffixes (raise the cap if needed)");
+  }
+  /* Outer suffixes: the rightmost one is closest to the base type. */
+  for (int i = nsuffs - 1; i >= 0; --i) {
+    base = apply_decl_suffix(p, base, &suffs[i]);
+  }
+
+  /* Inner pointers, in source order: the leftmost `*` is applied first and
+   * therefore ends up innermost, exactly as parse_pointer_layer does for
+   * un-parenthesised pointers. Reverse order would attach each qualifier
+   * set to the wrong pointer level. */
+  for (int i = 0; i < (int)nptrs_inner; ++i) {
+    base = type_ptr(p->pool, base);
+    if (inner_quals[i]) {
+      base = type_qualified(p->pool, base, inner_quals[i]);
+    }
+  }
+
+  /* Inner suffixes bind tighter than the inner pointers, so they are the
+   * outermost layer of the resulting type. */
+  for (int i = n_inner_suffs - 1; i >= 0; --i) {
+    base = apply_decl_suffix(p, base, &inner_suffs[i]);
+  }
+
+  if (name_out) *name_out = name;
+  if (loc_out) *loc_out = nloc;
+  return base;
+}
+
+/* Parse a declarator that must name an identifier (abstract declarators
+ * are rejected). Attributes are discarded; see parse_declarator_full. */
+const Type* parse_declarator(Parser* p, const Type* base, Sym* name_out,
+                             SrcLoc* loc_out) {
+  const int allow_abstract = 0;
+  return parse_declarator_full(p, base, allow_abstract, name_out, loc_out);
+}
+
+/* ============================================================
+ * complete_incomplete_array
+ * ============================================================ */
+
+/* Decode string-literal token `lit` only to learn its decoded length; the
+ * decoded buffer is released straight away. */
+static u32 decoded_literal_len_(Parser* p, Tok lit) {
+  size_t len = 0;
+  u8* decoded = decode_string_literal(p, &lit, &len);
+  p->c->env->heap->free(p->c->env->heap, decoded, 0);
+  return (u32)len;
+}
+
+/* Give an incomplete array type a bound taken from the initializer that
+ * starts at the current token. No tokens are consumed on return: a braced
+ * initializer is recorded and then rewound for the real initializer parse.
+ *
+ *   - `ty` NULL, not an array, or already complete: returned unchanged.
+ *   - character array with a string-literal initializer: the bound is the
+ *     decoded length reported by decode_string_literal.
+ *   - braced initializer: the bound is the number of top-level items,
+ *     except that a single string literal in braces initialising a
+ *     character array uses the literal's decoded length instead.
+ *   - anything else is an error. */
+const Type* complete_incomplete_array(Parser* p, const Type* ty) {
+  const Type* elem_ty;
+
+  if (!ty) return ty;
+  if (ty->kind != TY_ARRAY) return ty;
+  if (!ty->arr.incomplete) return ty;
+
+  elem_ty = ty->arr.elem;
+
+  if (is_char_kind(elem_ty) && p->cur.kind == TOK_STR) {
+    u32 len = decoded_literal_len_(p, p->cur);
+    return type_array(p->pool, elem_ty, len, /*incomplete=*/0);
+  }
+
+  if (is_punct(&p->cur, '{')) {
+    u32 items;
+    int lone_string;
+
+    record_braced_block(p);
+    items = count_recorded_top_level_items(p->replay, p->replay_len);
+    /* replay[0] is the first recorded token, so replay[1] is the first
+     * token after it. */
+    lone_string = items == 1 && p->replay_len >= 3 &&
+                  p->replay[1].kind == TOK_STR && is_char_kind(elem_ty);
+    if (lone_string) {
+      items = decoded_literal_len_(p, p->replay[1]);
+    }
+    replay_rewind(p);
+    return type_array(p->pool, elem_ty, items, /*incomplete=*/0);
+  }
+
+  perr(p, "initializer cannot complete incomplete array type");
+}
diff --git a/src/pp/pp.c b/src/pp/pp.c
@@ -7,352 +7,12 @@
* The token-source stack carries either a Lexer (file or #include'd file) or
* a pre-built Tok[] buffer (macro expansion). Each buffer token carries a
* hideset (Prosser, the standard's "nested-replacement" rule) recording
- * which macro names it must not be re-expanded by during rescan. */
-
-#include "pp/pp.h"
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "core/arena.h"
-#include "core/diag.h"
-#include "core/heap.h"
-#include "core/pool.h"
-
-/* ============================================================
- * Internal types
- * ============================================================ */
-
-typedef struct Macro {
- Sym name;
- SrcLoc def_loc;
- u8 is_func;
- u8 is_variadic;
- u8 pad[2];
- u32 n_params;
- Sym* params; /* parameter names */
- Tok* body; /* body tokens; TOK_PP_PARAM kind + v.punct=idx */
- u32 body_len;
-} Macro;
-
-/* Internal token kinds. Outside the range used by the lexer
- * (TOK_KW_LAST = 0x1000). */
-#define TOK_PP_PARAM ((u16)0x1100)
-#define TOK_PP_PLACEMARKER ((u16)0x1101) /* empty-arg substitution marker */
-
-typedef u32 HidesetId;
-#define HS_EMPTY 0u
-
-typedef struct Hideset {
- u32 n;
- Sym names[1]; /* flexible; allocated with extra trailing slots */
-} Hideset;
-
-typedef enum { SRC_LEX = 1, SRC_BUF = 2 } SrcKind;
-
-typedef struct TokSrc {
- u8 kind;
- /* When set on a SRC_BUF: src_next_raw returns TOK_EOF when this is
- * the top source and it's exhausted, instead of popping. The caller
- * (e.g. argument pre-expansion) explicitly pops the scope when done.
- * This bounds expansion to a single argument's token stream. */
- u8 scope_top;
- u8 pad[2];
- /* SRC_LEX */
- Lexer* lex;
- /* SRC_BUF */
- Tok* toks;
- HidesetId* hs;
- u32 i;
- u32 n;
- /* #line state (SRC_LEX only). line_delta is added to every emitted
- * token's loc.line on its way out so __LINE__ and the output cursor
- * see user-visible numbering. file_override is the Sym (without
- * surrounding quotes) used by __FILE__ when set. */
- i32 line_delta;
- Sym file_override;
-} TokSrc;
-
-/* MacroMap = Sym -> Macro*. Generated open-addressed hashmap with
- * deletion (#undef). See core/hashmap.h. */
-#include "core/hashmap.h"
-static inline u32 macro_hash_(Sym s) { return hash_u32((u32)s); }
-HASHMAP_DEFINE(MacroMap, Sym, Macro*, macro_hash_);
-
-typedef enum IfState {
- IF_INCLUDE = 1, /* group active, emit code */
- IF_SEEK_TRUE = 2, /* skip, looking for the first true elif/else */
- IF_DONE = 3, /* skip, already had a true branch */
-} IfState;
-
-typedef struct IfFrame {
- u8 state;
- u8 has_else;
- u8 pad[2];
- SrcLoc loc;
-} IfFrame;
-
-struct Pp {
- Compiler* c;
-
- /* Source stack — top of stack is sources[nsources-1]. */
- TokSrc* sources;
- u32 nsources;
- u32 sources_cap;
-
- /* Macro table (open-addressed; key = Sym, value = Macro*). */
- MacroMap mtab;
-
- /* Conditional inclusion stack (#if / #ifdef / #ifndef → #endif). */
- IfFrame* ifstk;
- u32 ifstk_n;
- u32 ifstk_cap;
-
- /* Hideset table. Element 0 reserved as HS_EMPTY. */
- Hideset** hsets;
- u32 hsets_n;
- u32 hsets_cap;
-
- /* Include directories (stage 9). */
- struct {
- const char* path;
- u8 system;
- }* inc_dirs;
- u32 ninc_dirs;
- u32 inc_dirs_cap;
-
- /* Internal arena: macro bodies, hidesets, expansion buffers, file
- * data for #include. Lives until pp_free. */
- Arena arena;
-
- /* Cached interned identifiers used for directive recognition. */
- Sym sym_define;
- Sym sym_undef;
- Sym sym_include;
- Sym sym_if;
- Sym sym_ifdef;
- Sym sym_ifndef;
- Sym sym_elif;
- Sym sym_else;
- Sym sym_endif;
- Sym sym_line;
- Sym sym_pragma;
- Sym sym_error;
- Sym sym_embed;
- Sym sym_defined;
- Sym sym_va_args;
- Sym sym_line__; /* __LINE__ */
- Sym sym_file__; /* __FILE__ */
- Sym sym_date__; /* __DATE__ */
- Sym sym_time__; /* __TIME__ */
- Sym sym_stdc__; /* __STDC__ */
- Sym sym_stdc_hosted__;
- Sym sym_stdc_version__;
- Sym sym__pragma; /* _Pragma operator */
- Sym sym_pragma_kw; /* "pragma" — for synthesized #pragma */
-
- /* Pre-formatted "Mmm dd yyyy" / "hh:mm:ss" string spellings for
- * __DATE__ and __TIME__, derived from SOURCE_DATE_EPOCH (or
- * time(NULL) if unset). */
- Sym val_date_str;
- Sym val_time_str;
-};
-
-/* ============================================================
- * Allocation helpers
- * ============================================================ */
-
-static Heap* pp_heap(Pp* pp) { return (Heap*)pp->c->env->heap; }
-
-static void* pp_xrealloc(Pp* pp, void* p, size_t old_n, size_t new_n,
- size_t align) {
- Heap* h = pp_heap(pp);
- void* q = h->realloc(h, p, old_n, new_n, align);
- if (!q) compiler_panic(pp->c, (SrcLoc){0, 0, 0}, "pp: out of memory");
- return q;
-}
-
-static void pp_xfree(Pp* pp, void* p, size_t n) {
- if (p) pp_heap(pp)->free(pp_heap(pp), p, n);
-}
-
-/* ============================================================
- * Token-vector helpers (used by directive readers, macro expansion,
- * pre-expansion of arguments, and the substitute / paste phases).
- * ============================================================ */
-
-typedef struct TokVec {
- Tok* data;
- u32 n;
- u32 cap;
-} TokVec;
-
-static void tv_grow(Pp* pp, TokVec* v, u32 want) {
- u32 nc;
- if (v->cap >= want) return;
- nc = v->cap ? v->cap * 2 : 8;
- while (nc < want) nc *= 2;
- {
- Tok* nb = arena_array(&pp->arena, Tok, nc);
- if (v->n) memcpy(nb, v->data, sizeof(Tok) * v->n);
- v->data = nb;
- v->cap = nc;
- }
-}
-
-static void tv_push(Pp* pp, TokVec* v, Tok t) {
- tv_grow(pp, v, v->n + 1);
- v->data[v->n++] = t;
-}
-
-/* Growable char buffer (arena-backed) used by stringize, #error message
- * concat, and a few other byte-level helpers. */
-typedef struct CharBuf {
- char* data;
- u32 len;
- u32 cap;
-} CharBuf;
-
-static void cb_append(Pp* pp, CharBuf* b, const char* s, u32 n) {
- if (b->len + n > b->cap) {
- u32 nc = b->cap ? b->cap * 2 : 64;
- while (nc < b->len + n) nc *= 2;
- {
- char* nb = (char*)arena_alloc(&pp->arena, nc, 1);
- if (b->len) memcpy(nb, b->data, b->len);
- b->data = nb;
- b->cap = nc;
- }
- }
- if (n) memcpy(b->data + b->len, s, n);
- b->len += n;
-}
-
-static void cb_putc(Pp* pp, CharBuf* b, char c) { cb_append(pp, b, &c, 1); }
-
-/* ============================================================
- * Hideset table
- * ============================================================ */
-
-static int sym_in_array(const Sym* a, u32 n, Sym s) {
- u32 i;
- for (i = 0; i < n; ++i)
- if (a[i] == s) return 1;
- return 0;
-}
-
-static HidesetId hs_register(Pp* pp, const Sym* names, u32 n) {
- Hideset* h;
- u32 i;
- if (n == 0) return HS_EMPTY;
-
- /* Linear search for an existing identical hideset. Hidesets are tiny. */
- for (i = 1; i < pp->hsets_n; ++i) {
- Hideset* e = pp->hsets[i];
- if (e->n != n) continue;
- {
- u32 j;
- for (j = 0; j < n; ++j)
- if (e->names[j] != names[j]) break;
- if (j == n) return (HidesetId)i;
- }
- }
-
- if (pp->hsets_n == pp->hsets_cap) {
- u32 nc = pp->hsets_cap ? pp->hsets_cap * 2 : 8;
- pp->hsets =
- (Hideset**)pp_xrealloc(pp, pp->hsets, sizeof(Hideset*) * pp->hsets_cap,
- sizeof(Hideset*) * nc, _Alignof(Hideset*));
- pp->hsets_cap = nc;
- }
- h = (Hideset*)arena_alloc(&pp->arena,
- sizeof(Hideset) + sizeof(Sym) * (n ? n - 1 : 0),
- _Alignof(Hideset));
- h->n = n;
- for (i = 0; i < n; ++i) h->names[i] = names[i];
- pp->hsets[pp->hsets_n] = h;
- return (HidesetId)pp->hsets_n++;
-}
-
-static int hs_contains(Pp* pp, HidesetId id, Sym s) {
- Hideset* h;
- if (id == HS_EMPTY || s == 0) return 0;
- h = pp->hsets[id];
- return sym_in_array(h->names, h->n, s);
-}
-
-static HidesetId hs_add(Pp* pp, HidesetId id, Sym s) {
- Sym buf[64];
- Hideset* h;
- u32 n;
- u32 i;
-
- if (s == 0) return id;
- if (hs_contains(pp, id, s)) return id;
-
- n = (id == HS_EMPTY) ? 0 : pp->hsets[id]->n;
- if (n + 1 > sizeof(buf) / sizeof(buf[0])) {
- compiler_panic(pp->c, (SrcLoc){0, 0, 0}, "pp: hideset overflow");
- }
- if (id != HS_EMPTY) {
- h = pp->hsets[id];
- for (i = 0; i < h->n; ++i) buf[i] = h->names[i];
- }
- /* Keep sorted (numerically) for canonical hideset identity. */
- {
- u32 pos = n;
- while (pos > 0 && buf[pos - 1] > s) {
- buf[pos] = buf[pos - 1];
- --pos;
- }
- buf[pos] = s;
- }
- return hs_register(pp, buf, n + 1);
-}
-
-/* Used by token-paste in stage 5; declared early so the rest of the file
- * doesn't grow forward decls. */
-__attribute__((unused)) static HidesetId hs_intersect(Pp* pp, HidesetId a,
- HidesetId b) {
- Sym buf[64];
- Hideset *ha, *hb;
- u32 i, j, k;
- if (a == HS_EMPTY || b == HS_EMPTY) return HS_EMPTY;
- if (a == b) return a;
- ha = pp->hsets[a];
- hb = pp->hsets[b];
- /* Both sorted; standard merge intersection. */
- i = j = k = 0;
- while (i < ha->n && j < hb->n) {
- if (ha->names[i] == hb->names[j]) {
- buf[k++] = ha->names[i];
- ++i;
- ++j;
- } else if (ha->names[i] < hb->names[j]) {
- ++i;
- } else {
- ++j;
- }
- }
- return hs_register(pp, buf, k);
-}
-
-/* ============================================================
- * Macro table
- * ============================================================ */
-
-/* Thin wrappers over the generated MacroMap_* functions; preserved
- * because the call sites are tagged "mt_*" throughout this TU. */
-static Macro* mt_get(Pp* pp, Sym name) {
- Macro** v = MacroMap_get(&pp->mtab, name);
- return v ? *v : NULL;
-}
-
-static void mt_put(Pp* pp, Sym name, Macro* m) {
- (void)MacroMap_set(&pp->mtab, name, m);
-}
+ * which macro names it must not be re-expanded by during rescan.
+ *
+ * Residual module: source stack, pp_next / pp_next_raw (public streaming),
+ * pp_new/free, predefined macros, lifecycle, keyword interning. */
-static void mt_del(Pp* pp, Sym name) { MacroMap_del(&pp->mtab, name); }
+#include "pp/pp_priv.h"
/* ============================================================
* Source stack
@@ -362,7 +22,7 @@ static TokSrc* src_top(Pp* pp) {
return pp->nsources ? &pp->sources[pp->nsources - 1] : NULL;
}
-static void src_push(Pp* pp, TokSrc s) {
+void src_push(Pp* pp, TokSrc s) {
if (pp->nsources == pp->sources_cap) {
u32 nc = pp->sources_cap ? pp->sources_cap * 2 : 8;
pp->sources =
@@ -373,7 +33,7 @@ static void src_push(Pp* pp, TokSrc s) {
pp->sources[pp->nsources++] = s;
}
-static void src_pop(Pp* pp) {
+void src_pop(Pp* pp) {
TokSrc* t;
if (!pp->nsources) return;
t = &pp->sources[pp->nsources - 1];
@@ -390,7 +50,7 @@ static void src_pop(Pp* pp) {
* (SRC_LEX vs SRC_BUF). Used by pp_next_raw to gate directive recognition
* to lex-sourced tokens only — a `#` produced by macro expansion never
* starts a directive (§6.10.3.4 ¶3, covered by `63_rescan_not_directive`). */
-static Tok src_next_raw(Pp* pp, HidesetId* hs_out, u8* src_kind_out) {
+Tok src_next_raw(Pp* pp, HidesetId* hs_out, u8* src_kind_out) {
Tok t;
TokSrc* s;
while ((s = src_top(pp)) != NULL) {
@@ -444,7 +104,7 @@ static Tok src_next_raw(Pp* pp, HidesetId* hs_out, u8* src_kind_out) {
* Buffer source push helpers
* ============================================================ */
-static void push_buf(Pp* pp, Tok* toks, HidesetId* hs, u32 n) {
+void push_buf(Pp* pp, Tok* toks, HidesetId* hs, u32 n) {
TokSrc s;
memset(&s, 0, sizeof(s));
s.kind = SRC_BUF;
@@ -456,2109 +116,9 @@ static void push_buf(Pp* pp, Tok* toks, HidesetId* hs, u32 n) {
}
/* ============================================================
- * Directive parsing
- * ============================================================ */
-
-/* Read tokens up through (and including) the next TOK_NEWLINE / TOK_EOF.
- * Drops the newline; collected tokens are arena-allocated and returned via
- * *out_toks/out_n. */
-static void read_directive_line(Pp* pp, Tok** out_toks, u32* out_n) {
- Tok* buf = NULL;
- u32 cap = 0, n = 0;
- Tok t;
- HidesetId hs;
- for (;;) {
- t = src_next_raw(pp, &hs, NULL);
- if (t.kind == TOK_NEWLINE || t.kind == TOK_EOF) break;
- if (n == cap) {
- u32 nc = cap ? cap * 2 : 8;
- Tok* nb = (Tok*)arena_alloc(&pp->arena, sizeof(Tok) * nc, _Alignof(Tok));
- if (cap) memcpy(nb, buf, sizeof(Tok) * cap);
- buf = nb;
- cap = nc;
- }
- buf[n++] = t;
- }
- *out_toks = buf;
- *out_n = n;
-}
-
-static int body_tokens_equal(const Tok* a, u32 na, const Tok* b, u32 nb) {
- u32 i;
- if (na != nb) return 0;
- for (i = 0; i < na; ++i) {
- if (a[i].kind != b[i].kind) return 0;
- if (a[i].spelling != b[i].spelling) return 0;
- /* Whitespace separation must match (§6.10.3 ¶2). The first body
- * token's leading-space bit is meaningless (it's whatever was
- * between macro name and body); skip i==0 for that bit. */
- if (i > 0) {
- if ((a[i].flags & TF_HAS_SPACE) != (b[i].flags & TF_HAS_SPACE)) {
- return 0;
- }
- }
- }
- return 1;
-}
-
-static int macros_equal(const Macro* a, const Macro* b) {
- if (a->is_func != b->is_func) return 0;
- if (a->is_variadic != b->is_variadic) return 0;
- if (a->n_params != b->n_params) return 0;
- {
- u32 i;
- for (i = 0; i < a->n_params; ++i) {
- if (a->params[i] != b->params[i]) return 0;
- }
- }
- return body_tokens_equal(a->body, a->body_len, b->body, b->body_len);
-}
-
-static void do_define(Pp* pp, const Tok* line, u32 n) {
- Macro* m;
- u32 i = 0;
- Sym name;
- SrcLoc def_loc;
- Macro* existing;
-
- if (i >= n || line[i].kind != TOK_IDENT) {
- compiler_panic(pp->c, n ? line[0].loc : (SrcLoc){0, 0, 0},
- "#define: expected macro name");
- }
- name = line[i].v.ident;
- def_loc = line[i].loc;
- ++i;
-
- m = arena_znew(&pp->arena, Macro);
- m->name = name;
- m->def_loc = def_loc;
-
- /* Function-like vs object-like: '(' immediately after the name with no
- * intervening whitespace. */
- if (i < n && line[i].kind == TOK_PUNCT && line[i].v.punct == '(' &&
- (line[i].flags & TF_HAS_SPACE) == 0) {
- Sym* params = NULL;
- u32 pcap = 0, pn = 0;
- ++i;
- m->is_func = 1;
- if (i < n && line[i].kind == TOK_PUNCT && line[i].v.punct == ')') {
- ++i;
- } else {
- for (;;) {
- if (i >= n) {
- compiler_panic(pp->c, def_loc,
- "#define: unterminated parameter list");
- }
- if (line[i].kind == TOK_PUNCT && line[i].v.punct == P_ELLIPSIS) {
- /* Append a synthetic __VA_ARGS__ param so body-rewrite
- * matches the standard identifier directly. */
- if (pn == pcap) {
- u32 nc = pcap ? pcap * 2 : 4;
- Sym* nb = arena_array(&pp->arena, Sym, nc);
- if (pcap) memcpy(nb, params, sizeof(Sym) * pcap);
- params = nb;
- pcap = nc;
- }
- params[pn++] = pp->sym_va_args;
- m->is_variadic = 1;
- ++i;
- } else if (line[i].kind == TOK_IDENT) {
- if (pn == pcap) {
- u32 nc = pcap ? pcap * 2 : 4;
- Sym* nb = arena_array(&pp->arena, Sym, nc);
- if (pcap) memcpy(nb, params, sizeof(Sym) * pcap);
- params = nb;
- pcap = nc;
- }
- params[pn++] = line[i].v.ident;
- ++i;
- } else {
- compiler_panic(pp->c, line[i].loc, "#define: bad parameter list");
- }
- if (i >= n) {
- compiler_panic(pp->c, def_loc,
- "#define: unterminated parameter list");
- }
- if (line[i].kind == TOK_PUNCT && line[i].v.punct == ')') {
- ++i;
- break;
- }
- if (m->is_variadic) {
- compiler_panic(pp->c, line[i].loc,
- "#define: '...' must be last parameter");
- }
- if (line[i].kind == TOK_PUNCT && line[i].v.punct == ',') {
- ++i;
- continue;
- }
- compiler_panic(pp->c, line[i].loc, "#define: expected ',' or ')'");
- }
- }
- m->params = params;
- m->n_params = pn;
- }
-
- /* Refuse define/undef of a few names the spec reserves: `defined`
- * and a small set of mandatory predefined macros. */
- if (name == pp->sym_defined || name == pp->sym_line__ ||
- name == pp->sym_file__ || name == pp->sym_date__ ||
- name == pp->sym_time__) {
- compiler_panic(pp->c, def_loc,
- "#define of a reserved / predefined name is not allowed");
- }
- /* Static predefineds are already in the macro table; redefining
- * with a different body is caught by the existing macros_equal
- * check below, but #define of __STDC__ et al. with the SAME body
- * should also be rejected. */
- if (name == pp->sym_stdc__ || name == pp->sym_stdc_hosted__ ||
- name == pp->sym_stdc_version__) {
- /* Allow re-registration of the predefined value at pp_new time
- * but reject user-level redefinition. We detect "user-level"
- * by checking whether it's already in the table — at pp_new the
- * first call goes through cleanly. */
- if (mt_get(pp, name)) {
- compiler_panic(pp->c, def_loc,
- "#define of a mandatory predefined macro is not allowed");
- }
- }
-
- /* Body: rewrite parameter occurrences to TOK_PP_PARAM. */
- {
- u32 body_n = n - i;
- u32 j;
- m->body = body_n ? arena_array(&pp->arena, Tok, body_n) : NULL;
- m->body_len = body_n;
- for (j = 0; j < body_n; ++j) {
- Tok t = line[i + j];
- if (m->is_func && t.kind == TOK_IDENT) {
- u32 p;
- for (p = 0; p < m->n_params; ++p) {
- if (m->params[p] == t.v.ident) {
- t.kind = TOK_PP_PARAM;
- t.v.punct = p;
- break;
- }
- }
- }
- /* §6.10.3 ¶5: __VA_ARGS__ outside a variadic macro is
- * undefined behavior; we diagnose. */
- if (!m->is_variadic && t.kind == TOK_IDENT &&
- t.v.ident == pp->sym_va_args) {
- compiler_panic(pp->c, t.loc,
- "__VA_ARGS__ may only appear in a variadic macro body");
- }
- m->body[j] = t;
- }
- /* Drop the leading-space bit on the first body token: it reflects
- * the whitespace between the macro name (or close-paren) and the
- * body, which is irrelevant to expansion output. */
- if (m->body_len) m->body[0].flags &= (u16)~TF_HAS_SPACE;
- }
-
- existing = mt_get(pp, name);
- if (existing) {
- if (!macros_equal(existing, m)) {
- compiler_panic(pp->c, def_loc,
- "macro redefined with different replacement");
- }
- return;
- }
- mt_put(pp, name, m);
-}
-
-static void do_undef(Pp* pp, const Tok* line, u32 n) {
- Sym name;
- if (!n || line[0].kind != TOK_IDENT) {
- compiler_panic(pp->c, n ? line[0].loc : (SrcLoc){0, 0, 0},
- "#undef: expected identifier");
- }
- name = line[0].v.ident;
- if (name == pp->sym_defined || name == pp->sym_line__ ||
- name == pp->sym_file__ || name == pp->sym_date__ ||
- name == pp->sym_time__ || name == pp->sym_stdc__ ||
- name == pp->sym_stdc_hosted__ || name == pp->sym_stdc_version__) {
- compiler_panic(pp->c, line[0].loc,
- "#undef of a mandatory predefined name is not allowed");
- }
- mt_del(pp, name);
-}
-
-/* ============================================================
- * Conditional inclusion (§6.10.1)
+ * Public streaming entries
* ============================================================ */
-static void expand_arg_to_eof(Pp* pp, Tok* in, u32 nin, TokVec* out);
-static int peek_for_invoke_paren(Pp* pp, int* ws_has_space_out);
-
-static void if_push(Pp* pp, IfFrame f) {
- if (pp->ifstk_n == pp->ifstk_cap) {
- u32 nc = pp->ifstk_cap ? pp->ifstk_cap * 2 : 4;
- pp->ifstk = pp_xrealloc(pp, pp->ifstk, sizeof(IfFrame) * pp->ifstk_cap,
- sizeof(IfFrame) * nc, _Alignof(IfFrame));
- pp->ifstk_cap = nc;
- }
- pp->ifstk[pp->ifstk_n++] = f;
-}
-
-static IfFrame* if_top(Pp* pp) {
- return pp->ifstk_n ? &pp->ifstk[pp->ifstk_n - 1] : NULL;
-}
-
-static void if_pop(Pp* pp) {
- if (pp->ifstk_n) --pp->ifstk_n;
-}
-
-/* Parse a C integer constant from a pp-number's spelling. Suffixes (u, l,
- * etc.) are ignored. Recognizes decimal, hex (0x...), and octal (0...). */
-static i64 parse_pp_int(const char* s, size_t n) {
- int base = 10;
- size_t i = 0;
- i64 val = 0;
- if (n >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
- base = 16;
- i = 2;
- } else if (n >= 1 && s[0] == '0') {
- base = 8;
- i = 1;
- }
- for (; i < n; ++i) {
- char c = s[i];
- int d;
- if (c >= '0' && c <= '9')
- d = c - '0';
- else if (base == 16 && c >= 'a' && c <= 'f')
- d = c - 'a' + 10;
- else if (base == 16 && c >= 'A' && c <= 'F')
- d = c - 'A' + 10;
- else
- break;
- if (d >= base) break;
- val = val * (i64)base + (i64)d;
- }
- return val;
-}
-
-/* Pre-pass: replace `defined X` / `defined ( X )` with a 0/1 pp-number,
- * preserving the rest of the token sequence. The operand of `defined` is
- * NOT macro-expanded. Output is a fresh TokVec. */
-static void prepass_defined(Pp* pp, const Tok* in, u32 nin, TokVec* out) {
- u32 i;
- for (i = 0; i < nin; ++i) {
- if (in[i].kind == TOK_IDENT && in[i].v.ident == pp->sym_defined) {
- int has_paren = 0;
- Sym ident = 0;
- u32 j = i + 1;
- if (j < nin && in[j].kind == TOK_PUNCT && in[j].v.punct == '(') {
- has_paren = 1;
- ++j;
- }
- if (j >= nin || in[j].kind != TOK_IDENT) {
- compiler_panic(pp->c, in[i].loc,
- "operand of 'defined' must be an identifier");
- }
- ident = in[j].v.ident;
- ++j;
- if (has_paren) {
- if (j >= nin || in[j].kind != TOK_PUNCT || in[j].v.punct != ')') {
- compiler_panic(pp->c, in[i].loc,
- "expected ')' after 'defined' operand");
- }
- ++j;
- }
- {
- Tok t;
- memset(&t, 0, sizeof(t));
- t.kind = TOK_NUM;
- t.flags = in[i].flags & (TF_AT_BOL | TF_HAS_SPACE);
- t.loc = in[i].loc;
- t.spelling =
- pool_intern_cstr(pp->c->global, mt_get(pp, ident) ? "1" : "0");
- tv_push(pp, out, t);
- }
- i = j - 1;
- } else {
- tv_push(pp, out, in[i]);
- }
- }
-}
-
-/* Macro-expand a sequence of pre-#if tokens to completion. Wraps the
- * fixed-buffer arg pre-expansion machinery with TOK_IDENT → 0
- * substitution per §6.10.1 ¶4. */
-static void expand_for_if(Pp* pp, const Tok* in, u32 nin, TokVec* out) {
- Tok* slice;
- if (nin == 0) return;
- slice = arena_array(&pp->arena, Tok, nin);
- memcpy(slice, in, sizeof(Tok) * nin);
- expand_arg_to_eof(pp, slice, nin, out);
- /* Replace remaining identifiers with `0`. */
- {
- u32 i;
- Sym zero = pool_intern_cstr(pp->c->global, "0");
- for (i = 0; i < out->n; ++i) {
- if (out->data[i].kind == TOK_IDENT) {
- out->data[i].kind = TOK_NUM;
- out->data[i].spelling = zero;
- }
- }
- }
-}
-
-/* Recursive-descent expression evaluator over an expanded token list. */
-typedef struct EE {
- Pp* pp;
- const Tok* toks;
- u32 n;
- u32 pos;
- SrcLoc loc;
-} EE;
-
-static i64 ee_ternary(EE* e);
-
-static const Tok* ee_peek(EE* e) {
- return e->pos < e->n ? &e->toks[e->pos] : NULL;
-}
-
-static int ee_match_punct(EE* e, u32 p) {
- const Tok* t = ee_peek(e);
- if (t && t->kind == TOK_PUNCT && t->v.punct == p) {
- ++e->pos;
- return 1;
- }
- return 0;
-}
-
-static i64 ee_primary(EE* e) {
- const Tok* t = ee_peek(e);
- if (!t) compiler_panic(e->pp->c, e->loc, "#if: missing operand");
- if (t->kind == TOK_NUM) {
- size_t slen;
- const char* s = pool_str(e->pp->c->global, t->spelling, &slen);
- ++e->pos;
- return parse_pp_int(s, slen);
- }
- if (t->kind == TOK_CHR) {
- /* Treat as the codepoint of the first character (post-decoding
- * not implemented; cover the common case of a single ASCII
- * char). */
- size_t slen;
- const char* s = pool_str(e->pp->c->global, t->spelling, &slen);
- ++e->pos;
- if (slen >= 3 && s[0] == '\'') return (unsigned char)s[1];
- return 0;
- }
- if (t->kind == TOK_PUNCT && t->v.punct == '(') {
- i64 v;
- ++e->pos;
- v = ee_ternary(e);
- if (!ee_match_punct(e, ')')) {
- compiler_panic(e->pp->c, t->loc, "#if: expected ')'");
- }
- return v;
- }
- compiler_panic(e->pp->c, t->loc, "#if: unexpected token in expression");
- return 0;
-}
-
-static i64 ee_unary(EE* e) {
- const Tok* t = ee_peek(e);
- if (t && t->kind == TOK_PUNCT) {
- u32 p = t->v.punct;
- if (p == '!' || p == '-' || p == '+' || p == '~') {
- i64 v;
- ++e->pos;
- v = ee_unary(e);
- switch (p) {
- case '!':
- return v ? 0 : 1;
- case '-':
- return -v;
- case '+':
- return v;
- case '~':
- return ~v;
- }
- }
- }
- return ee_primary(e);
-}
-
-static i64 ee_mul(EE* e) {
- i64 v = ee_unary(e);
- for (;;) {
- const Tok* t = ee_peek(e);
- if (!t || t->kind != TOK_PUNCT) break;
- if (t->v.punct == '*') {
- ++e->pos;
- v = v * ee_unary(e);
- } else if (t->v.punct == '/') {
- i64 r;
- ++e->pos;
- r = ee_unary(e);
- if (r == 0) compiler_panic(e->pp->c, t->loc, "#if: division by zero");
- v = v / r;
- } else if (t->v.punct == '%') {
- i64 r;
- ++e->pos;
- r = ee_unary(e);
- if (r == 0) compiler_panic(e->pp->c, t->loc, "#if: modulo by zero");
- v = v % r;
- } else
- break;
- }
- return v;
-}
-
-static i64 ee_add(EE* e) {
- i64 v = ee_mul(e);
- for (;;) {
- const Tok* t = ee_peek(e);
- if (!t || t->kind != TOK_PUNCT) break;
- if (t->v.punct == '+') {
- ++e->pos;
- v = v + ee_mul(e);
- } else if (t->v.punct == '-') {
- ++e->pos;
- v = v - ee_mul(e);
- } else
- break;
- }
- return v;
-}
-
-static i64 ee_shift(EE* e) {
- i64 v = ee_add(e);
- for (;;) {
- const Tok* t = ee_peek(e);
- if (!t || t->kind != TOK_PUNCT) break;
- if (t->v.punct == P_SHL) {
- ++e->pos;
- v = v << ee_add(e);
- } else if (t->v.punct == P_SHR) {
- ++e->pos;
- v = v >> ee_add(e);
- } else
- break;
- }
- return v;
-}
-
-static i64 ee_rel(EE* e) {
- i64 v = ee_shift(e);
- for (;;) {
- const Tok* t = ee_peek(e);
- if (!t || t->kind != TOK_PUNCT) break;
- if (t->v.punct == '<') {
- ++e->pos;
- v = (v < ee_shift(e));
- } else if (t->v.punct == '>') {
- ++e->pos;
- v = (v > ee_shift(e));
- } else if (t->v.punct == P_LE) {
- ++e->pos;
- v = (v <= ee_shift(e));
- } else if (t->v.punct == P_GE) {
- ++e->pos;
- v = (v >= ee_shift(e));
- } else
- break;
- }
- return v;
-}
-
-static i64 ee_eq(EE* e) {
- i64 v = ee_rel(e);
- for (;;) {
- const Tok* t = ee_peek(e);
- if (!t || t->kind != TOK_PUNCT) break;
- if (t->v.punct == P_EQ) {
- ++e->pos;
- v = (v == ee_rel(e));
- } else if (t->v.punct == P_NE) {
- ++e->pos;
- v = (v != ee_rel(e));
- } else
- break;
- }
- return v;
-}
-
-static i64 ee_band(EE* e) {
- i64 v = ee_eq(e);
- while (ee_match_punct(e, '&')) v = v & ee_eq(e);
- return v;
-}
-
-static i64 ee_bxor(EE* e) {
- i64 v = ee_band(e);
- while (ee_match_punct(e, '^')) v = v ^ ee_band(e);
- return v;
-}
-
-static i64 ee_bor(EE* e) {
- i64 v = ee_bxor(e);
- while (ee_match_punct(e, '|')) v = v | ee_bxor(e);
- return v;
-}
-
-static i64 ee_logand(EE* e) {
- i64 v = ee_bor(e);
- while (ee_match_punct(e, P_AND)) {
- i64 r = ee_bor(e);
- v = (v && r);
- }
- return v;
-}
-
-static i64 ee_logor(EE* e) {
- i64 v = ee_logand(e);
- while (ee_match_punct(e, P_OR)) {
- i64 r = ee_logand(e);
- v = (v || r);
- }
- return v;
-}
-
-static i64 ee_ternary(EE* e) {
- i64 c = ee_logor(e);
- if (ee_match_punct(e, '?')) {
- i64 a = ee_ternary(e);
- i64 b;
- if (!ee_match_punct(e, ':')) {
- compiler_panic(e->pp->c, e->loc, "#if: ':' expected in ternary");
- }
- b = ee_ternary(e);
- return c ? a : b;
- }
- return c;
-}
-
-static i64 eval_if_expr(Pp* pp, const Tok* line, u32 n, SrcLoc loc) {
- TokVec defs = {0};
- TokVec exp = {0};
- EE e;
- i64 v;
-
- prepass_defined(pp, line, n, &defs);
- expand_for_if(pp, defs.data, defs.n, &exp);
-
- e.pp = pp;
- e.toks = exp.data;
- e.n = exp.n;
- e.pos = 0;
- e.loc = loc;
- v = ee_ternary(&e);
- if (e.pos != e.n) {
- compiler_panic(pp->c, e.loc,
- "#if: unexpected trailing tokens in expression");
- }
- return v;
-}
-
-static void consume_to_newline(Pp* pp) {
- Tok t;
- do {
- t = src_next_raw(pp, NULL, NULL);
- } while (t.kind != TOK_NEWLINE && t.kind != TOK_EOF);
-}
-
-/* Drive the source forward consuming tokens until we either:
- * - reach a balancing #endif (pops the frame, returns), or
- * - reach a #elif / #else that flips the top frame to IF_INCLUDE
- * (returns with that frame active).
- * Nested #if directives inside the skipped group are tracked via
- * `local_depth`. Unrecognised directives in skipped groups are tolerated
- * (§6.10 ¶4, covered by `8c_skipped_relaxed_syntax`). */
-static void skip_until_active(Pp* pp) {
- int local_depth = 0;
- while (pp->ifstk_n > 0) {
- IfFrame* top = if_top(pp);
- Tok t;
- if (top->state == IF_INCLUDE && local_depth == 0) return;
- t = src_next_raw(pp, NULL, NULL);
- if (t.kind == TOK_EOF) {
- compiler_panic(pp->c, top->loc, "unterminated #if / #ifdef");
- }
- if (t.kind != TOK_PP_HASH || (t.flags & TF_AT_BOL) == 0) continue;
-
- /* Read directive name (or null directive). */
- {
- Tok nt = src_next_raw(pp, NULL, NULL);
- Sym name;
- if (nt.kind == TOK_NEWLINE || nt.kind == TOK_EOF) continue;
- if (nt.kind != TOK_IDENT) {
- consume_to_newline(pp);
- continue;
- }
- name = nt.v.ident;
- if (name == pp->sym_if || name == pp->sym_ifdef ||
- name == pp->sym_ifndef) {
- ++local_depth;
- consume_to_newline(pp);
- continue;
- }
- if (name == pp->sym_endif) {
- consume_to_newline(pp);
- if (local_depth > 0) {
- --local_depth;
- continue;
- }
- if_pop(pp);
- return;
- }
- if (name == pp->sym_else) {
- consume_to_newline(pp);
- if (local_depth > 0) continue;
- if (top->has_else) {
- compiler_panic(pp->c, t.loc, "duplicate #else");
- }
- top->has_else = 1;
- if (top->state == IF_SEEK_TRUE) {
- top->state = IF_INCLUDE;
- return;
- }
- top->state = IF_DONE;
- continue;
- }
- if (name == pp->sym_elif) {
- if (local_depth > 0 || top->has_else || top->state == IF_DONE) {
- consume_to_newline(pp);
- continue;
- }
- if (top->state == IF_SEEK_TRUE) {
- Tok* line;
- u32 ln;
- i64 v;
- read_directive_line(pp, &line, &ln);
- v = eval_if_expr(pp, line, ln, t.loc);
- if (v != 0) {
- top->state = IF_INCLUDE;
- return;
- }
- continue;
- }
- /* Was IF_INCLUDE; #elif means we're done. (Should already
- * have been transitioned to DONE before entering this
- * skip — defensive.) */
- top->state = IF_DONE;
- consume_to_newline(pp);
- continue;
- }
- /* Other directive — relaxed: skip silently. */
- consume_to_newline(pp);
- continue;
- }
- }
-}
-
-static int is_predefined_macro_name(Pp* pp, Sym name) {
- return name == pp->sym_va_args || name == pp->sym_line__ ||
- name == pp->sym_file__ || name == pp->sym_date__ ||
- name == pp->sym_time__;
- /* __STDC__/__STDC_HOSTED__/__STDC_VERSION__ are registered as real
- * macros, so the macro-table lookup catches them. */
-}
-
-static void do_ifdef(Pp* pp, const Tok* line, u32 n, int negate, SrcLoc loc) {
- int defined;
- IfFrame f;
- if (n < 1 || line[0].kind != TOK_IDENT) {
- compiler_panic(pp->c, loc,
- negate ? "#ifndef: expected identifier"
- : "#ifdef: expected identifier");
- }
- defined = (mt_get(pp, line[0].v.ident) != NULL) ||
- is_predefined_macro_name(pp, line[0].v.ident);
- if (negate) defined = !defined;
- memset(&f, 0, sizeof(f));
- f.state = defined ? IF_INCLUDE : IF_SEEK_TRUE;
- f.loc = loc;
- if_push(pp, f);
- if (!defined) skip_until_active(pp);
-}
-
-static void do_if_directive(Pp* pp, const Tok* line, u32 n, SrcLoc loc) {
- i64 v = eval_if_expr(pp, line, n, loc);
- IfFrame f;
- memset(&f, 0, sizeof(f));
- f.state = v ? IF_INCLUDE : IF_SEEK_TRUE;
- f.loc = loc;
- if_push(pp, f);
- if (!v) skip_until_active(pp);
-}
-
-static void do_elif(Pp* pp, SrcLoc loc) {
- /* We only reach do_elif from the active branch — meaning the
- * preceding group emitted code. So we must skip the rest. */
- IfFrame* top = if_top(pp);
- if (!top) compiler_panic(pp->c, loc, "stray #elif");
- if (top->has_else) compiler_panic(pp->c, loc, "#elif after #else");
- top->state = IF_DONE;
- skip_until_active(pp);
-}
-
-static void do_else(Pp* pp, SrcLoc loc) {
- IfFrame* top = if_top(pp);
- if (!top) compiler_panic(pp->c, loc, "stray #else");
- if (top->has_else) compiler_panic(pp->c, loc, "duplicate #else");
- top->has_else = 1;
- top->state = IF_DONE;
- skip_until_active(pp);
-}
-
-static void do_endif(Pp* pp, SrcLoc loc) {
- if (!if_top(pp)) compiler_panic(pp->c, loc, "stray #endif");
- if_pop(pp);
-}
-
-/* ============================================================
- * #include (§6.10.2)
- * ============================================================ */
-
-/* Read `path` via the host's file_io and copy its bytes into the pp
- * arena so they outlive io->release. Returns 1 on success. */
-static int try_open_include(Pp* pp, const char* path, const u8** data_out,
- size_t* size_out) {
- CfreeFileData fd;
- const CfreeFileIO* io;
- u8* buf;
-
- memset(&fd, 0, sizeof(fd));
- io = pp->c->env->file_io;
- if (!io || !io->read_all) {
- compiler_panic(pp->c, (SrcLoc){0, 0, 0},
- "#include: env.file_io is not configured");
- }
- if (!io->read_all(io->user, path, &fd)) return 0;
- {
- size_t sz = fd.size;
- buf = (u8*)arena_alloc(&pp->arena, sz ? sz : 1, 1);
- if (sz && fd.data) memcpy(buf, fd.data, sz);
- if (io->release) io->release(io->user, &fd); /* zeros fd */
- *data_out = buf;
- *size_out = sz;
- }
- return 1;
-}
-
-/* Return the includer's directory for resolving a quoted include, or "."
- * for in-memory/builtin sources (where CWD is the natural fallback, like
- * gcc treats stdin). `dir_out` must point to a buffer of size >= cap. */
-static int includer_dir(Pp* pp, SrcLoc loc, char* dir_out, size_t cap) {
- const SourceFile* sf = source_file(pp->c->sources, loc.file_id);
- const char* p = NULL;
- size_t plen = 0;
- const char* slash;
- size_t dlen;
- if (sf && sf->name) p = pool_str(pp->c->global, sf->name, &plen);
- if (!p || plen == 0 || p[0] == '<') {
- if (cap < 2) return 0;
- dir_out[0] = '.';
- dir_out[1] = 0;
- return 1;
- }
- slash = NULL;
- {
- size_t i;
- for (i = plen; i > 0; --i) {
- if (p[i - 1] == '/') {
- slash = p + i - 1;
- break;
- }
- }
- }
- if (!slash) {
- if (cap < 2) return 0;
- dir_out[0] = '.';
- dir_out[1] = 0;
- return 1;
- }
- dlen = (size_t)(slash - p);
- if (dlen == 0) dlen = 1; /* path was "/x" — dir is "/" */
- if (dlen + 1 > cap) return 0;
- memcpy(dir_out, p, dlen);
- dir_out[dlen] = 0;
- return 1;
-}
-
-/* Search for a header. Absolute paths are opened verbatim. Quoted form
- * ("...") additionally searches the directory of the file containing the
- * #include first (per C §6.10.2); bracket form (<...>) skips that step.
- * Both forms then walk the configured -I / -isystem dirs in order. */
-static int find_and_open_include(Pp* pp, const char* path, int system,
- SrcLoc loc, const u8** data, size_t* size,
- char* resolved, size_t resolved_cap) {
- char buf[4096];
- u32 i;
- size_t plen = strlen(path);
-
- if (plen > 0 && path[0] == '/') {
- if (try_open_include(pp, path, data, size)) {
- if (plen + 1 > resolved_cap) return 0;
- memcpy(resolved, path, plen + 1);
- return 1;
- }
- return 0;
- }
-
- if (!system) {
- char dir[4096];
- if (includer_dir(pp, loc, dir, sizeof(dir))) {
- size_t dlen = strlen(dir);
- if (dlen + 1 + plen + 1 <= sizeof(buf)) {
- memcpy(buf, dir, dlen);
- buf[dlen] = '/';
- memcpy(buf + dlen + 1, path, plen);
- buf[dlen + 1 + plen] = 0;
- if (try_open_include(pp, buf, data, size)) {
- if (dlen + 1 + plen + 1 > resolved_cap) return 0;
- memcpy(resolved, buf, dlen + 1 + plen + 1);
- return 1;
- }
- }
- }
- }
- for (i = 0; i < pp->ninc_dirs; ++i) {
- const char* d = pp->inc_dirs[i].path;
- size_t dlen = strlen(d);
- if (dlen + 1 + plen + 1 > sizeof(buf)) continue;
- memcpy(buf, d, dlen);
- buf[dlen] = '/';
- memcpy(buf + dlen + 1, path, plen);
- buf[dlen + 1 + plen] = 0;
- if (try_open_include(pp, buf, data, size)) {
- if (dlen + 1 + plen + 1 > resolved_cap) return 0;
- memcpy(resolved, buf, dlen + 1 + plen + 1);
- return 1;
- }
- }
- return 0;
-}
-
-/* Parse the directive arguments into (path, system_flag). Handles:
- * - directly-lexed TOK_HEADER: < ... > or " ... "
- * - macro-replaced form: line is macro-expanded, then expected to
- * produce either a TOK_STR ("...") or a < ... > sequence. */
-static void parse_include_path(Pp* pp, const Tok* line, u32 n, SrcLoc loc,
- char* path_out, size_t cap, int* system_out) {
- if (n == 0) compiler_panic(pp->c, loc, "#include: missing path");
-
- if (line[0].kind == TOK_HEADER) {
- size_t slen = 0;
- const char* s = pool_str(pp->c->global, line[0].spelling, &slen);
- if (slen < 2) compiler_panic(pp->c, loc, "#include: malformed header name");
- if (s[0] == '<' && s[slen - 1] == '>')
- *system_out = 1;
- else if (s[0] == '"' && s[slen - 1] == '"')
- *system_out = 0;
- else
- compiler_panic(pp->c, loc, "#include: malformed header name");
- if (slen - 2 + 1 > cap)
- compiler_panic(pp->c, loc, "#include: path too long");
- memcpy(path_out, s + 1, slen - 2);
- path_out[slen - 2] = 0;
- return;
- }
-
- /* Macro-replaced form. */
- {
- TokVec exp = {0};
- Tok* slice = arena_array(&pp->arena, Tok, n);
- memcpy(slice, line, sizeof(Tok) * n);
- expand_arg_to_eof(pp, slice, n, &exp);
-
- if (exp.n == 0) {
- compiler_panic(pp->c, loc, "#include: empty after macro replacement");
- }
- if (exp.data[0].kind == TOK_STR) {
- size_t slen = 0;
- const char* s = pool_str(pp->c->global, exp.data[0].spelling, &slen);
- if (slen < 2 || s[0] != '"' || s[slen - 1] != '"') {
- compiler_panic(pp->c, loc, "#include: malformed string");
- }
- if (slen - 2 + 1 > cap) {
- compiler_panic(pp->c, loc, "#include: path too long");
- }
- memcpy(path_out, s + 1, slen - 2);
- path_out[slen - 2] = 0;
- *system_out = 0;
- return;
- }
- if (exp.data[0].kind == TOK_PUNCT && exp.data[0].v.punct == '<') {
- size_t pos = 0;
- u32 i;
- for (i = 1; i < exp.n; ++i) {
- size_t slen = 0;
- const char* s = NULL;
- if (exp.data[i].kind == TOK_PUNCT && exp.data[i].v.punct == '>') {
- break;
- }
- if (exp.data[i].spelling) {
- s = pool_str(pp->c->global, exp.data[i].spelling, &slen);
- }
- if (s && pos + slen + 1 <= cap) {
- memcpy(path_out + pos, s, slen);
- pos += slen;
- }
- }
- path_out[pos] = 0;
- *system_out = 1;
- return;
- }
- compiler_panic(pp->c, loc,
- "#include: expected \"...\" or <...> after expansion");
- }
-}
-
-static void do_include(Pp* pp, const Tok* line, u32 n, SrcLoc loc) {
- char path[4096];
- char resolved[4096];
- int system_form = 0;
- const u8* data;
- size_t size;
- Lexer* lex;
- u32 includer_id = 0;
- u32 included_id;
- u32 i;
- TokSrc s;
-
- parse_include_path(pp, line, n, loc, path, sizeof(path), &system_form);
-
- if (!find_and_open_include(pp, path, system_form, loc, &data, &size, resolved,
- sizeof(resolved))) {
- compiler_panic(pp->c, loc, "#include: file not found: %s", path);
- }
-
- /* Walk the source stack to find the current includer's file_id. */
- for (i = pp->nsources; i > 0; --i) {
- TokSrc* tp = &pp->sources[i - 1];
- if (tp->kind == SRC_LEX && tp->lex) {
- includer_id = lex_file_id(tp->lex);
- break;
- }
- }
-
- lex = lex_open_mem(pp->c, resolved, (const char*)data, size);
- included_id = lex_file_id(lex);
-
- memset(&s, 0, sizeof(s));
- s.kind = SRC_LEX;
- s.lex = lex;
- src_push(pp, s);
-
- source_add_include(pp->c->sources, includer_id, included_id, loc,
- system_form);
-}
-
-/* ============================================================
- * #line (§6.10.4)
- * ============================================================ */
-
-/* Find the topmost SRC_LEX source on the stack — that's the "current
- * file" whose line/file should track #line directives. */
-static TokSrc* current_lex_src(Pp* pp) {
- u32 i;
- for (i = pp->nsources; i > 0; --i) {
- TokSrc* s = &pp->sources[i - 1];
- if (s->kind == SRC_LEX) return s;
- }
- return NULL;
-}
-
-static void do_line(Pp* pp, const Tok* line, u32 n, SrcLoc loc) {
- /* Macro-replace arguments first (a2). */
- TokVec exp = {0};
- Tok* slice;
- TokSrc* lex_src;
- i64 target_line;
- Sym target_file = 0;
-
- if (n == 0) compiler_panic(pp->c, loc, "#line: missing arguments");
- slice = arena_array(&pp->arena, Tok, n);
- memcpy(slice, line, sizeof(Tok) * n);
- expand_arg_to_eof(pp, slice, n, &exp);
-
- if (exp.n == 0 || exp.data[0].kind != TOK_NUM) {
- compiler_panic(pp->c, loc, "#line: expected line number");
- }
- {
- size_t sl = 0;
- const char* s = pool_str(pp->c->global, exp.data[0].spelling, &sl);
- target_line = parse_pp_int(s, sl);
- }
- if (exp.n >= 2) {
- if (exp.data[1].kind != TOK_STR) {
- compiler_panic(pp->c, loc, "#line: file argument must be a string");
- }
- {
- size_t sl = 0;
- const char* s = pool_str(pp->c->global, exp.data[1].spelling, &sl);
- if (sl >= 2 && s[0] == '"' && s[sl - 1] == '"') {
- target_file = pool_intern(pp->c->global, s + 1, sl - 2);
- }
- }
- }
-
- lex_src = current_lex_src(pp);
- if (!lex_src) compiler_panic(pp->c, loc, "#line outside any file");
- {
- /* The next token (post-directive-NL) currently has lex.line ==
- * <lex's line counter>. Set delta so its user-visible line ==
- * target_line. */
- SrcLoc here = lex_loc(lex_src->lex);
- lex_src->line_delta = (i32)target_line - (i32)here.line;
- if (target_file) lex_src->file_override = target_file;
- }
-}
-
-/* ============================================================
- * #pragma + _Pragma (§6.10.6, §6.10.9)
- * ============================================================ */
-
-/* Push the unmodified directive line back onto the source stack as a
- * buffer, so pp_emit_text writes it as-is. SRC_BUF gates directive
- * recognition off, so this won't recurse. */
-static void emit_pragma_line(Pp* pp, const Tok* line, u32 n, SrcLoc loc) {
- TokVec out = {0};
- HidesetId* hids;
- u32 i;
- Tok hash, ident, nl;
-
- memset(&hash, 0, sizeof(hash));
- hash.kind = TOK_PP_HASH;
- hash.flags = TF_AT_BOL;
- hash.loc = loc;
- hash.spelling = pool_intern_cstr(pp->c->global, "#");
- tv_push(pp, &out, hash);
-
- memset(&ident, 0, sizeof(ident));
- ident.kind = TOK_IDENT;
- ident.flags = 0;
- ident.loc = loc;
- ident.spelling = pp->sym_pragma_kw;
- ident.v.ident = pp->sym_pragma_kw;
- tv_push(pp, &out, ident);
-
- for (i = 0; i < n; ++i) {
- Tok t = line[i];
- /* Force a leading space between tokens. */
- t.flags |= TF_HAS_SPACE;
- if (i == 0) {
- /* Space between "pragma" and the first arg. */
- }
- tv_push(pp, &out, t);
- }
-
- memset(&nl, 0, sizeof(nl));
- nl.kind = TOK_NEWLINE;
- nl.loc = loc;
- tv_push(pp, &out, nl);
-
- hids = arena_array(&pp->arena, HidesetId, out.n ? out.n : 1);
- for (i = 0; i < out.n; ++i) hids[i] = HS_EMPTY;
- push_buf(pp, out.data, hids, out.n);
-}
-
-static void do_pragma(Pp* pp, const Tok* line, u32 n, SrcLoc loc) {
- /* Forward unrecognised pragmas to the output. STDC pragmas pass
- * through too; we don't act on them yet. */
- emit_pragma_line(pp, line, n, loc);
-}
-
-/* Destringize a string literal token's content: strip surrounding quotes
- * and undo the `\"` and `\\` escapes. Other escape sequences pass
- * through verbatim — the result is fed back through the lexer, which
- * does its own escape handling for any string literals nested inside. */
-static void destringize(Pp* pp, const Tok* str_tok, char* out, size_t cap,
- size_t* out_len) {
- size_t slen = 0;
- const char* s = pool_str(pp->c->global, str_tok->spelling, &slen);
- size_t i, w = 0;
- if (slen < 2 || s[0] != '"' || s[slen - 1] != '"') {
- compiler_panic(pp->c, str_tok->loc,
- "_Pragma: argument must be a string literal");
- }
- for (i = 1; i + 1 < slen; ++i) {
- char c = s[i];
- if (c == '\\' && i + 2 < slen && (s[i + 1] == '\\' || s[i + 1] == '"')) {
- ++i;
- c = s[i];
- }
- if (w + 1 >= cap)
- compiler_panic(pp->c, str_tok->loc, "_Pragma: payload too long");
- out[w++] = c;
- }
- out[w] = 0;
- *out_len = w;
-}
-
-/* Handle a `_Pragma("...")` invocation. Caller has consumed the
- * `_Pragma` identifier. Reads `(` STR `)`, destringizes, re-lexes the
- * payload, and emits a #pragma directive line. */
-static int try_expand_pragma_op(Pp* pp, const Tok* invoke) {
- Tok lp, str, rp;
- char buf[1024];
- size_t buf_n = 0;
- Lexer* lex;
- TokVec args = {0};
-
- /* Peek '(' (skipping NL). Use peek_for_invoke_paren for consistency,
- * but we need the saved-back behavior for a non-match. */
- {
- int saw_ws;
- if (!peek_for_invoke_paren(pp, &saw_ws)) {
- return 0; /* not an invocation; emit _Pragma as ident */
- }
- (void)saw_ws;
- }
- /* Read the string literal arg. */
- {
- HidesetId hs;
- str = src_next_raw(pp, &hs, NULL);
- }
- if (str.kind != TOK_STR) {
- compiler_panic(pp->c, invoke->loc, "_Pragma: expected string literal");
- }
- {
- HidesetId hs;
- rp = src_next_raw(pp, &hs, NULL);
- }
- if (rp.kind != TOK_PUNCT || rp.v.punct != ')') {
- compiler_panic(pp->c, invoke->loc, "_Pragma: expected ')'");
- }
- (void)lp;
-
- destringize(pp, &str, buf, sizeof(buf) - 2, &buf_n);
- /* Append a NL so the lexer terminates cleanly. */
- buf[buf_n++] = '\n';
- buf[buf_n] = 0;
-
- /* Re-lex into args. Bytes need to live until lex_close; copy into
- * arena. */
- {
- char* arena_buf = (char*)arena_alloc(&pp->arena, buf_n + 1, 1);
- memcpy(arena_buf, buf, buf_n + 1);
- lex = lex_open_mem(pp->c, "<_Pragma>", arena_buf, buf_n);
- }
- for (;;) {
- Tok t = lex_next(lex);
- if (t.kind == TOK_EOF || t.kind == TOK_NEWLINE) break;
- tv_push(pp, &args, t);
- }
- lex_close(lex);
-
- emit_pragma_line(pp, args.data, args.n, invoke->loc);
- return 1;
-}
-
-/* ============================================================
- * #error
- * ============================================================ */
-
-static void do_error(Pp* pp, const Tok* line, u32 n, SrcLoc loc) {
- /* Concatenate token spellings into a single message. */
- CharBuf cb = {0};
- u32 i;
- for (i = 0; i < n; ++i) {
- size_t sl = 0;
- const char* s = line[i].spelling
- ? pool_str(pp->c->global, line[i].spelling, &sl)
- : NULL;
- if (i > 0) cb_putc(pp, &cb, ' ');
- if (s && sl) cb_append(pp, &cb, s, (u32)sl);
- }
- cb_putc(pp, &cb, 0);
- compiler_panic(pp->c, loc, "#error: %s", cb.data ? cb.data : "");
-}
-
-/* ============================================================
- * #embed (C23, §6.10.* per N3033)
- * ============================================================ */
-
-static void do_embed(Pp* pp, const Tok* line, u32 n, SrcLoc loc) {
- char path[4096];
- char resolved[4096];
- int system_form = 0;
- const u8* data;
- size_t size;
- u32 j;
- /* Optional embed parameters parsed below. */
- i64 limit_n = -1;
- Tok* if_empty_toks = NULL;
- u32 if_empty_n = 0;
- /* Header-name path: first token. */
- u32 arg_start = 0;
-
- if (n == 0) compiler_panic(pp->c, loc, "#embed: missing path");
-
- if (line[0].kind == TOK_HEADER) {
- size_t sl = 0;
- const char* s = pool_str(pp->c->global, line[0].spelling, &sl);
- if (sl < 2) compiler_panic(pp->c, loc, "#embed: malformed header name");
- if (s[0] == '<' && s[sl - 1] == '>')
- system_form = 1;
- else if (s[0] == '"' && s[sl - 1] == '"')
- system_form = 0;
- else
- compiler_panic(pp->c, loc, "#embed: malformed header name");
- memcpy(path, s + 1, sl - 2);
- path[sl - 2] = 0;
- arg_start = 1;
- } else {
- compiler_panic(pp->c, loc, "#embed: header-name argument required");
- }
-
- /* Parse trailing parameters: limit(N), if_empty(...). */
- j = arg_start;
- while (j < n) {
- if (line[j].kind == TOK_IDENT) {
- size_t sl = 0;
- const char* s = pool_str(pp->c->global, line[j].v.ident, &sl);
- if (sl == 5 && memcmp(s, "limit", 5) == 0) {
- if (j + 1 >= n || line[j + 1].kind != TOK_PUNCT ||
- line[j + 1].v.punct != '(') {
- compiler_panic(pp->c, loc, "#embed: expected '(' after limit");
- }
- j += 2;
- if (j >= n || line[j].kind != TOK_NUM) {
- compiler_panic(pp->c, loc, "#embed: limit() expects an integer");
- }
- {
- size_t sl2 = 0;
- const char* s2 = pool_str(pp->c->global, line[j].spelling, &sl2);
- limit_n = parse_pp_int(s2, sl2);
- }
- ++j;
- if (j >= n || line[j].kind != TOK_PUNCT || line[j].v.punct != ')') {
- compiler_panic(pp->c, loc, "#embed: expected ')' to close limit");
- }
- ++j;
- continue;
- }
- if (sl == 8 && memcmp(s, "if_empty", 8) == 0) {
- u32 depth = 0;
- u32 start;
- if (j + 1 >= n || line[j + 1].kind != TOK_PUNCT ||
- line[j + 1].v.punct != '(') {
- compiler_panic(pp->c, loc, "#embed: expected '(' after if_empty");
- }
- j += 2;
- start = j;
- while (j < n) {
- if (line[j].kind == TOK_PUNCT) {
- if (line[j].v.punct == '(')
- ++depth;
- else if (line[j].v.punct == ')') {
- if (depth == 0) break;
- --depth;
- }
- }
- ++j;
- }
- if (j >= n) {
- compiler_panic(pp->c, loc, "#embed: unterminated if_empty");
- }
- if_empty_toks = arena_array(&pp->arena, Tok, j - start ? j - start : 1);
- if_empty_n = j - start;
- memcpy(if_empty_toks, line + start, sizeof(Tok) * if_empty_n);
- ++j; /* skip ')' */
- continue;
- }
- }
- compiler_panic(pp->c, loc, "#embed: unexpected token in parameter list");
- }
-
- if (!find_and_open_include(pp, path, system_form, loc, &data, &size, resolved,
- sizeof(resolved))) {
- compiler_panic(pp->c, loc, "#embed: file not found: %s", path);
- }
-
- /* Apply limit(). */
- {
- size_t emit_n = size;
- if (limit_n >= 0 && (u64)limit_n < emit_n) emit_n = (size_t)limit_n;
- if (emit_n == 0) {
- /* Empty: emit if_empty payload (or nothing). */
- if (if_empty_toks && if_empty_n) {
- HidesetId* hids = arena_array(&pp->arena, HidesetId, if_empty_n);
- u32 i;
- for (i = 0; i < if_empty_n; ++i) hids[i] = HS_EMPTY;
- push_buf(pp, if_empty_toks, hids, if_empty_n);
- }
- return;
- }
- /* Build a buffer of pp-numbers separated by ',' punctuators. */
- {
- TokVec out = {0};
- HidesetId* hids;
- size_t i;
- for (i = 0; i < emit_n; ++i) {
- char numbuf[8];
- int nl = 0;
- u8 v = data[i];
- /* "u8 -> decimal" without sprintf. */
- if (v == 0) {
- numbuf[nl++] = '0';
- } else {
- char tmp[4];
- int k = 0;
- while (v) {
- tmp[k++] = (char)('0' + (v % 10));
- v /= 10;
- }
- while (k > 0) numbuf[nl++] = tmp[--k];
- }
- {
- Tok t;
- memset(&t, 0, sizeof(t));
- t.kind = TOK_NUM;
- t.loc = loc;
- t.spelling = pool_intern(pp->c->global, numbuf, (size_t)nl);
- if (i == 0) t.flags = TF_AT_BOL;
- /* Bytes after a comma get a leading space to match
- * clang's `, ` separator format. */
- else
- t.flags = TF_HAS_SPACE;
- tv_push(pp, &out, t);
- }
- if (i + 1 < emit_n) {
- Tok comma;
- memset(&comma, 0, sizeof(comma));
- comma.kind = TOK_PUNCT;
- comma.v.punct = ',';
- comma.loc = loc;
- comma.spelling = pool_intern_cstr(pp->c->global, ",");
- tv_push(pp, &out, comma);
- }
- }
- hids = arena_array(&pp->arena, HidesetId, out.n ? out.n : 1);
- {
- u32 k;
- for (k = 0; k < out.n; ++k) hids[k] = HS_EMPTY;
- }
- push_buf(pp, out.data, hids, out.n);
- }
- }
-}
-
-/* ============================================================
- * Directive dispatch
- * ============================================================ */
-
-static void process_directive(Pp* pp, SrcLoc hash_loc) {
- Tok* line;
- u32 n;
- Sym name;
-
- read_directive_line(pp, &line, &n);
- if (n == 0) {
- /* Null directive: '#' newline. Nothing to do. */
- return;
- }
- if (line[0].kind != TOK_IDENT) {
- compiler_panic(pp->c, line[0].loc, "expected directive name after '#'");
- }
- name = line[0].v.ident;
- if (name == pp->sym_define)
- do_define(pp, line + 1, n - 1);
- else if (name == pp->sym_undef)
- do_undef(pp, line + 1, n - 1);
- else if (name == pp->sym_if)
- do_if_directive(pp, line + 1, n - 1, hash_loc);
- else if (name == pp->sym_ifdef)
- do_ifdef(pp, line + 1, n - 1, 0, hash_loc);
- else if (name == pp->sym_ifndef)
- do_ifdef(pp, line + 1, n - 1, 1, hash_loc);
- else if (name == pp->sym_elif)
- do_elif(pp, hash_loc);
- else if (name == pp->sym_else)
- do_else(pp, hash_loc);
- else if (name == pp->sym_endif)
- do_endif(pp, hash_loc);
- else if (name == pp->sym_include)
- do_include(pp, line + 1, n - 1, hash_loc);
- else if (name == pp->sym_line)
- do_line(pp, line + 1, n - 1, hash_loc);
- else if (name == pp->sym_pragma)
- do_pragma(pp, line + 1, n - 1, hash_loc);
- else if (name == pp->sym_error)
- do_error(pp, line + 1, n - 1, hash_loc);
- else if (name == pp->sym_embed)
- do_embed(pp, line + 1, n - 1, hash_loc);
- else {
- compiler_panic(pp->c, line[0].loc, "unsupported directive");
- }
-}
-
-/* ============================================================
- * Macro expansion
- * ============================================================ */
-
-static Tok pp_next_raw(Pp* pp);
-static void subst_phase2(Pp* pp, const Tok* in, u32 nin, const Tok* invoke,
- TokVec* out);
-
-/* Build a buffer of the macro's body (with hidesets) and push it. The
- * first expanded token inherits the invocation token's TF_AT_BOL /
- * TF_HAS_SPACE so output formatting matches the invocation site. */
-static void expand_object_macro(Pp* pp, const Macro* m, const Tok* invoke,
- HidesetId invoke_hs) {
- TokVec body = {0};
- Tok* tmp;
- HidesetId hs;
- HidesetId* hids;
- u32 i;
-
- if (m->body_len == 0) {
- return; /* placemarker: nothing to push */
- }
- /* Run the body through the paste phase: object-like macros may use
- * `##`. There are no parameters, so phase 1 reduces to a copy. */
- tmp = arena_array(&pp->arena, Tok, m->body_len);
- for (i = 0; i < m->body_len; ++i) tmp[i] = m->body[i];
- subst_phase2(pp, tmp, m->body_len, invoke, &body);
-
- if (body.n == 0) return;
-
- /* Transfer invocation flags onto the first emitted token. */
- body.data[0].flags =
- (u16)((body.data[0].flags & ~(TF_AT_BOL | TF_HAS_SPACE)) |
- (invoke->flags & (TF_AT_BOL | TF_HAS_SPACE)));
- for (i = 0; i < body.n; ++i) body.data[i].loc = invoke->loc;
-
- hs = hs_add(pp, invoke_hs, m->name);
- hids = arena_array(&pp->arena, HidesetId, body.n);
- for (i = 0; i < body.n; ++i) hids[i] = hs;
- push_buf(pp, body.data, hids, body.n);
-}
-
-/* ============================================================
- * Function-like macro expansion
- * ============================================================ */
-
-/* Peek for an open paren after the just-consumed identifier (which named
- * a function-like macro). Newlines are whitespace inside an invocation.
- * Returns 1 with `*ws_has_space_out` indicating whether any whitespace
- * (newlines or HAS_SPACE) sat between the ident and the `(`. Returns 0 if
- * no `(` follows; pushed-back tokens (NLs + the non-`(` token, if any)
- * are restored as a buffer source so subsequent reads still see them. */
-static int peek_for_invoke_paren(Pp* pp, int* ws_has_space_out) {
- TokVec saved = {0};
- int saw_ws = 0;
- Tok t;
- HidesetId hs;
-
- for (;;) {
- t = src_next_raw(pp, &hs, NULL);
- if (t.kind == TOK_NEWLINE) {
- saw_ws = 1;
- tv_push(pp, &saved, t);
- continue;
- }
- if (t.kind == TOK_EOF) {
- /* No '(' — push back saved tokens, leave EOF for next read. */
- if (saved.n) push_buf(pp, saved.data, NULL, saved.n);
- *ws_has_space_out = saw_ws;
- return 0;
- }
- if (t.flags & TF_HAS_SPACE) saw_ws = 1;
- if (t.kind == TOK_PUNCT && t.v.punct == '(') {
- /* Consumed. The newlines we walked past are whitespace and
- * dropped (per spec); they don't go back on the stack. */
- *ws_has_space_out = saw_ws;
- return 1;
- }
- /* Save this non-`(` token too and push back. */
- tv_push(pp, &saved, t);
- push_buf(pp, saved.data, NULL, saved.n);
- *ws_has_space_out = saw_ws;
- return 0;
- }
-}
-
-/* Run macro expansion on a fixed token sequence to completion, yielding the
- * fully-expanded token sequence. Used to pre-expand each function-macro
- * argument before substitution (§6.10.3.1 ¶1). */
-static void expand_arg_to_eof(Pp* pp, Tok* in, u32 nin, TokVec* out) {
- TokSrc src;
- Tok t;
-
- memset(&src, 0, sizeof(src));
- src.kind = SRC_BUF;
- src.scope_top = 1;
- src.toks = in;
- src.hs = NULL;
- src.n = nin;
- src_push(pp, src);
-
- for (;;) {
- t = pp_next_raw(pp); /* drives macro expansion within this scope */
- if (t.kind == TOK_EOF) break;
- if (t.kind == TOK_NEWLINE) {
- /* Newlines inside an arg act as whitespace; convert to
- * "next-token has TF_HAS_SPACE". Drop the NL token itself. */
- continue;
- }
- tv_push(pp, out, t);
- }
- /* Pop our scope source. */
- --pp->nsources;
-}
-
-/* Argument list for a function-like invocation. Stored as parallel
- * (start, end) ranges into a flat unexpanded token vector and a flat
- * expanded token vector. */
-typedef struct ArgList {
- /* Unexpanded arg tokens (raw as collected from invocation). */
- Tok* raw;
- u32 raw_n;
- u32* raw_start; /* size n_args + 1 (sentinel = raw_n) */
- /* Pre-expanded tokens. */
- Tok* exp;
- u32 exp_n;
- u32* exp_start; /* size n_args + 1 (sentinel = exp_n) */
- u32 n_args;
-} ArgList;
-
-/* Collect arguments. Caller has just consumed the opening `(`. Returns the
- * close-paren's token (used as the invocation's last source location). */
-static Tok read_invocation_args(Pp* pp, const Macro* m, SrcLoc invoke_loc,
- ArgList* out) {
- TokVec raw = {0};
- u32* starts;
- u32 starts_cap = 0;
- u32 n_args = 0;
- u32 cur_start = 0;
- int depth = 0;
- Tok t;
- HidesetId hs;
- int first_token_of_arg = 1;
- Tok close_tok;
-
- memset(out, 0, sizeof(*out));
- starts = arena_array(&pp->arena, u32, 8);
- starts_cap = 8;
- starts[0] = 0;
-
- for (;;) {
- t = src_next_raw(pp, &hs, NULL);
- if (t.kind == TOK_EOF) {
- compiler_panic(pp->c, invoke_loc,
- "unterminated function-like macro invocation");
- }
- if (t.kind == TOK_NEWLINE) {
- /* Whitespace within an invocation. Mark the next token as
- * having space; drop the NL. */
- if (raw.n && depth >= 0) {
- /* No-op token list; we'll OR onto the next pushed token. */
- }
- /* Use a sentinel: track via a flag on a deferred push. We
- * accumulate "has_space" by setting it on the next pushed
- * token. */
- /* Simpler: just push a placeholder by OR'ing onto next via
- * a flag stored in `first_token_of_arg`-style state. */
- /* Implementation: use the next read token's TF_HAS_SPACE bit,
- * which the lexer already sets after a NL. Actually NOT —
- * after a NL the lexer sets TF_AT_BOL on the next token, not
- * HAS_SPACE necessarily. Force it: */
- /* We'll OR it manually onto the next token. */
- /* Use a small flag stash: */
- /* (handled below by setting a pending flag) */
- /* See: pending_space variable */
- /* — commit: declare a pending_space static earlier. */
- continue;
- }
-
- if (t.kind == TOK_PUNCT) {
- u32 p = t.v.punct;
- if (p == '(' || p == '[' || p == '{') {
- ++depth;
- } else if (p == ')' || p == ']' || p == '}') {
- if (p == ')' && depth == 0) {
- /* End of invocation. Close the current argument. The
- * empty-args case (no commas seen, no tokens
- * collected) emits a slot only when the macro expects
- * at least one argument; arity-0 macros take none. */
- close_tok = t;
- {
- int empty_call =
- (n_args == 0 && raw.n == cur_start && first_token_of_arg);
- int want_slot = !empty_call || (m->n_params > 0) || m->is_variadic;
- if (want_slot) {
- if (n_args + 1 >= starts_cap) {
- u32 nc = starts_cap * 2;
- u32* nb = arena_array(&pp->arena, u32, nc);
- memcpy(nb, starts, sizeof(u32) * starts_cap);
- starts = nb;
- starts_cap = nc;
- }
- ++n_args;
- starts[n_args] = raw.n;
- }
- }
- goto done;
- }
- --depth;
- } else if (p == ',' && depth == 0) {
- /* Variadic: once we've filled all named params, the rest
- * (commas included) collect into __VA_ARGS__. */
- if (m->is_variadic && n_args + 1 >= m->n_params) {
- /* This comma is part of __VA_ARGS__. Push it. */
- tv_push(pp, &raw, t);
- first_token_of_arg = 0;
- continue;
- }
- /* Close current arg, start next. */
- if (n_args + 1 >= starts_cap) {
- u32 nc = starts_cap * 2;
- u32* nb = arena_array(&pp->arena, u32, nc);
- memcpy(nb, starts, sizeof(u32) * starts_cap);
- starts = nb;
- starts_cap = nc;
- }
- ++n_args;
- starts[n_args] = raw.n;
- cur_start = raw.n;
- first_token_of_arg = 1;
- continue;
- }
- }
- tv_push(pp, &raw, t);
- first_token_of_arg = 0;
- (void)hs; /* hideset of raw arg tokens carried for blue-paint
- * propagation in the arg's pre-expansion */
- }
-done:
- /* Validate arity. */
- {
- u32 expected = m->n_params;
- if (m->is_variadic) {
- if (n_args < (expected ? expected - 1 : 0)) {
- /* Allow exactly expected-1 (empty __VA_ARGS__) by
- * synthesizing an empty trailing arg. */
- if (n_args + 1 == (expected ? expected - 1 : 0)) {
- /* off by one — fall through to error */
- }
- compiler_panic(pp->c, invoke_loc,
- "too few arguments to variadic macro invocation");
- }
- /* Synthesize an empty __VA_ARGS__ if caller passed exactly
- * the named-parameter count. */
- if (n_args + 1 == expected) {
- if (n_args + 1 >= starts_cap) {
- u32 nc = starts_cap * 2;
- u32* nb = arena_array(&pp->arena, u32, nc);
- memcpy(nb, starts, sizeof(u32) * starts_cap);
- starts = nb;
- starts_cap = nc;
- }
- ++n_args;
- starts[n_args] = raw.n;
- }
- } else {
- if (n_args != expected) {
- /* Spec: arity-0 macro `M()` invoked as `M()` is allowed and
- * has 0 args. Above logic produces 0 in that case. */
- compiler_panic(pp->c, invoke_loc,
- "wrong number of arguments to function-like macro");
- }
- }
- }
- out->raw = raw.data;
- out->raw_n = raw.n;
- out->raw_start = starts;
- out->n_args = n_args;
- return close_tok;
-}
-
-/* Build pre-expanded args. */
-static void preexpand_args(Pp* pp, ArgList* a) {
- TokVec exp = {0};
- u32* exp_start;
- u32 i;
- exp_start = arena_array(&pp->arena, u32, a->n_args + 1);
- exp_start[0] = 0;
- for (i = 0; i < a->n_args; ++i) {
- u32 lo = a->raw_start[i];
- u32 hi = a->raw_start[i + 1];
- if (hi > lo) {
- /* Copy the slice into a fresh buffer so expand_arg_to_eof can
- * own it without aliasing. */
- Tok* slice = arena_array(&pp->arena, Tok, hi - lo);
- memcpy(slice, &a->raw[lo], sizeof(Tok) * (hi - lo));
- expand_arg_to_eof(pp, slice, hi - lo, &exp);
- }
- exp_start[i + 1] = exp.n;
- }
- a->exp = exp.data;
- a->exp_n = exp.n;
- a->exp_start = exp_start;
-}
-
-/* Build a stringized TOK_STR from the unexpanded argument tokens
- * `arg[lo..hi)`. The first token's leading-space flag is ignored (leading
- * whitespace stripped). Inside string/char-literal spellings, '"' and '\'
- * are escaped. */
-static Tok make_stringize(Pp* pp, const Tok* arg, u32 lo, u32 hi, SrcLoc loc) {
- CharBuf b = {0};
- u32 i;
- Tok t;
- Sym sp;
-
- cb_putc(pp, &b, '"');
- for (i = lo; i < hi; ++i) {
- const Tok* at = &arg[i];
- size_t slen = 0;
- const char* s =
- at->spelling ? pool_str(pp->c->global, at->spelling, &slen) : NULL;
- if (i > lo && (at->flags & TF_HAS_SPACE)) cb_putc(pp, &b, ' ');
- if (s && slen) {
- int esc = (at->kind == TOK_STR || at->kind == TOK_CHR);
- size_t k;
- for (k = 0; k < slen; ++k) {
- char c = s[k];
- if (esc && (c == '\\' || c == '"')) cb_putc(pp, &b, '\\');
- cb_putc(pp, &b, c);
- }
- }
- }
- cb_putc(pp, &b, '"');
-
- sp = pool_intern(pp->c->global, b.data, b.len);
- memset(&t, 0, sizeof(t));
- t.kind = TOK_STR;
- t.loc = loc;
- t.spelling = sp;
- t.v.str = sp;
- return t;
-}
-
-/* Concatenate two token spellings and re-lex into a single token. Empty
- * (placemarker) sides collapse to the other side per §6.10.3.3 ¶2. */
-static Tok paste_tokens(Pp* pp, Tok lhs, Tok rhs, SrcLoc loc) {
- char buf[1024];
- size_t alen = 0, blen = 0;
- const char* a;
- const char* b;
- Lexer* lex;
- Tok t1, t2;
-
- if (lhs.kind == TOK_PP_PLACEMARKER) return rhs;
- if (rhs.kind == TOK_PP_PLACEMARKER) return lhs;
-
- a = lhs.spelling ? pool_str(pp->c->global, lhs.spelling, &alen) : "";
- b = rhs.spelling ? pool_str(pp->c->global, rhs.spelling, &blen) : "";
- if (alen + blen + 2 > sizeof(buf)) {
- compiler_panic(pp->c, loc, "token paste: spelling too long");
- }
- if (alen) memcpy(buf, a, alen);
- if (blen) memcpy(buf + alen, b, blen);
- buf[alen + blen] = '\n';
- buf[alen + blen + 1] = 0;
-
- lex = lex_open_mem(pp->c, "<paste>", buf, alen + blen + 1);
- t1 = lex_next(lex);
- t2 = lex_next(lex);
- if (t1.kind == TOK_EOF) {
- /* Both empty (shouldn't reach here since we handled placemarkers). */
- lex_close(lex);
- return lhs;
- }
- if (t2.kind != TOK_NEWLINE && t2.kind != TOK_EOF) {
- lex_close(lex);
- compiler_panic(pp->c, loc, "token pasting yields multiple tokens, invalid");
- }
- lex_close(lex);
-
- /* Inherit positional flags from LHS (it sat in the same slot). */
- t1.flags = (u16)((t1.flags & ~(TF_AT_BOL | TF_HAS_SPACE)) |
- (lhs.flags & (TF_AT_BOL | TF_HAS_SPACE)));
- t1.loc = loc;
- return t1;
-}
-
-/* Phase 1 (param substitution). For each parameter occurrence in the
- * body: if adjacent to ## or # (handled separately), substitute the raw
- * argument tokens; otherwise substitute the pre-expanded form. Empty raw
- * args become a TOK_PP_PLACEMARKER which phase 2 collapses. */
-static void subst_phase1(Pp* pp, const Macro* m, ArgList* a, const Tok* invoke,
- TokVec* out) {
- u32 j;
- for (j = 0; j < m->body_len; ++j) {
- const Tok* bt = &m->body[j];
- if (bt->kind == TOK_PP_HASH) {
- /* §6.10.3.2: # must be followed by a parameter. */
- if (j + 1 >= m->body_len || m->body[j + 1].kind != TOK_PP_PARAM) {
- compiler_panic(pp->c, bt->loc,
- "'#' is not followed by a macro parameter");
- }
- {
- u32 p = m->body[j + 1].v.punct;
- u32 lo = a->raw_start[p];
- u32 hi = a->raw_start[p + 1];
- Tok s = make_stringize(pp, a->raw, lo, hi, invoke->loc);
- s.flags = (u16)((s.flags & ~(TF_AT_BOL | TF_HAS_SPACE)) |
- (bt->flags & (TF_AT_BOL | TF_HAS_SPACE)));
- tv_push(pp, out, s);
- ++j;
- continue;
- }
- }
- if (bt->kind == TOK_PP_PARAM) {
- u32 p = bt->v.punct;
- int adj_paste =
- (j > 0 && m->body[j - 1].kind == TOK_PP_PASTE) ||
- (j + 1 < m->body_len && m->body[j + 1].kind == TOK_PP_PASTE);
-
- u32 lo, hi;
- if (adj_paste) {
- lo = a->raw_start[p];
- hi = a->raw_start[p + 1];
- } else {
- lo = a->exp_start[p];
- hi = a->exp_start[p + 1];
- }
-
- if (lo == hi) {
- /* Empty argument → placemarker. */
- Tok pm;
- memset(&pm, 0, sizeof(pm));
- pm.kind = TOK_PP_PLACEMARKER;
- pm.flags = bt->flags & (TF_AT_BOL | TF_HAS_SPACE);
- pm.loc = invoke->loc;
- tv_push(pp, out, pm);
- } else {
- u32 k;
- int first = 1;
- Tok* src = adj_paste ? a->raw : a->exp;
- for (k = lo; k < hi; ++k) {
- Tok t = src[k];
- if (first) {
- t.flags = (u16)((t.flags & ~(TF_AT_BOL | TF_HAS_SPACE)) |
- (bt->flags & (TF_AT_BOL | TF_HAS_SPACE)));
- first = 0;
- }
- tv_push(pp, out, t);
- }
- }
- continue;
- }
- tv_push(pp, out, *bt);
- }
-}
-
-/* Phase 2 (paste). Walk the post-substitute buffer; for each TOK_PP_PASTE,
- * splice the previous output token with the next input token. Then strip
- * remaining placemarkers. */
-static void subst_phase2(Pp* pp, const Tok* in, u32 nin, const Tok* invoke,
- TokVec* out) {
- u32 i;
- for (i = 0; i < nin; ++i) {
- Tok t = in[i];
- if (t.kind == TOK_PP_PASTE) {
- Tok lhs, rhs;
- if (out->n == 0 || i + 1 >= nin) {
- compiler_panic(pp->c, invoke->loc,
- "'##' at start or end of replacement list");
- }
- lhs = out->data[--out->n];
- rhs = in[++i];
- tv_push(pp, out, paste_tokens(pp, lhs, rhs, invoke->loc));
- continue;
- }
- tv_push(pp, out, t);
- }
- /* Strip placemarkers, preserving leading-space flag on the next token. */
- {
- u32 r = 0, w = 0;
- u16 carry = 0;
- for (r = 0; r < out->n; ++r) {
- if (out->data[r].kind == TOK_PP_PLACEMARKER) {
- carry |= out->data[r].flags & (TF_AT_BOL | TF_HAS_SPACE);
- continue;
- }
- if (carry) {
- out->data[r].flags |= carry;
- carry = 0;
- }
- if (w != r) out->data[w] = out->data[r];
- ++w;
- }
- out->n = w;
- }
-}
-
-/* Wrapper: phases 1 and 2 in sequence, plus invocation-loc / flag transfer. */
-static void substitute_body(Pp* pp, const Macro* m, ArgList* a,
- const Tok* invoke, HidesetId result_hs, TokVec* out,
- TokVec* hs_out) {
- TokVec phase1 = {0};
- u32 i;
- subst_phase1(pp, m, a, invoke, &phase1);
- subst_phase2(pp, phase1.data, phase1.n, invoke, out);
- /* Invocation flags onto first emitted token. */
- if (out->n) {
- out->data[0].flags =
- (u16)((out->data[0].flags & ~(TF_AT_BOL | TF_HAS_SPACE)) |
- (invoke->flags & (TF_AT_BOL | TF_HAS_SPACE)));
- }
- /* Locations to invocation site. */
- for (i = 0; i < out->n; ++i) out->data[i].loc = invoke->loc;
- /* Build parallel hideset vector. */
- for (i = 0; i < out->n; ++i) {
- Tok hsmark;
- memset(&hsmark, 0, sizeof(hsmark));
- hsmark.spelling = (Sym)result_hs;
- tv_push(pp, hs_out, hsmark);
- }
-}
-
-/* Expand a function-like macro invocation: peek for `(`, collect args,
- * pre-expand them, substitute the body, push the result. Returns 1 if
- * the invocation was performed, 0 if there was no `(` (the caller should
- * emit the identifier as-is). */
-static int try_expand_func_macro(Pp* pp, const Macro* m, const Tok* invoke,
- HidesetId invoke_hs) {
- int saw_ws;
- ArgList args;
- TokVec body = {0};
- TokVec hsvec = {0}; /* parallel to body, holds HidesetId per slot */
- HidesetId result_hs;
- Tok close_tok;
-
- if (!peek_for_invoke_paren(pp, &saw_ws)) {
- return 0;
- }
- (void)saw_ws;
- read_invocation_args(pp, m, invoke->loc, &args);
- /* Note: assigned to silence unused-result; we don't use the close tok yet. */
- close_tok.kind = 0;
- (void)close_tok;
- preexpand_args(pp, &args);
-
- /* Hideset of result = invocation hideset ∪ {macro_name}. The standard
- * intersects with the closing `)`'s hideset for blue-paint purity, but
- * for the freshly-collected `)` from the lex source that's the empty
- * set, so the union form suffices here. */
- result_hs = hs_add(pp, invoke_hs, m->name);
- substitute_body(pp, m, &args, invoke, result_hs, &body, &hsvec);
-
- {
- u32 i;
- HidesetId* hids = arena_array(&pp->arena, HidesetId, body.n ? body.n : 1);
- for (i = 0; i < body.n; ++i) {
- hids[i] = (HidesetId)hsvec.data[i].spelling;
- }
- push_buf(pp, body.data, hids, body.n);
- }
- return 1;
-}
-
-/* ============================================================
- * Public streaming entries
- * ============================================================ */
-
-/* pp_next_raw: reads from the top source, applies macro expansion when an
- * identifier names a macro that isn't blue-painted, and consumes
- * directives in-place. TOK_NEWLINE is preserved for pp_emit_text. */
-static Tok pp_next_raw(Pp* pp) {
- Tok t;
- HidesetId hs;
- u8 src_kind;
- for (;;) {
- t = src_next_raw(pp, &hs, &src_kind);
- if (t.kind == TOK_EOF) return t;
- if (t.kind == TOK_PP_HASH && (t.flags & TF_AT_BOL) && src_kind == SRC_LEX) {
- process_directive(pp, t.loc);
- /* No synthesized newline: the comparator collapses
- * whitespace, so blank-line replacement of consumed
- * directives isn't observable here. Directives that produce
- * content (e.g. #include, #embed, #pragma) push their own
- * tokens onto the source stack, which the next loop
- * iteration picks up. */
- continue;
- }
- if (t.kind == TOK_IDENT && (t.flags & TF_NO_EXPAND) == 0) {
- Sym id = t.v.ident;
-
- /* Dynamic predefined macros: __LINE__ / __FILE__ /
- * __DATE__ / __TIME__. Always expand, ignoring the macro
- * table. */
- if (id == pp->sym_line__) {
- char tmp[16], buf[16];
- int k = 0, j = 0;
- u32 ln = t.loc.line;
- if (ln == 0)
- buf[k++] = '0';
- else {
- while (ln) {
- tmp[j++] = (char)('0' + ln % 10);
- ln /= 10;
- }
- while (j > 0) buf[k++] = tmp[--j];
- }
- t.kind = TOK_NUM;
- t.spelling = pool_intern(pp->c->global, buf, (size_t)k);
- return t;
- }
- if (id == pp->sym_file__) {
- TokSrc* ls = current_lex_src(pp);
- Sym name = 0;
- size_t nlen = 0;
- const char* nstr = NULL;
- char* buf;
- if (ls && ls->file_override) {
- name = ls->file_override;
- } else if (ls) {
- const SourceFile* sf =
- source_file(pp->c->sources, lex_file_id(ls->lex));
- if (sf) name = sf->name;
- }
- if (name) nstr = pool_str(pp->c->global, name, &nlen);
- buf = (char*)arena_alloc(&pp->arena, nlen + 2, 1);
- buf[0] = '"';
- if (nlen) memcpy(buf + 1, nstr, nlen);
- buf[nlen + 1] = '"';
- t.kind = TOK_STR;
- t.spelling = pool_intern(pp->c->global, buf, nlen + 2);
- t.v.str = t.spelling;
- return t;
- }
- if (id == pp->sym_date__) {
- t.kind = TOK_STR;
- t.spelling = pp->val_date_str;
- t.v.str = t.spelling;
- return t;
- }
- if (id == pp->sym_time__) {
- t.kind = TOK_STR;
- t.spelling = pp->val_time_str;
- t.v.str = t.spelling;
- return t;
- }
- if (id == pp->sym__pragma) {
- if (try_expand_pragma_op(pp, &t)) continue;
- /* No '(' — fall through and emit as plain ident. */
- }
-
- {
- Macro* m = mt_get(pp, id);
- if (m && !hs_contains(pp, hs, m->name)) {
- if (!m->is_func) {
- expand_object_macro(pp, m, &t, hs);
- continue;
- }
- if (try_expand_func_macro(pp, m, &t, hs)) {
- continue;
- }
- /* No '(' followed; emit as plain identifier. */
- }
- }
- }
- return t;
- }
-}
-
Tok pp_next(Pp* pp) {
/* Public: filter newlines so consumers like the C parser don't need
* to handle them. pp_emit_text uses pp_next_raw via its own loop. */
diff --git a/src/pp/pp_directive.c b/src/pp/pp_directive.c
@@ -0,0 +1,1252 @@
+/* pp_directive.c — if-stack, PP expression evaluator, #include search/open,
+ * #line, #pragma, #error, #embed, and directive dispatch. */
+
+#include "pp/pp_priv.h"
+
+/* ============================================================
+ * If-stack
+ * ============================================================ */
+
+/* Append frame `f` to the conditional-inclusion stack. When the backing
+ * array is full it is reallocated through pp_xrealloc to twice its size
+ * (4 slots the first time). */
+static void if_push(Pp* pp, IfFrame f) {
+  if (pp->ifstk_n == pp->ifstk_cap) {
+    u32 grown = 4;
+    if (pp->ifstk_cap) grown = pp->ifstk_cap * 2;
+    pp->ifstk = pp_xrealloc(pp, pp->ifstk, sizeof(IfFrame) * pp->ifstk_cap,
+                            sizeof(IfFrame) * grown, _Alignof(IfFrame));
+    pp->ifstk_cap = grown;
+  }
+  pp->ifstk[pp->ifstk_n] = f;
+  pp->ifstk_n += 1;
+}
+
+/* Return the innermost open conditional frame, or NULL when the stack is
+ * empty. */
+static IfFrame* if_top(Pp* pp) {
+  if (pp->ifstk_n == 0) return NULL;
+  return &pp->ifstk[pp->ifstk_n - 1];
+}
+
+/* Discard the innermost conditional frame; does nothing on an empty
+ * stack. */
+static void if_pop(Pp* pp) {
+  if (pp->ifstk_n == 0) return;
+  pp->ifstk_n -= 1;
+}
+
+/* ============================================================
+ * Directive line reader
+ * ============================================================ */
+
+/* Collect the rest of the current directive line. Tokens are pulled with
+ * src_next_raw until a TOK_NEWLINE or TOK_EOF appears; that terminator is
+ * consumed but not stored. The collected tokens sit in an arena-allocated
+ * array handed back through *out_toks, with the count in *out_n. For an
+ * empty line *out_toks is NULL and *out_n is 0. */
+void read_directive_line(Pp* pp, Tok** out_toks, u32* out_n) {
+  Tok* toks = NULL;
+  u32 room = 0;
+  u32 count = 0;
+  HidesetId unused_hs;
+
+  for (;;) {
+    Tok tok = src_next_raw(pp, &unused_hs, NULL);
+    if (tok.kind == TOK_NEWLINE || tok.kind == TOK_EOF) break;
+    if (count == room) {
+      /* Grow by taking a fresh arena block of twice the size (8 slots
+       * initially) and copying the tokens gathered so far. */
+      u32 bigger = room ? room * 2 : 8;
+      Tok* fresh =
+          (Tok*)arena_alloc(&pp->arena, sizeof(Tok) * bigger, _Alignof(Tok));
+      if (room) memcpy(fresh, toks, sizeof(Tok) * room);
+      toks = fresh;
+      room = bigger;
+    }
+    toks[count] = tok;
+    ++count;
+  }
+
+  *out_toks = toks;
+  *out_n = count;
+}
+
+/* ============================================================
+ * PP expression evaluator (§6.10.1)
+ * ============================================================ */
+
+/* Parse a C integer constant from the first `n` bytes of a pp-number's
+ * spelling `s`. Recognizes hex (0x... / 0X...), octal (leading 0) and
+ * decimal. Parsing stops at the first character that is not a digit of the
+ * selected base, so suffixes (u, l, ...) and anything after them are
+ * ignored.
+ *
+ * The value is accumulated in u64, where overflow wraps, and converted to
+ * i64 at the end: an over-long literal therefore yields a truncated value
+ * instead of triggering signed-overflow undefined behaviour. */
+static i64 parse_pp_int(const char* s, size_t n) {
+  unsigned base = 10;
+  size_t i = 0;
+  u64 acc = 0;
+
+  if (n >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
+    base = 16;
+    i = 2;
+  } else if (n >= 1 && s[0] == '0') {
+    base = 8;
+    i = 1;
+  }
+
+  for (; i < n; ++i) {
+    char c = s[i];
+    unsigned d;
+    if (c >= '0' && c <= '9')
+      d = (unsigned)(c - '0');
+    else if (base == 16 && c >= 'a' && c <= 'f')
+      d = (unsigned)(c - 'a') + 10u;
+    else if (base == 16 && c >= 'A' && c <= 'F')
+      d = (unsigned)(c - 'A') + 10u;
+    else
+      break;
+    /* e.g. '8' or '9' inside an octal constant ends the number. */
+    if (d >= base) break;
+    acc = acc * (u64)base + (u64)d;
+  }
+  return (i64)acc;
+}
+
+static int is_predefined_macro_name(Pp* pp, Sym name);
+
+/* Pre-pass over an #if / #elif line: every `defined X` or `defined ( X )`
+ * is replaced by a single pp-number token spelled "1" or "0"; all other
+ * tokens are copied to *out unchanged. The operand of `defined` is NOT
+ * macro-expanded.
+ *
+ * A name counts as defined when it is in the macro table or when
+ * is_predefined_macro_name() accepts it - the same test do_ifdef applies,
+ * so `#if defined(X)` and `#ifdef X` always agree.
+ *
+ * The replacement token takes its location and its TF_AT_BOL /
+ * TF_HAS_SPACE flags from the `defined` keyword. A missing identifier or
+ * a missing ')' is reported through compiler_panic. */
+static void prepass_defined(Pp* pp, const Tok* in, u32 nin, TokVec* out) {
+  u32 i;
+  for (i = 0; i < nin; ++i) {
+    int has_paren = 0;
+    int is_defined;
+    Sym ident;
+    u32 j;
+    Tok t;
+
+    if (in[i].kind != TOK_IDENT || in[i].v.ident != pp->sym_defined) {
+      tv_push(pp, out, in[i]);
+      continue;
+    }
+
+    j = i + 1;
+    if (j < nin && in[j].kind == TOK_PUNCT && in[j].v.punct == '(') {
+      has_paren = 1;
+      ++j;
+    }
+    if (j >= nin || in[j].kind != TOK_IDENT) {
+      compiler_panic(pp->c, in[i].loc,
+                     "operand of 'defined' must be an identifier");
+    }
+    ident = in[j].v.ident;
+    ++j;
+    if (has_paren) {
+      if (j >= nin || in[j].kind != TOK_PUNCT || in[j].v.punct != ')') {
+        compiler_panic(pp->c, in[i].loc,
+                       "expected ')' after 'defined' operand");
+      }
+      ++j;
+    }
+
+    is_defined =
+        (mt_get(pp, ident) != NULL) || is_predefined_macro_name(pp, ident);
+
+    memset(&t, 0, sizeof(t));
+    t.kind = TOK_NUM;
+    t.flags = in[i].flags & (TF_AT_BOL | TF_HAS_SPACE);
+    t.loc = in[i].loc;
+    t.spelling = pool_intern_cstr(pp->c->global, is_defined ? "1" : "0");
+    tv_push(pp, out, t);
+
+    /* j is one past the last consumed token; the loop's ++i lands on it. */
+    i = j - 1;
+  }
+}
+
+/* Macro-expand the tokens of an #if line into *out using
+ * expand_arg_to_eof, then rewrite every identifier still present in *out
+ * as the pp-number `0` (§6.10.1 ¶4). Expansion runs on an arena copy of
+ * `in`, not on the caller's array. With nin == 0 nothing is done and *out
+ * is left untouched. */
+static void expand_for_if(Pp* pp, const Tok* in, u32 nin, TokVec* out) {
+  Tok* copy;
+  Sym zero_sym;
+  u32 k;
+
+  if (nin == 0) return;
+
+  copy = arena_array(&pp->arena, Tok, nin);
+  memcpy(copy, in, sizeof(Tok) * nin);
+  expand_arg_to_eof(pp, copy, nin, out);
+
+  zero_sym = pool_intern_cstr(pp->c->global, "0");
+  for (k = 0; k < out->n; ++k) {
+    Tok* tok = &out->data[k];
+    if (tok->kind != TOK_IDENT) continue;
+    tok->kind = TOK_NUM;
+    tok->spelling = zero_sym;
+  }
+}
+
+/* Recursive-descent evaluator for the controlling expression of #if and
+ * #elif, run over a fully macro-expanded token list. There is one function
+ * per C precedence level, from ?: (lowest) down to primary expressions.
+ *
+ * All values are i64. Operations that would be undefined in C - signed
+ * overflow, INT64_MIN / -1, shift counts outside 0..63 - are given defined
+ * wrapping or saturating results rather than being left to the host
+ * compiler.
+ *
+ * `dead` counts the enclosing operands whose value cannot reach the
+ * result: the right-hand side of an already decided && or ||, and the arm
+ * of ?: that is not selected. Such operands are still parsed, so syntax
+ * errors are reported, but a division or modulo by zero inside them is
+ * not an error (e.g. `#if 0 && (1 / 0)` or `#if N != 0 && 10 / N > 1`). */
+typedef struct EE {
+  Pp* pp;
+  const Tok* toks; /* expanded expression tokens */
+  u32 n;           /* number of tokens in `toks` */
+  u32 pos;         /* index of the next unread token */
+  SrcLoc loc;      /* directive location, used when no token is at hand */
+  u32 dead;        /* > 0 while parsing an operand that is not evaluated */
+} EE;
+
+static i64 ee_ternary(EE* e);
+
+/* Wrapping arithmetic: compute in u64, where overflow is defined, then
+ * convert back to i64. */
+static i64 ee_wrap_neg(i64 a) { return (i64)((u64)0 - (u64)a); }
+static i64 ee_wrap_add(i64 a, i64 b) { return (i64)((u64)a + (u64)b); }
+static i64 ee_wrap_sub(i64 a, i64 b) { return (i64)((u64)a - (u64)b); }
+static i64 ee_wrap_mul(i64 a, i64 b) { return (i64)((u64)a * (u64)b); }
+
+/* v << cnt with every count defined: counts of 64 or more give 0. */
+static i64 ee_shift_left(i64 v, u64 cnt) {
+  if (cnt >= 64) return 0;
+  return (i64)((u64)v << cnt);
+}
+
+/* Arithmetic v >> cnt with every count defined: counts of 64 or more give
+ * 0 for non-negative v and -1 for negative v. Negative values are shifted
+ * through their complement so the result does not depend on how the host
+ * compiler shifts negative numbers. */
+static i64 ee_shift_right(i64 v, u64 cnt) {
+  if (cnt >= 64) return v < 0 ? -1 : 0;
+  if (v < 0) return ~(~v >> cnt);
+  return v >> cnt;
+}
+
+/* Quotient (want_rem == 0) or remainder (want_rem != 0) of v and r. `op`
+ * is the operator token, used to locate the diagnostic. A zero divisor is
+ * an error only in an evaluated context; in a dead operand the result is
+ * 0. Division by -1 is special-cased because INT64_MIN / -1 overflows. */
+static i64 ee_divmod(EE* e, const Tok* op, i64 v, i64 r, int want_rem) {
+  if (r == 0) {
+    if (e->dead == 0) {
+      compiler_panic(e->pp->c, op->loc,
+                     want_rem ? "#if: modulo by zero"
+                              : "#if: division by zero");
+    }
+    return 0;
+  }
+  if (r == -1) return want_rem ? 0 : ee_wrap_neg(v);
+  return want_rem ? v % r : v / r;
+}
+
+/* Next unread token, or NULL at the end of the expression. */
+static const Tok* ee_peek(EE* e) {
+  return e->pos < e->n ? &e->toks[e->pos] : NULL;
+}
+
+/* Consume the next token if it is punctuator `p`; returns 1 if consumed. */
+static int ee_match_punct(EE* e, u32 p) {
+  const Tok* t = ee_peek(e);
+  if (t && t->kind == TOK_PUNCT && t->v.punct == p) {
+    ++e->pos;
+    return 1;
+  }
+  return 0;
+}
+
+/* primary: pp-number | character constant | '(' expression ')' */
+static i64 ee_primary(EE* e) {
+  const Tok* t = ee_peek(e);
+  if (!t) compiler_panic(e->pp->c, e->loc, "#if: missing operand");
+  if (t->kind == TOK_NUM) {
+    size_t slen;
+    const char* s = pool_str(e->pp->c->global, t->spelling, &slen);
+    ++e->pos;
+    return parse_pp_int(s, slen);
+  }
+  if (t->kind == TOK_CHR) {
+    /* Value is the byte right after the opening quote; escape sequences
+     * are not decoded, and a spelling that does not start with a plain
+     * quote (or is shorter than three bytes) yields 0. */
+    size_t slen;
+    const char* s = pool_str(e->pp->c->global, t->spelling, &slen);
+    ++e->pos;
+    if (slen >= 3 && s[0] == '\'') return (unsigned char)s[1];
+    return 0;
+  }
+  if (t->kind == TOK_PUNCT && t->v.punct == '(') {
+    i64 v;
+    ++e->pos;
+    v = ee_ternary(e);
+    if (!ee_match_punct(e, ')')) {
+      compiler_panic(e->pp->c, t->loc, "#if: expected ')'");
+    }
+    return v;
+  }
+  compiler_panic(e->pp->c, t->loc, "#if: unexpected token in expression");
+  return 0;
+}
+
+/* unary: ('!' | '-' | '+' | '~') unary | primary */
+static i64 ee_unary(EE* e) {
+  const Tok* t = ee_peek(e);
+  if (t && t->kind == TOK_PUNCT) {
+    u32 p = t->v.punct;
+    if (p == '!' || p == '-' || p == '+' || p == '~') {
+      i64 v;
+      ++e->pos;
+      v = ee_unary(e);
+      if (p == '!') return v ? 0 : 1;
+      if (p == '-') return ee_wrap_neg(v);
+      if (p == '~') return ~v;
+      return v; /* unary '+' */
+    }
+  }
+  return ee_primary(e);
+}
+
+/* multiplicative: unary (('*' | '/' | '%') unary)* */
+static i64 ee_mul(EE* e) {
+  i64 v = ee_unary(e);
+  for (;;) {
+    const Tok* t = ee_peek(e);
+    if (!t || t->kind != TOK_PUNCT) break;
+    if (t->v.punct == '*') {
+      ++e->pos;
+      v = ee_wrap_mul(v, ee_unary(e));
+    } else if (t->v.punct == '/') {
+      i64 r;
+      ++e->pos;
+      r = ee_unary(e);
+      v = ee_divmod(e, t, v, r, 0);
+    } else if (t->v.punct == '%') {
+      i64 r;
+      ++e->pos;
+      r = ee_unary(e);
+      v = ee_divmod(e, t, v, r, 1);
+    } else {
+      break;
+    }
+  }
+  return v;
+}
+
+/* additive: multiplicative (('+' | '-') multiplicative)* */
+static i64 ee_add(EE* e) {
+  i64 v = ee_mul(e);
+  for (;;) {
+    const Tok* t = ee_peek(e);
+    if (!t || t->kind != TOK_PUNCT) break;
+    if (t->v.punct == '+') {
+      ++e->pos;
+      v = ee_wrap_add(v, ee_mul(e));
+    } else if (t->v.punct == '-') {
+      ++e->pos;
+      v = ee_wrap_sub(v, ee_mul(e));
+    } else {
+      break;
+    }
+  }
+  return v;
+}
+
+/* shift: additive (('<<' | '>>') additive)*
+ * A negative count shifts by its magnitude in the opposite direction. */
+static i64 ee_shift(EE* e) {
+  i64 v = ee_add(e);
+  for (;;) {
+    const Tok* t = ee_peek(e);
+    int left;
+    i64 r;
+    u64 mag;
+    if (!t || t->kind != TOK_PUNCT) break;
+    if (t->v.punct == P_SHL)
+      left = 1;
+    else if (t->v.punct == P_SHR)
+      left = 0;
+    else
+      break;
+    ++e->pos;
+    r = ee_add(e);
+    if (r < 0) {
+      mag = (u64)0 - (u64)r;
+      left = !left;
+    } else {
+      mag = (u64)r;
+    }
+    v = left ? ee_shift_left(v, mag) : ee_shift_right(v, mag);
+  }
+  return v;
+}
+
+/* relational: shift (('<' | '>' | '<=' | '>=') shift)* */
+static i64 ee_rel(EE* e) {
+  i64 v = ee_shift(e);
+  for (;;) {
+    const Tok* t = ee_peek(e);
+    if (!t || t->kind != TOK_PUNCT) break;
+    if (t->v.punct == '<') {
+      ++e->pos;
+      v = (v < ee_shift(e));
+    } else if (t->v.punct == '>') {
+      ++e->pos;
+      v = (v > ee_shift(e));
+    } else if (t->v.punct == P_LE) {
+      ++e->pos;
+      v = (v <= ee_shift(e));
+    } else if (t->v.punct == P_GE) {
+      ++e->pos;
+      v = (v >= ee_shift(e));
+    } else {
+      break;
+    }
+  }
+  return v;
+}
+
+/* equality: relational (('==' | '!=') relational)* */
+static i64 ee_eq(EE* e) {
+  i64 v = ee_rel(e);
+  for (;;) {
+    const Tok* t = ee_peek(e);
+    if (!t || t->kind != TOK_PUNCT) break;
+    if (t->v.punct == P_EQ) {
+      ++e->pos;
+      v = (v == ee_rel(e));
+    } else if (t->v.punct == P_NE) {
+      ++e->pos;
+      v = (v != ee_rel(e));
+    } else {
+      break;
+    }
+  }
+  return v;
+}
+
+/* bitwise and: equality ('&' equality)* */
+static i64 ee_band(EE* e) {
+  i64 v = ee_eq(e);
+  while (ee_match_punct(e, '&')) v = v & ee_eq(e);
+  return v;
+}
+
+/* bitwise xor: band ('^' band)* */
+static i64 ee_bxor(EE* e) {
+  i64 v = ee_band(e);
+  while (ee_match_punct(e, '^')) v = v ^ ee_band(e);
+  return v;
+}
+
+/* bitwise or: bxor ('|' bxor)* */
+static i64 ee_bor(EE* e) {
+  i64 v = ee_bxor(e);
+  while (ee_match_punct(e, '|')) v = v | ee_bxor(e);
+  return v;
+}
+
+/* logical and: bor ('&&' bor)*
+ * Once the left side is 0 the right side is parsed as a dead operand. */
+static i64 ee_logand(EE* e) {
+  i64 v = ee_bor(e);
+  while (ee_match_punct(e, P_AND)) {
+    int decided = (v == 0);
+    i64 r;
+    if (decided) ++e->dead;
+    r = ee_bor(e);
+    if (decided) --e->dead;
+    v = (v && r);
+  }
+  return v;
+}
+
+/* logical or: logand ('||' logand)*
+ * Once the left side is non-zero the right side is parsed as a dead
+ * operand. */
+static i64 ee_logor(EE* e) {
+  i64 v = ee_logand(e);
+  while (ee_match_punct(e, P_OR)) {
+    int decided = (v != 0);
+    i64 r;
+    if (decided) ++e->dead;
+    r = ee_logand(e);
+    if (decided) --e->dead;
+    v = (v || r);
+  }
+  return v;
+}
+
+/* conditional: logor ('?' conditional ':' conditional)?
+ * The arm that is not selected is parsed as a dead operand. */
+static i64 ee_ternary(EE* e) {
+  i64 c = ee_logor(e);
+  if (ee_match_punct(e, '?')) {
+    i64 a;
+    i64 b;
+    if (!c) ++e->dead;
+    a = ee_ternary(e);
+    if (!c) --e->dead;
+    if (!ee_match_punct(e, ':')) {
+      compiler_panic(e->pp->c, e->loc, "#if: ':' expected in ternary");
+    }
+    if (c) ++e->dead;
+    b = ee_ternary(e);
+    if (c) --e->dead;
+    return c ? a : b;
+  }
+  return c;
+}
+
+/* Evaluate the controlling expression of an #if / #elif.
+ *   line, n - the directive's tokens after the directive name
+ *   loc     - directive location, used for diagnostics
+ * Steps: resolve `defined` operators, macro-expand and map leftover
+ * identifiers to 0, then evaluate. Tokens left over after a complete
+ * expression are an error. Returns the expression's value (non-zero means
+ * the group is included). */
+i64 eval_if_expr(Pp* pp, const Tok* line, u32 n, SrcLoc loc) {
+  TokVec defs = {0};
+  TokVec exp = {0};
+  EE e;
+  i64 v;
+
+  prepass_defined(pp, line, n, &defs);
+  expand_for_if(pp, defs.data, defs.n, &exp);
+
+  e.pp = pp;
+  e.toks = exp.data;
+  e.n = exp.n;
+  e.pos = 0;
+  e.loc = loc;
+  e.dead = 0;
+  v = ee_ternary(&e);
+  if (e.pos != e.n) {
+    compiler_panic(pp->c, e.loc,
+                   "#if: unexpected trailing tokens in expression");
+  }
+  return v;
+}
+
+/* ============================================================
+ * Conditional inclusion helpers
+ * ============================================================ */
+
+/* Discard raw tokens up to and including the next TOK_NEWLINE or
+ * TOK_EOF. */
+static void consume_to_newline(Pp* pp) {
+  for (;;) {
+    Tok t = src_next_raw(pp, NULL, NULL);
+    if (t.kind == TOK_NEWLINE || t.kind == TOK_EOF) return;
+  }
+}
+
+/* Skip a conditional group that must not be emitted. Drives the source
+ * forward, consuming raw tokens, until we either:
+ * - reach a balancing #endif (pops the frame, returns), or
+ * - reach a #elif / #else that flips the top frame to IF_INCLUDE
+ * (returns with that frame active).
+ * Returns immediately if the top frame is already IF_INCLUDE or the
+ * if-stack is empty. Hitting end of input first is reported as
+ * "unterminated #if / #ifdef" at the location stored in the frame.
+ * Nested #if directives inside the skipped group are tracked via
+ * `local_depth`; their #else / #elif / #endif never touch the top frame.
+ * Unrecognised directives in skipped groups are tolerated
+ * (§6.10 ¶4, covered by `8c_skipped_relaxed_syntax`). */
+static void skip_until_active(Pp* pp) {
+ int local_depth = 0;
+ while (pp->ifstk_n > 0) {
+ IfFrame* top = if_top(pp);
+ Tok t;
+ if (top->state == IF_INCLUDE && local_depth == 0) return;
+ t = src_next_raw(pp, NULL, NULL);
+ if (t.kind == TOK_EOF) {
+ compiler_panic(pp->c, top->loc, "unterminated #if / #ifdef");
+ }
+ /* Only a '#' at the beginning of a line starts a directive; every
+ * other token of the skipped group is dropped. */
+ if (t.kind != TOK_PP_HASH || (t.flags & TF_AT_BOL) == 0) continue;
+
+ /* Read directive name (or null directive). */
+ {
+ Tok nt = src_next_raw(pp, NULL, NULL);
+ Sym name;
+ if (nt.kind == TOK_NEWLINE || nt.kind == TOK_EOF) continue;
+ if (nt.kind != TOK_IDENT) {
+ consume_to_newline(pp);
+ continue;
+ }
+ name = nt.v.ident;
+ /* A nested conditional opens: only count it, its condition is
+ * never evaluated. */
+ if (name == pp->sym_if || name == pp->sym_ifdef ||
+ name == pp->sym_ifndef) {
+ ++local_depth;
+ consume_to_newline(pp);
+ continue;
+ }
+ /* #endif closes a nested conditional if one is open, otherwise it
+ * closes the frame being skipped. */
+ if (name == pp->sym_endif) {
+ consume_to_newline(pp);
+ if (local_depth > 0) {
+ --local_depth;
+ continue;
+ }
+ if_pop(pp);
+ return;
+ }
+ if (name == pp->sym_else) {
+ consume_to_newline(pp);
+ if (local_depth > 0) continue;
+ if (top->has_else) {
+ compiler_panic(pp->c, t.loc, "duplicate #else");
+ }
+ top->has_else = 1;
+ /* No earlier group was taken: the #else group is the one to emit. */
+ if (top->state == IF_SEEK_TRUE) {
+ top->state = IF_INCLUDE;
+ return;
+ }
+ top->state = IF_DONE;
+ continue;
+ }
+ if (name == pp->sym_elif) {
+ /* Nested, after #else, or after a group was already taken: the
+ * condition is not evaluated. */
+ if (local_depth > 0 || top->has_else || top->state == IF_DONE) {
+ consume_to_newline(pp);
+ continue;
+ }
+ if (top->state == IF_SEEK_TRUE) {
+ Tok* line;
+ u32 ln;
+ i64 v;
+ /* read_directive_line consumes the rest of the line, so no
+ * consume_to_newline is needed on this path. */
+ read_directive_line(pp, &line, &ln);
+ v = eval_if_expr(pp, line, ln, t.loc);
+ if (v != 0) {
+ top->state = IF_INCLUDE;
+ return;
+ }
+ continue;
+ }
+ /* Was IF_INCLUDE; #elif means we're done. (Should already
+ * have been transitioned to DONE before entering this
+ * skip — defensive.) */
+ top->state = IF_DONE;
+ consume_to_newline(pp);
+ continue;
+ }
+ /* Other directive — relaxed: skip silently. */
+ consume_to_newline(pp);
+ continue;
+ }
+ }
+}
+
+/* ============================================================
+ * Predefined macro name guard
+ * ============================================================ */
+
+/* Return non-zero when `name` is one of the names handled outside the
+ * macro table: __VA_ARGS__, __LINE__, __FILE__, __DATE__ or __TIME__.
+ * __STDC__/__STDC_HOSTED__/__STDC_VERSION__ are registered as real
+ * macros, so the macro-table lookup catches them. */
+static int is_predefined_macro_name(Pp* pp, Sym name) {
+  if (name == pp->sym_va_args) return 1;
+  if (name == pp->sym_line__) return 1;
+  if (name == pp->sym_file__) return 1;
+  if (name == pp->sym_date__) return 1;
+  if (name == pp->sym_time__) return 1;
+  return 0;
+}
+
+/* ============================================================
+ * #ifdef / #if / #elif / #else / #endif
+ * ============================================================ */
+
+/* Handle #ifdef (negate == 0) and #ifndef (negate != 0).
+ *   line, n - tokens after the directive name; line[0] must be an
+ *             identifier, further tokens are not examined
+ *   loc     - location recorded in the new frame and used for diagnostics
+ * A name is defined when it is in the macro table or accepted by
+ * is_predefined_macro_name. Pushes a frame and, when the group is not
+ * taken, skips ahead to the next active group. */
+static void do_ifdef(Pp* pp, const Tok* line, u32 n, int negate, SrcLoc loc) {
+  IfFrame frame;
+  int is_defined;
+  int take;
+
+  if (n < 1 || line[0].kind != TOK_IDENT) {
+    compiler_panic(pp->c, loc,
+                   negate ? "#ifndef: expected identifier"
+                          : "#ifdef: expected identifier");
+  }
+
+  is_defined = (mt_get(pp, line[0].v.ident) != NULL) ||
+               is_predefined_macro_name(pp, line[0].v.ident);
+  take = negate ? !is_defined : is_defined;
+
+  memset(&frame, 0, sizeof(frame));
+  frame.loc = loc;
+  if (take)
+    frame.state = IF_INCLUDE;
+  else
+    frame.state = IF_SEEK_TRUE;
+  if_push(pp, frame);
+
+  if (!take) skip_until_active(pp);
+}
+
+/* Handle #if: evaluate the controlling expression, push a frame recording
+ * whether its group is taken, and skip ahead when it is not. `loc` is
+ * stored in the frame and passed to the evaluator for diagnostics. */
+static void do_if_directive(Pp* pp, const Tok* line, u32 n, SrcLoc loc) {
+  IfFrame frame;
+  i64 value = eval_if_expr(pp, line, n, loc);
+
+  memset(&frame, 0, sizeof(frame));
+  frame.loc = loc;
+  if (value)
+    frame.state = IF_INCLUDE;
+  else
+    frame.state = IF_SEEK_TRUE;
+  if_push(pp, frame);
+
+  if (!value) skip_until_active(pp);
+}
+
+/* Handle an #elif met while emitting. do_elif is only reached from the
+ * active branch, i.e. the preceding group produced output, so the frame is
+ * marked done and the rest of the conditional is skipped. A stray #elif
+ * or an #elif following #else is an error. */
+static void do_elif(Pp* pp, SrcLoc loc) {
+  IfFrame* frame = if_top(pp);
+  if (frame == NULL) compiler_panic(pp->c, loc, "stray #elif");
+  if (frame->has_else) compiler_panic(pp->c, loc, "#elif after #else");
+  frame->state = IF_DONE;
+  skip_until_active(pp);
+}
+
+/* Handle an #else met while emitting: record that the frame has seen its
+ * #else, mark it done and skip the remainder of the conditional. A stray
+ * or repeated #else is an error. */
+static void do_else(Pp* pp, SrcLoc loc) {
+  IfFrame* frame = if_top(pp);
+  if (frame == NULL) compiler_panic(pp->c, loc, "stray #else");
+  if (frame->has_else) compiler_panic(pp->c, loc, "duplicate #else");
+  frame->has_else = 1;
+  frame->state = IF_DONE;
+  skip_until_active(pp);
+}
+
+/* Handle an #endif met while emitting: close the innermost conditional.
+ * An #endif with no open conditional is an error. */
+static void do_endif(Pp* pp, SrcLoc loc) {
+  if (if_top(pp) == NULL) compiler_panic(pp->c, loc, "stray #endif");
+  if_pop(pp);
+}
+
+/* ============================================================
+ * #include (§6.10.2)
+ * ============================================================ */
+
+/* Load `path` through the host-supplied file_io and copy its bytes into
+ * the pp arena so they outlive io->release.
+ *   data_out, size_out - receive the arena copy and its length
+ * Returns 1 on success and 0 when read_all reports failure. A missing
+ * file_io / read_all is a configuration error reported via compiler_panic.
+ * An empty file still gets a 1-byte allocation. */
+static int try_open_include(Pp* pp, const char* path, const u8** data_out,
+                            size_t* size_out) {
+  const CfreeFileIO* io = pp->c->env->file_io;
+  CfreeFileData fd;
+  size_t nbytes;
+  u8* copy;
+
+  memset(&fd, 0, sizeof(fd));
+  if (io == NULL || io->read_all == NULL) {
+    compiler_panic(pp->c, (SrcLoc){0, 0, 0},
+                   "#include: env.file_io is not configured");
+  }
+  if (!io->read_all(io->user, path, &fd)) return 0;
+
+  nbytes = fd.size;
+  copy = (u8*)arena_alloc(&pp->arena, nbytes ? nbytes : 1, 1);
+  if (nbytes && fd.data) memcpy(copy, fd.data, nbytes);
+  if (io->release) io->release(io->user, &fd); /* zeros fd */
+
+  *data_out = copy;
+  *size_out = nbytes;
+  return 1;
+}
+
+/* Write the directory of the file identified by loc.file_id into dir_out
+ * as a NUL-terminated string, for resolving a quoted include.
+ *   - unknown file, empty name, a name starting with '<' (in-memory /
+ *     builtin sources) or a name without any '/'  ->  "."
+ *   - "/x"                                        ->  "/"
+ *   - "a/b/c.h"                                   ->  "a/b"
+ * `dir_out` must point to at least `cap` bytes. Returns 1 on success, 0
+ * when the result does not fit. */
+static int includer_dir(Pp* pp, SrcLoc loc, char* dir_out, size_t cap) {
+  const SourceFile* sf = source_file(pp->c->sources, loc.file_id);
+  const char* name = NULL;
+  size_t name_len = 0;
+  size_t cut = 0; /* index just past the last '/', 0 if there is none */
+  size_t dir_len;
+
+  if (sf && sf->name) name = pool_str(pp->c->global, sf->name, &name_len);
+
+  if (name && name_len != 0 && name[0] != '<') {
+    cut = name_len;
+    while (cut > 0 && name[cut - 1] != '/') --cut;
+  }
+
+  if (cut == 0) {
+    /* No directory component: fall back to the current directory. */
+    if (cap < 2) return 0;
+    dir_out[0] = '.';
+    dir_out[1] = 0;
+    return 1;
+  }
+
+  dir_len = cut - 1;             /* bytes in front of the last '/' */
+  if (dir_len == 0) dir_len = 1; /* path was "/x" — dir is "/" */
+  if (dir_len + 1 > cap) return 0;
+  memcpy(dir_out, name, dir_len);
+  dir_out[dir_len] = 0;
+  return 1;
+}
+
+/* Build "<dir>/<path>" plus its terminating NUL in buf. Returns the
+ * number of bytes written including the NUL, or 0 (buf untouched) when
+ * the result would not fit in `cap` bytes. */
+static size_t join_include_path(char* buf, size_t cap, const char* dir,
+                                size_t dlen, const char* path, size_t plen) {
+  size_t total = dlen + 1 + plen + 1;
+  if (total > cap) return 0;
+  memcpy(buf, dir, dlen);
+  buf[dlen] = '/';
+  memcpy(buf + dlen + 1, path, plen);
+  buf[total - 1] = 0;
+  return total;
+}
+
+/* Locate and load a header.
+ *   path          - header name as written, without delimiters
+ *   system        - non-zero for the <...> form
+ *   loc           - location of the #include, identifies the includer
+ *   data, size    - receive the file contents (see try_open_include)
+ *   resolved      - receives the path that was opened
+ * Search order: an absolute path is opened verbatim and nothing else is
+ * tried. Otherwise the quoted form first tries the includer's directory
+ * (C §6.10.2), and both forms then walk the configured include
+ * directories in order. Candidates longer than the internal 4096-byte
+ * buffer are skipped. Returns 1 on success; returns 0 when nothing was
+ * found or when the opened path does not fit in `resolved_cap`. */
+static int find_and_open_include(Pp* pp, const char* path, int system,
+                                 SrcLoc loc, const u8** data, size_t* size,
+                                 char* resolved, size_t resolved_cap) {
+  char buf[4096];
+  size_t plen = strlen(path);
+  size_t total;
+  u32 k;
+
+  if (plen > 0 && path[0] == '/') {
+    if (!try_open_include(pp, path, data, size)) return 0;
+    if (plen + 1 > resolved_cap) return 0;
+    memcpy(resolved, path, plen + 1);
+    return 1;
+  }
+
+  if (!system) {
+    char dir[4096];
+    if (includer_dir(pp, loc, dir, sizeof(dir))) {
+      total = join_include_path(buf, sizeof(buf), dir, strlen(dir), path, plen);
+      if (total != 0 && try_open_include(pp, buf, data, size)) {
+        if (total > resolved_cap) return 0;
+        memcpy(resolved, buf, total);
+        return 1;
+      }
+    }
+  }
+
+  for (k = 0; k < pp->ninc_dirs; ++k) {
+    const char* d = pp->inc_dirs[k].path;
+    total = join_include_path(buf, sizeof(buf), d, strlen(d), path, plen);
+    if (total == 0) continue;
+    if (!try_open_include(pp, buf, data, size)) continue;
+    if (total > resolved_cap) return 0;
+    memcpy(resolved, buf, total);
+    return 1;
+  }
+  return 0;
+}
+
+/* Parse the arguments of an #include into a NUL-terminated path and a
+ * system flag.
+ *   line, n    - tokens after the directive name
+ *   loc        - directive location, used for diagnostics
+ *   path_out   - receives the header name without its delimiters
+ *   cap        - size of path_out in bytes
+ *   system_out - set to 1 for the <...> form, 0 for the "..." form
+ * Handles:
+ *   - a directly lexed TOK_HEADER: < ... > or " ... "
+ *   - the macro-replaced form: the line is macro-expanded and must then
+ *     start with either a TOK_STR ("...") or a '<' ... '>' sequence, whose
+ *     token spellings are concatenated without separators.
+ * Every malformed or over-long form is reported through compiler_panic; in
+ * particular a <...> sequence that does not fit in `cap`, or that has no
+ * closing '>', is an error rather than a silently shortened path. */
+static void parse_include_path(Pp* pp, const Tok* line, u32 n, SrcLoc loc,
+                               char* path_out, size_t cap, int* system_out) {
+  if (n == 0) compiler_panic(pp->c, loc, "#include: missing path");
+
+  if (line[0].kind == TOK_HEADER) {
+    size_t slen = 0;
+    const char* s = pool_str(pp->c->global, line[0].spelling, &slen);
+    if (slen < 2) compiler_panic(pp->c, loc, "#include: malformed header name");
+    if (s[0] == '<' && s[slen - 1] == '>')
+      *system_out = 1;
+    else if (s[0] == '"' && s[slen - 1] == '"')
+      *system_out = 0;
+    else
+      compiler_panic(pp->c, loc, "#include: malformed header name");
+    if (slen - 2 + 1 > cap)
+      compiler_panic(pp->c, loc, "#include: path too long");
+    memcpy(path_out, s + 1, slen - 2);
+    path_out[slen - 2] = 0;
+    return;
+  }
+
+  /* Macro-replaced form. */
+  {
+    TokVec exp = {0};
+    Tok* slice = arena_array(&pp->arena, Tok, n);
+    memcpy(slice, line, sizeof(Tok) * n);
+    expand_arg_to_eof(pp, slice, n, &exp);
+
+    if (exp.n == 0) {
+      compiler_panic(pp->c, loc, "#include: empty after macro replacement");
+    }
+    if (exp.data[0].kind == TOK_STR) {
+      size_t slen = 0;
+      const char* s = pool_str(pp->c->global, exp.data[0].spelling, &slen);
+      if (slen < 2 || s[0] != '"' || s[slen - 1] != '"') {
+        compiler_panic(pp->c, loc, "#include: malformed string");
+      }
+      if (slen - 2 + 1 > cap) {
+        compiler_panic(pp->c, loc, "#include: path too long");
+      }
+      memcpy(path_out, s + 1, slen - 2);
+      path_out[slen - 2] = 0;
+      *system_out = 0;
+      return;
+    }
+    if (exp.data[0].kind == TOK_PUNCT && exp.data[0].v.punct == '<') {
+      size_t pos = 0;
+      int closed = 0;
+      u32 i;
+      /* Room is needed for the terminating NUL at the very least. */
+      if (cap == 0) compiler_panic(pp->c, loc, "#include: path too long");
+      for (i = 1; i < exp.n; ++i) {
+        size_t slen = 0;
+        const char* s;
+        if (exp.data[i].kind == TOK_PUNCT && exp.data[i].v.punct == '>') {
+          closed = 1;
+          break;
+        }
+        if (!exp.data[i].spelling) continue;
+        s = pool_str(pp->c->global, exp.data[i].spelling, &slen);
+        if (!s) continue;
+        /* Keep one byte free for the NUL written after the loop. */
+        if (pos + slen + 1 > cap) {
+          compiler_panic(pp->c, loc, "#include: path too long");
+        }
+        memcpy(path_out + pos, s, slen);
+        pos += slen;
+      }
+      if (!closed) {
+        compiler_panic(pp->c, loc, "#include: missing terminating '>'");
+      }
+      path_out[pos] = 0;
+      *system_out = 1;
+      return;
+    }
+    compiler_panic(pp->c, loc,
+                   "#include: expected \"...\" or <...> after expansion");
+  }
+}
+
+/* Handle #include: parse the header name, locate and load the file, open
+ * a lexer over its bytes, push that lexer as the new current source and
+ * record the include edge (includer -> included) with the source manager.
+ * A header that cannot be found is reported via compiler_panic. */
+static void do_include(Pp* pp, const Tok* line, u32 n, SrcLoc loc) {
+  char path[4096];
+  char resolved[4096];
+  int system_form = 0;
+  const u8* data;
+  size_t size;
+  u32 includer_id = 0;
+  u32 included_id;
+  u32 depth;
+  Lexer* lex;
+  TokSrc src;
+
+  parse_include_path(pp, line, n, loc, path, sizeof(path), &system_form);
+
+  if (!find_and_open_include(pp, path, system_form, loc, &data, &size, resolved,
+                             sizeof(resolved))) {
+    compiler_panic(pp->c, loc, "#include: file not found: %s", path);
+  }
+
+  /* The includer is the topmost lexer-backed source; it stays 0 when the
+   * stack holds none. */
+  depth = pp->nsources;
+  while (depth > 0) {
+    TokSrc* cand = &pp->sources[depth - 1];
+    if (cand->kind == SRC_LEX && cand->lex) {
+      includer_id = lex_file_id(cand->lex);
+      break;
+    }
+    --depth;
+  }
+
+  lex = lex_open_mem(pp->c, resolved, (const char*)data, size);
+  included_id = lex_file_id(lex);
+
+  memset(&src, 0, sizeof(src));
+  src.kind = SRC_LEX;
+  src.lex = lex;
+  src_push(pp, src);
+
+  source_add_include(pp->c->sources, includer_id, included_id, loc,
+                     system_form);
+}
+
+/* ============================================================
+ * #line (§6.10.4)
+ * ============================================================ */
+
+/* Return the topmost SRC_LEX entry of the source stack — the "current
+ * file" whose line/file should track #line directives — or NULL when the
+ * stack holds no lexer-backed source. */
+TokSrc* current_lex_src(Pp* pp) {
+  u32 depth = pp->nsources;
+  while (depth > 0) {
+    TokSrc* cand = &pp->sources[depth - 1];
+    if (cand->kind == SRC_LEX) return cand;
+    --depth;
+  }
+  return NULL;
+}
+
+/* Handle #line (§6.10.4).
+ *   line, n - tokens after the directive name
+ *   loc     - directive location, used for diagnostics
+ * The arguments are macro-expanded first. The first resulting token must
+ * be a pp-number consisting only of decimal digits: it is always read as
+ * decimal (a leading 0 does not make it octal) and must not exceed
+ * 2147483647. An optional second token must be a string literal naming
+ * the presumed file; a literal that is not a plain "..." string is
+ * ignored. Tokens after that are not examined.
+ *
+ * The effect is stored on the topmost lexer-backed source: line_delta is
+ * set so that the line following the directive reports the requested
+ * number, and file_override is set when a file name was given. */
+static void do_line(Pp* pp, const Tok* line, u32 n, SrcLoc loc) {
+  /* Macro-replace arguments first (a2). */
+  TokVec exp = {0};
+  Tok* slice;
+  TokSrc* lex_src;
+  i64 target_line = 0;
+  Sym target_file = 0;
+
+  if (n == 0) compiler_panic(pp->c, loc, "#line: missing arguments");
+  slice = arena_array(&pp->arena, Tok, n);
+  memcpy(slice, line, sizeof(Tok) * n);
+  expand_arg_to_eof(pp, slice, n, &exp);
+
+  if (exp.n == 0 || exp.data[0].kind != TOK_NUM) {
+    compiler_panic(pp->c, loc, "#line: expected line number");
+  }
+  {
+    size_t sl = 0;
+    size_t k;
+    const char* s = pool_str(pp->c->global, exp.data[0].spelling, &sl);
+    if (sl == 0) compiler_panic(pp->c, loc, "#line: expected line number");
+    for (k = 0; k < sl; ++k) {
+      if (s[k] < '0' || s[k] > '9') {
+        compiler_panic(pp->c, loc,
+                       "#line: line number must be a decimal digit sequence");
+      }
+      target_line = target_line * 10 + (i64)(s[k] - '0');
+      /* Checked per digit so target_line itself can never overflow. */
+      if (target_line > 2147483647) {
+        compiler_panic(pp->c, loc, "#line: line number out of range");
+      }
+    }
+  }
+  if (exp.n >= 2) {
+    if (exp.data[1].kind != TOK_STR) {
+      compiler_panic(pp->c, loc, "#line: file argument must be a string");
+    }
+    {
+      size_t sl = 0;
+      const char* s = pool_str(pp->c->global, exp.data[1].spelling, &sl);
+      if (sl >= 2 && s[0] == '"' && s[sl - 1] == '"') {
+        target_file = pool_intern(pp->c->global, s + 1, sl - 2);
+      }
+    }
+  }
+
+  lex_src = current_lex_src(pp);
+  if (!lex_src) compiler_panic(pp->c, loc, "#line outside any file");
+  {
+    /* The next token (post-directive-NL) currently has lex.line ==
+     * <lex's line counter>. Set delta so its user-visible line ==
+     * target_line. */
+    SrcLoc here = lex_loc(lex_src->lex);
+    lex_src->line_delta = (i32)target_line - (i32)here.line;
+    if (target_file) lex_src->file_override = target_file;
+  }
+}
+
+/* ============================================================
+ * #pragma + _Pragma (§6.10.6, §6.10.9)
+ * ============================================================ */
+
+/* Re-materialise a pragma as the token line `# pragma <args...> NEWLINE`
+ * and push it onto the source stack as a buffer, so pp_emit_text writes
+ * it as-is. SRC_BUF gates directive recognition off, so this won't
+ * recurse.
+ *   line, n - the pragma's argument tokens
+ *   loc     - location stamped on the synthesized '#', `pragma` and
+ *             newline tokens
+ * Every argument token gets TF_HAS_SPACE so it is separated from the
+ * token before it; all pushed tokens carry the empty hideset. */
+void emit_pragma_line(Pp* pp, const Tok* line, u32 n, SrcLoc loc) {
+  TokVec out = {0};
+  HidesetId* hids;
+  Tok tok;
+  u32 k;
+
+  /* '#' at the beginning of a line. */
+  memset(&tok, 0, sizeof(tok));
+  tok.kind = TOK_PP_HASH;
+  tok.flags = TF_AT_BOL;
+  tok.loc = loc;
+  tok.spelling = pool_intern_cstr(pp->c->global, "#");
+  tv_push(pp, &out, tok);
+
+  /* The `pragma` keyword. */
+  memset(&tok, 0, sizeof(tok));
+  tok.kind = TOK_IDENT;
+  tok.loc = loc;
+  tok.spelling = pp->sym_pragma_kw;
+  tok.v.ident = pp->sym_pragma_kw;
+  tv_push(pp, &out, tok);
+
+  /* The arguments, each forced to have a leading space. */
+  for (k = 0; k < n; ++k) {
+    tok = line[k];
+    tok.flags |= TF_HAS_SPACE;
+    tv_push(pp, &out, tok);
+  }
+
+  /* Terminating newline. */
+  memset(&tok, 0, sizeof(tok));
+  tok.kind = TOK_NEWLINE;
+  tok.loc = loc;
+  tv_push(pp, &out, tok);
+
+  hids = arena_array(&pp->arena, HidesetId, out.n ? out.n : 1);
+  for (k = 0; k < out.n; ++k) hids[k] = HS_EMPTY;
+  push_buf(pp, out.data, hids, out.n);
+}
+
+/* Handle #pragma. No pragma is interpreted here: every one, STDC pragmas
+ * included, is forwarded to the output unchanged via emit_pragma_line. */
+static void do_pragma(Pp* pp, const Tok* line, u32 n, SrcLoc loc) {
+  emit_pragma_line(pp, line, n, loc);
+}
+
+/* Destringize the spelling of a string-literal token into `out`.
+ *   str_tok - the token; its spelling must begin and end with '"'
+ *   out     - destination buffer of `cap` bytes, NUL-terminated on return
+ *   out_len - receives the number of bytes written, NUL excluded
+ * The surrounding quotes are removed and the escapes `\"` and `\\` are
+ * reduced to `"` and `\`. Other escape sequences pass through verbatim —
+ * the result is fed back through the lexer, which does its own escape
+ * handling for any string literals nested inside. A spelling that is not
+ * a plain "..." literal, or a payload that does not fit, is reported via
+ * compiler_panic. */
+static void destringize(Pp* pp, const Tok* str_tok, char* out, size_t cap,
+                        size_t* out_len) {
+  size_t slen = 0;
+  const char* s = pool_str(pp->c->global, str_tok->spelling, &slen);
+  size_t rd = 1; /* read index, starts just after the opening quote */
+  size_t wr = 0; /* write index into `out` */
+
+  if (slen < 2 || s[0] != '"' || s[slen - 1] != '"') {
+    compiler_panic(pp->c, str_tok->loc,
+                   "_Pragma: argument must be a string literal");
+  }
+
+  while (rd + 1 < slen) {
+    char c = s[rd];
+    /* rd + 2 < slen keeps the escaped character in front of the closing
+     * quote. */
+    if (c == '\\' && rd + 2 < slen) {
+      char next = s[rd + 1];
+      if (next == '\\' || next == '"') {
+        c = next;
+        ++rd;
+      }
+    }
+    if (wr + 1 >= cap)
+      compiler_panic(pp->c, str_tok->loc, "_Pragma: payload too long");
+    out[wr++] = c;
+    ++rd;
+  }
+
+  out[wr] = 0;
+  *out_len = wr;
+}
+
+/* Handle a `_Pragma("...")` invocation. The caller has already consumed
+ * the `_Pragma` identifier, passed as `invoke` for diagnostics.
+ *
+ * Returns 0 without further effect when peek_for_invoke_paren finds no
+ * '(' (the caller then emits `_Pragma` as a plain identifier). Otherwise
+ * reads the string literal and the ')', destringizes the literal, re-lexes
+ * the payload and emits it as a #pragma directive line; returns 1.
+ *
+ * The payload buffer is arena-allocated and sized from the literal's
+ * spelling, so there is no fixed limit on the pragma's length. */
+int try_expand_pragma_op(Pp* pp, const Tok* invoke) {
+  Tok str, rp;
+  HidesetId hs;
+  int saw_ws = 0;
+  size_t spelled = 0;
+  size_t text_cap;
+  size_t text_n = 0;
+  char* text;
+  Lexer* lex;
+  TokVec args = {0};
+
+  if (!peek_for_invoke_paren(pp, &saw_ws)) {
+    return 0; /* not an invocation; emit _Pragma as ident */
+  }
+  (void)saw_ws;
+
+  /* Read the string literal arg. */
+  str = src_next_raw(pp, &hs, NULL);
+  if (str.kind != TOK_STR) {
+    compiler_panic(pp->c, invoke->loc, "_Pragma: expected string literal");
+  }
+  rp = src_next_raw(pp, &hs, NULL);
+  if (rp.kind != TOK_PUNCT || rp.v.punct != ')') {
+    compiler_panic(pp->c, invoke->loc, "_Pragma: expected ')'");
+  }
+
+  /* The payload is never longer than the spelling minus its two quotes,
+   * so spelled + 2 bytes hold the payload, the '\n' and the NUL. The
+   * bytes need to live until lex_close, hence the arena. */
+  (void)pool_str(pp->c->global, str.spelling, &spelled);
+  text_cap = spelled + 2;
+  text = (char*)arena_alloc(&pp->arena, text_cap, 1);
+  destringize(pp, &str, text, text_cap - 1, &text_n);
+  /* Append a NL so the lexer terminates cleanly. */
+  text[text_n++] = '\n';
+  text[text_n] = 0;
+
+  /* Re-lex the payload into args. */
+  lex = lex_open_mem(pp->c, "<_Pragma>", text, text_n);
+  for (;;) {
+    Tok t = lex_next(lex);
+    if (t.kind == TOK_EOF || t.kind == TOK_NEWLINE) break;
+    tv_push(pp, &args, t);
+  }
+  lex_close(lex);
+
+  emit_pragma_line(pp, args.data, args.n, invoke->loc);
+  return 1;
+}
+
+/* ============================================================
+ * #error
+ * ============================================================ */
+
+/* Handle #error: join the spellings of the directive's tokens with single
+ * spaces and report the result through compiler_panic as
+ * "#error: <message>". Tokens without a spelling contribute nothing but
+ * still get their separating space. */
+static void do_error(Pp* pp, const Tok* line, u32 n, SrcLoc loc) {
+  CharBuf msg = {0};
+  u32 k;
+
+  for (k = 0; k < n; ++k) {
+    const char* text = NULL;
+    size_t len = 0;
+    if (line[k].spelling) {
+      text = pool_str(pp->c->global, line[k].spelling, &len);
+    }
+    if (k != 0) cb_putc(pp, &msg, ' ');
+    if (text && len) cb_append(pp, &msg, text, (u32)len);
+  }
+  cb_putc(pp, &msg, 0); /* NUL-terminate for %s */
+
+  compiler_panic(pp->c, loc, "#error: %s", msg.data ? msg.data : "");
+}
+
+/* ============================================================
+ * #embed (C23, §6.10.* per N3033)
+ * ============================================================ */
+
+/* Handle #embed (C23).
+ *   line, n - tokens after the directive name
+ *   loc     - directive location, used for diagnostics and stamped on the
+ *             generated tokens
+ * The first token must be a header-name (<...> or "..."); it is not
+ * macro-expanded. It may be followed by the parameters `limit(N)` and
+ * `if_empty(tokens...)` in any order; any other token is an error. The
+ * resource is located with the same search as #include.
+ *
+ * Output, pushed onto the source stack as a token buffer:
+ *   - resource empty, or limited to 0 bytes: the if_empty tokens, if any;
+ *   - otherwise one decimal pp-number per byte, separated by ','.
+ *
+ * A header name that does not fit the local path buffer is rejected with
+ * "#embed: path too long" instead of overrunning the buffer. */
+static void do_embed(Pp* pp, const Tok* line, u32 n, SrcLoc loc) {
+  char path[4096];
+  char resolved[4096];
+  int system_form = 0;
+  const u8* data;
+  size_t size;
+  u32 j;
+  /* Optional embed parameters parsed below. */
+  i64 limit_n = -1; /* negative: no limit() given */
+  Tok* if_empty_toks = NULL;
+  u32 if_empty_n = 0;
+
+  if (n == 0) compiler_panic(pp->c, loc, "#embed: missing path");
+
+  if (line[0].kind != TOK_HEADER) {
+    compiler_panic(pp->c, loc, "#embed: header-name argument required");
+  }
+  {
+    size_t sl = 0;
+    const char* s = pool_str(pp->c->global, line[0].spelling, &sl);
+    if (sl < 2) compiler_panic(pp->c, loc, "#embed: malformed header name");
+    if (s[0] == '<' && s[sl - 1] == '>')
+      system_form = 1;
+    else if (s[0] == '"' && s[sl - 1] == '"')
+      system_form = 0;
+    else
+      compiler_panic(pp->c, loc, "#embed: malformed header name");
+    /* Name without delimiters plus NUL must fit in `path`. */
+    if (sl - 2 + 1 > sizeof(path))
+      compiler_panic(pp->c, loc, "#embed: path too long");
+    memcpy(path, s + 1, sl - 2);
+    path[sl - 2] = 0;
+  }
+
+  /* Parse trailing parameters: limit(N), if_empty(...). */
+  j = 1;
+  while (j < n) {
+    if (line[j].kind == TOK_IDENT) {
+      size_t sl = 0;
+      const char* s = pool_str(pp->c->global, line[j].v.ident, &sl);
+      if (sl == 5 && memcmp(s, "limit", 5) == 0) {
+        if (j + 1 >= n || line[j + 1].kind != TOK_PUNCT ||
+            line[j + 1].v.punct != '(') {
+          compiler_panic(pp->c, loc, "#embed: expected '(' after limit");
+        }
+        j += 2;
+        if (j >= n || line[j].kind != TOK_NUM) {
+          compiler_panic(pp->c, loc, "#embed: limit() expects an integer");
+        }
+        {
+          size_t sl2 = 0;
+          const char* s2 = pool_str(pp->c->global, line[j].spelling, &sl2);
+          limit_n = parse_pp_int(s2, sl2);
+        }
+        ++j;
+        if (j >= n || line[j].kind != TOK_PUNCT || line[j].v.punct != ')') {
+          compiler_panic(pp->c, loc, "#embed: expected ')' to close limit");
+        }
+        ++j;
+        continue;
+      }
+      if (sl == 8 && memcmp(s, "if_empty", 8) == 0) {
+        u32 depth = 0;
+        u32 start;
+        if (j + 1 >= n || line[j + 1].kind != TOK_PUNCT ||
+            line[j + 1].v.punct != '(') {
+          compiler_panic(pp->c, loc, "#embed: expected '(' after if_empty");
+        }
+        j += 2;
+        start = j;
+        /* Scan to the ')' that closes if_empty(, skipping over balanced
+         * nested parentheses. */
+        while (j < n) {
+          if (line[j].kind == TOK_PUNCT) {
+            if (line[j].v.punct == '(')
+              ++depth;
+            else if (line[j].v.punct == ')') {
+              if (depth == 0) break;
+              --depth;
+            }
+          }
+          ++j;
+        }
+        if (j >= n) {
+          compiler_panic(pp->c, loc, "#embed: unterminated if_empty");
+        }
+        if_empty_toks = arena_array(&pp->arena, Tok, j - start ? j - start : 1);
+        if_empty_n = j - start;
+        memcpy(if_empty_toks, line + start, sizeof(Tok) * if_empty_n);
+        ++j; /* skip ')' */
+        continue;
+      }
+    }
+    compiler_panic(pp->c, loc, "#embed: unexpected token in parameter list");
+  }
+
+  if (!find_and_open_include(pp, path, system_form, loc, &data, &size, resolved,
+                             sizeof(resolved))) {
+    compiler_panic(pp->c, loc, "#embed: file not found: %s", path);
+  }
+
+  /* Apply limit(). */
+  {
+    size_t emit_n = size;
+    if (limit_n >= 0 && (u64)limit_n < emit_n) emit_n = (size_t)limit_n;
+    if (emit_n == 0) {
+      /* Empty: emit if_empty payload (or nothing). */
+      if (if_empty_toks && if_empty_n) {
+        HidesetId* hids = arena_array(&pp->arena, HidesetId, if_empty_n);
+        u32 i;
+        for (i = 0; i < if_empty_n; ++i) hids[i] = HS_EMPTY;
+        push_buf(pp, if_empty_toks, hids, if_empty_n);
+      }
+      return;
+    }
+    /* Build a buffer of pp-numbers separated by ',' punctuators. */
+    {
+      TokVec out = {0};
+      HidesetId* hids;
+      size_t i;
+      for (i = 0; i < emit_n; ++i) {
+        char numbuf[8];
+        int nl = 0;
+        u8 v = data[i];
+        /* "u8 -> decimal" without sprintf: digits are produced least
+         * significant first into tmp, then reversed into numbuf. */
+        if (v == 0) {
+          numbuf[nl++] = '0';
+        } else {
+          char tmp[4];
+          int k = 0;
+          while (v) {
+            tmp[k++] = (char)('0' + (v % 10));
+            v /= 10;
+          }
+          while (k > 0) numbuf[nl++] = tmp[--k];
+        }
+        {
+          Tok t;
+          memset(&t, 0, sizeof(t));
+          t.kind = TOK_NUM;
+          t.loc = loc;
+          t.spelling = pool_intern(pp->c->global, numbuf, (size_t)nl);
+          if (i == 0) t.flags = TF_AT_BOL;
+          /* Bytes after a comma get a leading space to match
+           * clang's `, ` separator format. */
+          else
+            t.flags = TF_HAS_SPACE;
+          tv_push(pp, &out, t);
+        }
+        if (i + 1 < emit_n) {
+          Tok comma;
+          memset(&comma, 0, sizeof(comma));
+          comma.kind = TOK_PUNCT;
+          comma.v.punct = ',';
+          comma.loc = loc;
+          comma.spelling = pool_intern_cstr(pp->c->global, ",");
+          tv_push(pp, &out, comma);
+        }
+      }
+      hids = arena_array(&pp->arena, HidesetId, out.n ? out.n : 1);
+      {
+        u32 k;
+        for (k = 0; k < out.n; ++k) hids[k] = HS_EMPTY;
+      }
+      push_buf(pp, out.data, hids, out.n);
+    }
+  }
+}
+
+/* ============================================================
+ * Directive dispatch
+ * ============================================================ */
+
+/* Read one directive line and dispatch on the directive name.
+ *   hash_loc - location of the introducing '#'; handed to every handler
+ *              that takes a location
+ * A line with no tokens is the null directive and is ignored. A first
+ * token that is not an identifier, or a name that matches no handler, is
+ * reported via compiler_panic at that token's location. Handlers receive
+ * the tokens that follow the directive name. */
+void process_directive(Pp* pp, SrcLoc hash_loc) {
+  Tok* line;
+  Tok* args;
+  u32 n;
+  u32 nargs;
+  Sym name;
+
+  read_directive_line(pp, &line, &n);
+  if (n == 0) return; /* null directive: '#' newline */
+
+  if (line[0].kind != TOK_IDENT) {
+    compiler_panic(pp->c, line[0].loc, "expected directive name after '#'");
+  }
+  name = line[0].v.ident;
+  args = line + 1;
+  nargs = n - 1;
+
+  if (name == pp->sym_define) {
+    do_define(pp, args, nargs);
+    return;
+  }
+  if (name == pp->sym_undef) {
+    do_undef(pp, args, nargs);
+    return;
+  }
+  if (name == pp->sym_if) {
+    do_if_directive(pp, args, nargs, hash_loc);
+    return;
+  }
+  if (name == pp->sym_ifdef) {
+    do_ifdef(pp, args, nargs, 0, hash_loc);
+    return;
+  }
+  if (name == pp->sym_ifndef) {
+    do_ifdef(pp, args, nargs, 1, hash_loc);
+    return;
+  }
+  if (name == pp->sym_elif) {
+    do_elif(pp, hash_loc);
+    return;
+  }
+  if (name == pp->sym_else) {
+    do_else(pp, hash_loc);
+    return;
+  }
+  if (name == pp->sym_endif) {
+    do_endif(pp, hash_loc);
+    return;
+  }
+  if (name == pp->sym_include) {
+    do_include(pp, args, nargs, hash_loc);
+    return;
+  }
+  if (name == pp->sym_line) {
+    do_line(pp, args, nargs, hash_loc);
+    return;
+  }
+  if (name == pp->sym_pragma) {
+    do_pragma(pp, args, nargs, hash_loc);
+    return;
+  }
+  if (name == pp->sym_error) {
+    do_error(pp, args, nargs, hash_loc);
+    return;
+  }
+  if (name == pp->sym_embed) {
+    do_embed(pp, args, nargs, hash_loc);
+    return;
+  }
+  compiler_panic(pp->c, line[0].loc, "unsupported directive");
+}
diff --git a/src/pp/pp_expand.c b/src/pp/pp_expand.c
@@ -0,0 +1,1008 @@
+/* pp_expand.c — hideset table, macro hashmap, #define/#undef, substitution,
+ * paste, stringize, argument prescan, func/object macro expansion. */
+
+#include "pp/pp_priv.h"
+
+static int body_tokens_equal(const Tok* a, u32 na, const Tok* b, u32 nb);
+static int macros_equal(const Macro* a, const Macro* b);
+
+/* ============================================================
+ * Hideset table
+ * ============================================================ */
+
+/* Return 1 when `s` occurs among the first `n` entries of `a`, else 0. */
+static int sym_in_array(const Sym* a, u32 n, Sym s) {
+  while (n--) {
+    if (*a++ == s) return 1;
+  }
+  return 0;
+}
+
+/* Intern the hideset `names[0..n)` and return its id. An empty set maps to
+ * HS_EMPTY. An existing entry with the same length and the same names in
+ * the same order is reused; otherwise a new Hideset is allocated from the
+ * arena and appended to pp->hsets. */
+static HidesetId hs_register(Pp* pp, const Sym* names, u32 n) {
+  Hideset* fresh;
+  u32 id;
+  u32 k;
+
+  if (n == 0) return HS_EMPTY;
+
+  /* Hidesets are tiny, so a linear scan is enough. The scan starts at
+   * index 1; index 0 is never compared. */
+  for (id = 1; id < pp->hsets_n; ++id) {
+    const Hideset* cand = pp->hsets[id];
+    int same = (cand->n == n);
+    for (k = 0; same && k < n; ++k) {
+      if (cand->names[k] != names[k]) same = 0;
+    }
+    if (same) return (HidesetId)id;
+  }
+
+  /* Table full: double it (first allocation holds 8 entries). */
+  if (pp->hsets_n == pp->hsets_cap) {
+    u32 grown = pp->hsets_cap ? pp->hsets_cap * 2 : 8;
+    pp->hsets =
+        (Hideset**)pp_xrealloc(pp, pp->hsets, sizeof(Hideset*) * pp->hsets_cap,
+                               sizeof(Hideset*) * grown, _Alignof(Hideset*));
+    pp->hsets_cap = grown;
+  }
+
+  /* Hideset already embeds one name slot, so n - 1 extra slots suffice
+   * (n is non-zero here). */
+  fresh = (Hideset*)arena_alloc(&pp->arena,
+                                sizeof(Hideset) + sizeof(Sym) * (n - 1),
+                                _Alignof(Hideset));
+  fresh->n = n;
+  for (k = 0; k < n; ++k) fresh->names[k] = names[k];
+
+  id = pp->hsets_n++;
+  pp->hsets[id] = fresh;
+  return (HidesetId)id;
+}
+
+/* Return 1 when hideset `id` contains `s`. The empty hideset and the zero
+ * symbol never match. */
+int hs_contains(Pp* pp, HidesetId id, Sym s) {
+  const Hideset* set;
+  if (s == 0) return 0;
+  if (id == HS_EMPTY) return 0;
+  set = pp->hsets[id];
+  return sym_in_array(set->names, set->n, s);
+}
+
+/* Return the id of hideset `id` extended with `s`. Returns `id` unchanged
+ * when `s` is zero or already a member. Panics when the result would exceed
+ * the 64-entry scratch buffer. */
+HidesetId hs_add(Pp* pp, HidesetId id, Sym s) {
+  Sym merged[64];
+  const Hideset* base = NULL;
+  u32 count = 0;
+  u32 slot;
+
+  if (s == 0 || hs_contains(pp, id, s)) return id;
+
+  if (id != HS_EMPTY) {
+    base = pp->hsets[id];
+    count = base->n;
+  }
+  if (count + 1 > sizeof(merged) / sizeof(merged[0])) {
+    compiler_panic(pp->c, (SrcLoc){0, 0, 0}, "pp: hideset overflow");
+  }
+  for (slot = 0; slot < count; ++slot) merged[slot] = base->names[slot];
+
+  /* Insert from the back so the names stay in ascending numeric order;
+   * hs_register compares sets element by element, so ordering gives each
+   * set a single canonical form. */
+  for (slot = count; slot > 0 && merged[slot - 1] > s; --slot) {
+    merged[slot] = merged[slot - 1];
+  }
+  merged[slot] = s;
+
+  return hs_register(pp, merged, count + 1);
+}
+
+/* Intersection of two hidesets. Intended for token-paste in stage 5 and
+ * defined early so the rest of the file needs no forward declaration;
+ * marked unused because nothing in this file calls it yet. */
+__attribute__((unused)) static HidesetId hs_intersect(Pp* pp, HidesetId a,
+                                                      HidesetId b) {
+  Sym common[64];
+  const Hideset* lhs;
+  const Hideset* rhs;
+  u32 li = 0;
+  u32 ri = 0;
+  u32 nc = 0;
+
+  if (a == HS_EMPTY || b == HS_EMPTY) return HS_EMPTY;
+  if (a == b) return a;
+
+  lhs = pp->hsets[a];
+  rhs = pp->hsets[b];
+
+  /* Merge-style walk; relies on both name lists being sorted ascending. */
+  while (li < lhs->n && ri < rhs->n) {
+    Sym x = lhs->names[li];
+    Sym y = rhs->names[ri];
+    if (x < y) {
+      ++li;
+    } else if (y < x) {
+      ++ri;
+    } else {
+      common[nc++] = x;
+      ++li;
+      ++ri;
+    }
+  }
+  return hs_register(pp, common, nc);
+}
+
+/* ============================================================
+ * Macro table
+ * ============================================================ */
+
+/* Thin wrappers over the generated MacroMap_* functions; preserved
+ * because the call sites are tagged "mt_*" throughout this TU. */
+Macro* mt_get(Pp* pp, Sym name) {
+  /* MacroMap_get yields a pointer to the stored Macro*, or NULL on a miss. */
+  Macro** slot = MacroMap_get(&pp->mtab, name);
+  if (slot == NULL) return NULL;
+  return *slot;
+}
+
+void mt_put(Pp* pp, Sym name, Macro* m) { /* store `m` under `name` in pp->mtab */
+ (void)MacroMap_set(&pp->mtab, name, m); /* MacroMap_set's result is deliberately discarded */
+}
+
+void mt_del(Pp* pp, Sym name) { MacroMap_del(&pp->mtab, name); } /* forwards to MacroMap_del on pp->mtab */
+
+/* ============================================================
+ * #define / #undef
+ * ============================================================ */
+
+/* Append `s` to the parameter list being built for a #define. The list
+ * lives in the arena and grows by doubling (first capacity 4). Panics at
+ * `loc` when `s` is already present: C11 §6.10.3 ¶6 requires the parameter
+ * identifiers of a function-like macro to be uniquely declared. */
+static void define_add_param(Pp* pp, Sym** params, u32* pn, u32* pcap, Sym s,
+                             SrcLoc loc) {
+  u32 k;
+  for (k = 0; k < *pn; ++k) {
+    if ((*params)[k] == s) {
+      compiler_panic(pp->c, loc, "#define: duplicate macro parameter name");
+    }
+  }
+  if (*pn == *pcap) {
+    u32 nc = *pcap ? *pcap * 2 : 4;
+    Sym* nb = arena_array(&pp->arena, Sym, nc);
+    if (*pcap) memcpy(nb, *params, sizeof(Sym) * *pcap);
+    *params = nb;
+    *pcap = nc;
+  }
+  (*params)[(*pn)++] = s;
+}
+
+/* Handle `#define`. `line[0..n)` holds the directive's tokens after the
+ * word `define`: the macro name, an optional parameter list, and the
+ * replacement list.
+ *
+ * Builds a Macro in the arena, rewrites parameter occurrences in the body
+ * to TOK_PP_PARAM (with v.punct = parameter index), and stores it in the
+ * macro table. A redefinition is accepted silently when it is identical
+ * to the existing definition (macros_equal) and panics otherwise.
+ *
+ * Panics on: a missing name, a malformed or unterminated parameter list,
+ * a repeated parameter name, `__VA_ARGS__` used as a parameter name or in
+ * a non-variadic body, and definitions of reserved / predefined names. */
+void do_define(Pp* pp, const Tok* line, u32 n) {
+  Macro* m;
+  u32 i = 0;
+  Sym name;
+  SrcLoc def_loc;
+  Macro* existing;
+
+  if (i >= n || line[i].kind != TOK_IDENT) {
+    compiler_panic(pp->c, n ? line[0].loc : (SrcLoc){0, 0, 0},
+                   "#define: expected macro name");
+  }
+  name = line[i].v.ident;
+  def_loc = line[i].loc;
+  ++i;
+
+  m = arena_znew(&pp->arena, Macro);
+  m->name = name;
+  m->def_loc = def_loc;
+
+  /* Function-like vs object-like: '(' immediately after the name with no
+   * intervening whitespace. */
+  if (i < n && line[i].kind == TOK_PUNCT && line[i].v.punct == '(' &&
+      (line[i].flags & TF_HAS_SPACE) == 0) {
+    Sym* params = NULL;
+    u32 pcap = 0, pn = 0;
+    ++i;
+    m->is_func = 1;
+    if (i < n && line[i].kind == TOK_PUNCT && line[i].v.punct == ')') {
+      ++i;
+    } else {
+      for (;;) {
+        if (i >= n) {
+          compiler_panic(pp->c, def_loc,
+                         "#define: unterminated parameter list");
+        }
+        if (line[i].kind == TOK_PUNCT && line[i].v.punct == P_ELLIPSIS) {
+          /* Append a synthetic __VA_ARGS__ param so body-rewrite
+           * matches the standard identifier directly. */
+          define_add_param(pp, &params, &pn, &pcap, pp->sym_va_args,
+                           line[i].loc);
+          m->is_variadic = 1;
+          ++i;
+        } else if (line[i].kind == TOK_IDENT) {
+          /* §6.10.3 ¶5: __VA_ARGS__ may only appear in the replacement
+           * list of a macro declared with '...'. Without this check a
+           * parameter spelled __VA_ARGS__ would be rewritten to
+           * TOK_PP_PARAM and slip past the body check below. */
+          if (line[i].v.ident == pp->sym_va_args) {
+            compiler_panic(pp->c, line[i].loc,
+                           "#define: __VA_ARGS__ is not a valid parameter name");
+          }
+          define_add_param(pp, &params, &pn, &pcap, line[i].v.ident,
+                           line[i].loc);
+          ++i;
+        } else {
+          compiler_panic(pp->c, line[i].loc, "#define: bad parameter list");
+        }
+        if (i >= n) {
+          compiler_panic(pp->c, def_loc,
+                         "#define: unterminated parameter list");
+        }
+        if (line[i].kind == TOK_PUNCT && line[i].v.punct == ')') {
+          ++i;
+          break;
+        }
+        if (m->is_variadic) {
+          compiler_panic(pp->c, line[i].loc,
+                         "#define: '...' must be last parameter");
+        }
+        if (line[i].kind == TOK_PUNCT && line[i].v.punct == ',') {
+          ++i;
+          continue;
+        }
+        compiler_panic(pp->c, line[i].loc, "#define: expected ',' or ')'");
+      }
+    }
+    m->params = params;
+    m->n_params = pn;
+  }
+
+  /* Refuse define/undef of a few names the spec reserves: `defined`
+   * and a small set of mandatory predefined macros. */
+  if (name == pp->sym_defined || name == pp->sym_line__ ||
+      name == pp->sym_file__ || name == pp->sym_date__ ||
+      name == pp->sym_time__) {
+    compiler_panic(pp->c, def_loc,
+                   "#define of a reserved / predefined name is not allowed");
+  }
+  /* Static predefineds (__STDC__ et al.) are registered through this same
+   * function. The first definition finds the table empty and goes
+   * through; any later one, even with an identical body, is treated as a
+   * user-level redefinition and rejected. */
+  if (name == pp->sym_stdc__ || name == pp->sym_stdc_hosted__ ||
+      name == pp->sym_stdc_version__) {
+    if (mt_get(pp, name)) {
+      compiler_panic(pp->c, def_loc,
+                     "#define of a mandatory predefined macro is not allowed");
+    }
+  }
+
+  /* Body: rewrite parameter occurrences to TOK_PP_PARAM. */
+  {
+    u32 body_n = n - i;
+    u32 j;
+    m->body = body_n ? arena_array(&pp->arena, Tok, body_n) : NULL;
+    m->body_len = body_n;
+    for (j = 0; j < body_n; ++j) {
+      Tok t = line[i + j];
+      if (m->is_func && t.kind == TOK_IDENT) {
+        u32 p;
+        for (p = 0; p < m->n_params; ++p) {
+          if (m->params[p] == t.v.ident) {
+            t.kind = TOK_PP_PARAM;
+            t.v.punct = p;
+            break;
+          }
+        }
+      }
+      /* §6.10.3 ¶5: __VA_ARGS__ outside a variadic macro is
+       * undefined behavior; we diagnose. */
+      if (!m->is_variadic && t.kind == TOK_IDENT &&
+          t.v.ident == pp->sym_va_args) {
+        compiler_panic(pp->c, t.loc,
+                       "__VA_ARGS__ may only appear in a variadic macro body");
+      }
+      m->body[j] = t;
+    }
+    /* Drop the leading-space bit on the first body token: it reflects
+     * the whitespace between the macro name (or close-paren) and the
+     * body, which is irrelevant to expansion output. */
+    if (m->body_len) m->body[0].flags &= (u16)~TF_HAS_SPACE;
+  }
+
+  existing = mt_get(pp, name);
+  if (existing) {
+    if (!macros_equal(existing, m)) {
+      compiler_panic(pp->c, def_loc,
+                     "macro redefined with different replacement");
+    }
+    return;
+  }
+  mt_put(pp, name, m);
+}
+
+/* Handle `#undef`. `line[0]` must be the identifier to remove. Names the
+ * preprocessor treats as mandatory (`defined` and the predefined macros
+ * listed below) cannot be undefined. */
+void do_undef(Pp* pp, const Tok* line, u32 n) {
+  Sym locked[8];
+  Sym target;
+
+  if (n == 0 || line[0].kind != TOK_IDENT) {
+    compiler_panic(pp->c, n ? line[0].loc : (SrcLoc){0, 0, 0},
+                   "#undef: expected identifier");
+  }
+  target = line[0].v.ident;
+
+  locked[0] = pp->sym_defined;
+  locked[1] = pp->sym_line__;
+  locked[2] = pp->sym_file__;
+  locked[3] = pp->sym_date__;
+  locked[4] = pp->sym_time__;
+  locked[5] = pp->sym_stdc__;
+  locked[6] = pp->sym_stdc_hosted__;
+  locked[7] = pp->sym_stdc_version__;
+  if (sym_in_array(locked, 8, target)) {
+    compiler_panic(pp->c, line[0].loc,
+                   "#undef of a mandatory predefined name is not allowed");
+  }
+
+  mt_del(pp, target);
+}
+
+/* ============================================================
+ * Body comparison helpers
+ * ============================================================ */
+
+/* Compare two replacement lists token by token. Tokens match when kind and
+ * spelling agree and, for every token after the first, when the
+ * TF_HAS_SPACE bit agrees (§6.10.3 ¶2). The first token's space bit only
+ * records what sat between the macro name and the body, so it is ignored. */
+static int body_tokens_equal(const Tok* a, u32 na, const Tok* b, u32 nb) {
+  u32 k;
+
+  if (na != nb) return 0;
+
+  for (k = 0; k < na; ++k) {
+    const Tok* x = &a[k];
+    const Tok* y = &b[k];
+    if (x->kind != y->kind || x->spelling != y->spelling) return 0;
+    if (k != 0 && ((x->flags ^ y->flags) & TF_HAS_SPACE) != 0) return 0;
+  }
+  return 1;
+}
+
+/* Two macro definitions are equal when they agree on function-likeness,
+ * variadic-ness, the parameter names in order, and the replacement list. */
+static int macros_equal(const Macro* a, const Macro* b) {
+  u32 p;
+
+  if (a->is_func != b->is_func || a->is_variadic != b->is_variadic ||
+      a->n_params != b->n_params) {
+    return 0;
+  }
+  for (p = 0; p < a->n_params; ++p) {
+    if (a->params[p] != b->params[p]) return 0;
+  }
+  return body_tokens_equal(a->body, a->body_len, b->body, b->body_len);
+}
+
+/* ============================================================
+ * Object-macro expansion
+ * ============================================================ */
+
+static void subst_phase2(Pp* pp, const Tok* in, u32 nin, const Tok* invoke,
+ TokVec* out);
+
+/* Expand an object-like macro: run its body through the paste phase, stamp
+ * every resulting token with the invocation's location and with the
+ * hideset `invoke_hs` plus the macro's own name, then push the result as a
+ * buffer source. The first token takes over the invocation token's
+ * TF_AT_BOL / TF_HAS_SPACE bits so output formatting follows the
+ * invocation site. An empty body (before or after pasting) pushes nothing. */
+static void expand_object_macro(Pp* pp, const Macro* m, const Tok* invoke,
+                                HidesetId invoke_hs) {
+  TokVec pasted = {0};
+  Tok* copy;
+  Tok* first;
+  HidesetId painted;
+  HidesetId* hids;
+  u32 k;
+
+  if (m->body_len == 0) return; /* nothing to push */
+
+  /* Object-like macros may still use `##`. With no parameters, phase 1
+   * is a plain copy of the body. */
+  copy = arena_array(&pp->arena, Tok, m->body_len);
+  memcpy(copy, m->body, sizeof(Tok) * m->body_len);
+  subst_phase2(pp, copy, m->body_len, invoke, &pasted);
+
+  if (pasted.n == 0) return;
+
+  first = &pasted.data[0];
+  first->flags = (u16)((first->flags & ~(TF_AT_BOL | TF_HAS_SPACE)) |
+                       (invoke->flags & (TF_AT_BOL | TF_HAS_SPACE)));
+
+  painted = hs_add(pp, invoke_hs, m->name);
+  hids = arena_array(&pp->arena, HidesetId, pasted.n);
+  for (k = 0; k < pasted.n; ++k) {
+    pasted.data[k].loc = invoke->loc;
+    hids[k] = painted;
+  }
+  push_buf(pp, pasted.data, hids, pasted.n);
+}
+
+/* ============================================================
+ * Function-like macro expansion
+ * ============================================================ */
+
+/* Peek for an open paren after the just-consumed identifier (which named
+ * a function-like macro). Newlines are whitespace inside an invocation.
+ *
+ * Returns 1 when a `(` follows; the `(` and any newlines before it are
+ * consumed. Returns 0 otherwise; the tokens read while looking (newlines
+ * plus the non-`(` token, if any) are pushed back as a buffer source so
+ * later reads still see them. On EOF only the saved newlines are pushed
+ * back. In both cases `*ws_has_space_out` reports whether any whitespace
+ * (a newline or a TF_HAS_SPACE token) sat between the identifier and the
+ * token that ended the peek.
+ *
+ * The non-`(` token is pushed back together with the hideset it was read
+ * with. Pushing it back with no hideset would strip its blue paint: with
+ * `#define f(x) x` and `#define a f a`, the inner `a` would be read again
+ * unpainted and expand without end. (This assumes a NULL hideset array
+ * passed to push_buf means "all empty", as the other NULL uses suggest.) */
+int peek_for_invoke_paren(Pp* pp, int* ws_has_space_out) {
+  TokVec saved = {0};
+  int saw_ws = 0;
+  Tok t;
+  HidesetId hs;
+
+  for (;;) {
+    t = src_next_raw(pp, &hs, NULL);
+    if (t.kind == TOK_NEWLINE) {
+      saw_ws = 1;
+      tv_push(pp, &saved, t);
+      continue;
+    }
+    if (t.kind == TOK_EOF) {
+      /* No '(' — push back saved newlines, leave EOF for the next read. */
+      if (saved.n) push_buf(pp, saved.data, NULL, saved.n);
+      *ws_has_space_out = saw_ws;
+      return 0;
+    }
+    if (t.flags & TF_HAS_SPACE) saw_ws = 1;
+    if (t.kind == TOK_PUNCT && t.v.punct == '(') {
+      /* Consumed. The newlines we walked past are whitespace and
+       * dropped (per spec); they don't go back on the stack. */
+      *ws_has_space_out = saw_ws;
+      return 1;
+    }
+    /* Not an invocation: restore the newlines and this token. Newlines
+     * can never name a macro, so only the final token needs its
+     * original hideset. */
+    tv_push(pp, &saved, t);
+    {
+      HidesetId* hids = arena_array(&pp->arena, HidesetId, saved.n);
+      u32 k;
+      for (k = 0; k + 1 < saved.n; ++k) hids[k] = HS_EMPTY;
+      hids[saved.n - 1] = hs;
+      push_buf(pp, saved.data, hids, saved.n);
+    }
+    *ws_has_space_out = saw_ws;
+    return 0;
+  }
+}
+
+/* Run macro expansion on a fixed token sequence to completion, yielding the
+ * fully-expanded token sequence. Used to pre-expand each function-macro
+ * argument before substitution (§6.10.3.1 ¶1).
+ *
+ * `in[0..nin)` is pushed as a SRC_BUF source with scope_top set and a NULL
+ * hideset array, then drained through pp_next_raw until it reports
+ * TOK_EOF. Every non-newline token is appended to `out`; the source is
+ * popped by decrementing pp->nsources.
+ *
+ * NOTE(review): the tokens carry no hidesets into this scope (src.hs is
+ * NULL), so any blue paint they had at the invocation site looks lost
+ * here — confirm against src_next_raw. */
+void expand_arg_to_eof(Pp* pp, Tok* in, u32 nin, TokVec* out) {
+ TokSrc src;
+ Tok t;
+
+ memset(&src, 0, sizeof(src));
+ src.kind = SRC_BUF;
+ src.scope_top = 1;
+ src.toks = in;
+ src.hs = NULL;
+ src.n = nin;
+ src_push(pp, src);
+
+ for (;;) {
+ t = pp_next_raw(pp); /* drives macro expansion within this scope */
+ if (t.kind == TOK_EOF) break;
+ if (t.kind == TOK_NEWLINE) {
+ /* Newlines inside an arg act as whitespace. The NL token itself
+ * is dropped; this loop does not set TF_HAS_SPACE on the token
+ * that follows it. */
+ continue;
+ }
+ tv_push(pp, out, t);
+ }
+ /* Pop our scope source. */
+ --pp->nsources;
+}
+
+/* Argument list for a function-like invocation. Stored as parallel
+ * (start, end) ranges into a flat unexpanded token vector and a flat
+ * expanded token vector. Argument `p` occupies
+ * [raw_start[p], raw_start[p + 1]) in `raw` and
+ * [exp_start[p], exp_start[p + 1]) in `exp`.
+ * read_invocation_args fills raw, raw_n, raw_start and n_args;
+ * preexpand_args fills exp, exp_n and exp_start. */
+typedef struct ArgList {
+ /* Unexpanded arg tokens (raw as collected from invocation). */
+ Tok* raw;
+ u32 raw_n;
+ u32* raw_start; /* size n_args + 1 (sentinel = raw_n) */
+ /* Pre-expanded tokens. */
+ Tok* exp;
+ u32 exp_n;
+ u32* exp_start; /* size n_args + 1 (sentinel = exp_n) */
+ u32 n_args;
+} ArgList;
+
+/* Close the argument currently being collected: record `raw_n` as the
+ * start of the next argument (and sentinel of this one) and bump the
+ * count. The boundary array grows by doubling into a fresh arena array. */
+static void arglist_close_arg(Pp* pp, u32** starts, u32* cap, u32* n_args,
+                              u32 raw_n) {
+  if (*n_args + 1 >= *cap) {
+    u32 nc = *cap * 2;
+    u32* nb = arena_array(&pp->arena, u32, nc);
+    memcpy(nb, *starts, sizeof(u32) * *cap);
+    *starts = nb;
+    *cap = nc;
+  }
+  ++*n_args;
+  (*starts)[*n_args] = raw_n;
+}
+
+/* Collect the arguments of a function-like macro invocation. The caller
+ * has just consumed the opening `(`. Returns the closing `)` token.
+ *
+ * On return `out->raw` / `out->raw_start` / `out->n_args` describe the
+ * unexpanded arguments; the pre-expanded half of `out` is left zeroed.
+ *
+ * Rules applied while scanning:
+ *  - Only parentheses nest (C11 §6.10.3 ¶11). A comma inside `[...]` or
+ *    `{...}` still separates arguments, and a stray `]` or `}` cannot
+ *    unbalance the scan.
+ *  - A top-level comma ends the current argument, except that once every
+ *    named parameter of a variadic macro is filled, commas are kept as
+ *    part of __VA_ARGS__.
+ *  - Newlines are whitespace: the newline token is dropped and the next
+ *    collected token is marked TF_HAS_SPACE, so stringizing keeps the
+ *    separation between tokens that sat on different lines.
+ *  - `M()` yields zero arguments for an arity-0 macro and one empty
+ *    argument otherwise.
+ *  - A variadic macro called with exactly its named arguments gets an
+ *    empty trailing __VA_ARGS__.
+ *
+ * Panics on EOF inside the invocation and on an argument-count mismatch.
+ *
+ * NOTE(review): the hideset returned with each argument token is
+ * discarded here, so arguments are pre-expanded without their original
+ * blue paint — confirm whether that is intended. */
+static Tok read_invocation_args(Pp* pp, const Macro* m, SrcLoc invoke_loc,
+                                ArgList* out) {
+  TokVec raw = {0};
+  u32* starts;
+  u32 starts_cap = 8;
+  u32 n_args = 0;
+  u32 cur_start = 0;
+  int depth = 0;
+  int pending_space = 0;
+  int first_token_of_arg = 1;
+  Tok t;
+  Tok close_tok;
+  HidesetId hs;
+
+  memset(out, 0, sizeof(*out));
+  memset(&close_tok, 0, sizeof(close_tok));
+  starts = arena_array(&pp->arena, u32, starts_cap);
+  starts[0] = 0;
+
+  for (;;) {
+    t = src_next_raw(pp, &hs, NULL);
+    (void)hs;
+    if (t.kind == TOK_EOF) {
+      compiler_panic(pp->c, invoke_loc,
+                     "unterminated function-like macro invocation");
+    }
+    if (t.kind == TOK_NEWLINE) {
+      /* Remember the line break as whitespace for the next token. */
+      pending_space = 1;
+      continue;
+    }
+
+    if (t.kind == TOK_PUNCT) {
+      u32 p = t.v.punct;
+      if (p == '(') {
+        ++depth;
+      } else if (p == ')') {
+        if (depth == 0) {
+          close_tok = t;
+          break; /* end of invocation */
+        }
+        --depth;
+      } else if (p == ',' && depth == 0) {
+        /* In the __VA_ARGS__ slot the comma is ordinary content and
+         * falls through to the push below. */
+        if (!(m->is_variadic && n_args + 1 >= m->n_params)) {
+          arglist_close_arg(pp, &starts, &starts_cap, &n_args, raw.n);
+          cur_start = raw.n;
+          first_token_of_arg = 1;
+          pending_space = 0;
+          continue;
+        }
+      }
+    }
+
+    if (pending_space) {
+      t.flags = (u16)(t.flags | TF_HAS_SPACE);
+      pending_space = 0;
+    }
+    tv_push(pp, &raw, t);
+    first_token_of_arg = 0;
+  }
+
+  /* Close the final argument. A call with nothing between the parens
+   * produces a slot only when the macro expects at least one argument. */
+  {
+    int empty_call = (n_args == 0 && raw.n == cur_start && first_token_of_arg);
+    if (!empty_call || m->n_params > 0 || m->is_variadic) {
+      arglist_close_arg(pp, &starts, &starts_cap, &n_args, raw.n);
+    }
+  }
+
+  /* Validate arity. For variadic macros n_params counts the synthetic
+   * __VA_ARGS__ slot, so n_params - 1 arguments are mandatory. */
+  if (m->is_variadic) {
+    u32 named = m->n_params ? m->n_params - 1 : 0;
+    if (n_args < named) {
+      compiler_panic(pp->c, invoke_loc,
+                     "too few arguments to variadic macro invocation");
+    }
+    if (n_args + 1 == m->n_params) {
+      /* Exactly the named arguments were given: add empty __VA_ARGS__. */
+      arglist_close_arg(pp, &starts, &starts_cap, &n_args, raw.n);
+    }
+  } else if (n_args != m->n_params) {
+    compiler_panic(pp->c, invoke_loc,
+                   "wrong number of arguments to function-like macro");
+  }
+
+  out->raw = raw.data;
+  out->raw_n = raw.n;
+  out->raw_start = starts;
+  out->n_args = n_args;
+  return close_tok;
+}
+
+/* Fill the pre-expanded half of `a`: each non-empty raw argument is copied
+ * into its own arena buffer and fully macro-expanded, with the results
+ * appended to one flat vector. exp_start[i] / exp_start[i + 1] bracket
+ * argument i; empty arguments produce an empty range. */
+static void preexpand_args(Pp* pp, ArgList* a) {
+  TokVec expanded = {0};
+  u32* bounds = arena_array(&pp->arena, u32, a->n_args + 1);
+  u32 arg;
+
+  bounds[0] = 0;
+  for (arg = 0; arg < a->n_args; ++arg) {
+    u32 begin = a->raw_start[arg];
+    u32 end = a->raw_start[arg + 1];
+    if (end > begin) {
+      /* Private copy so expand_arg_to_eof never aliases a->raw. */
+      u32 count = end - begin;
+      Tok* slice = arena_array(&pp->arena, Tok, count);
+      memcpy(slice, a->raw + begin, sizeof(Tok) * count);
+      expand_arg_to_eof(pp, slice, count, &expanded);
+    }
+    bounds[arg + 1] = expanded.n;
+  }
+
+  a->exp = expanded.data;
+  a->exp_n = expanded.n;
+  a->exp_start = bounds;
+}
+
+/* Produce the TOK_STR that `#param` yields for the unexpanded argument
+ * tokens `arg[lo..hi)`. Token spellings are concatenated between double
+ * quotes, with one space before every token after the first that carries
+ * TF_HAS_SPACE (leading whitespace of the argument is therefore dropped).
+ * Inside string and character literal spellings, '"' and '\' are prefixed
+ * with a backslash. The result is located at `loc`. */
+static Tok make_stringize(Pp* pp, const Tok* arg, u32 lo, u32 hi, SrcLoc loc) {
+  CharBuf text = {0};
+  Tok result;
+  Sym interned;
+  u32 idx;
+
+  cb_putc(pp, &text, '"');
+  for (idx = lo; idx < hi; ++idx) {
+    const Tok* cur = arg + idx;
+    const char* chars = NULL;
+    size_t nchars = 0;
+    size_t c;
+    int in_literal;
+
+    if (cur->spelling) {
+      chars = pool_str(pp->c->global, cur->spelling, &nchars);
+    }
+    if (idx != lo && (cur->flags & TF_HAS_SPACE)) cb_putc(pp, &text, ' ');
+    if (!chars || nchars == 0) continue;
+
+    in_literal = (cur->kind == TOK_STR || cur->kind == TOK_CHR);
+    for (c = 0; c < nchars; ++c) {
+      char ch = chars[c];
+      if (in_literal && (ch == '"' || ch == '\\')) cb_putc(pp, &text, '\\');
+      cb_putc(pp, &text, ch);
+    }
+  }
+  cb_putc(pp, &text, '"');
+
+  interned = pool_intern(pp->c->global, text.data, text.len);
+
+  memset(&result, 0, sizeof(result));
+  result.kind = TOK_STR;
+  result.loc = loc;
+  result.spelling = interned;
+  result.v.str = interned;
+  return result;
+}
+
+/* Concatenate two token spellings and re-lex into a single token. Empty
+ * (placemarker) sides collapse to the other side per §6.10.3.3 ¶2.
+ *
+ * The joined spelling is lexed from a stack buffer terminated by '\n'.
+ * Exactly one token must come out: a second token before the newline /
+ * EOF is a panic, as is a joined spelling too long for the buffer. The
+ * result takes `loc` and the TF_AT_BOL / TF_HAS_SPACE bits of `lhs`.
+ *
+ * If the joined spelling is empty, the first thing lexed is the newline
+ * (or EOF) rather than a real token; `lhs` is returned unchanged in that
+ * case instead of handing a newline token back to the caller. */
+static Tok paste_tokens(Pp* pp, Tok lhs, Tok rhs, SrcLoc loc) {
+  char buf[1024];
+  size_t alen = 0, blen = 0;
+  const char* a;
+  const char* b;
+  Lexer* lex;
+  Tok t1, t2;
+
+  if (lhs.kind == TOK_PP_PLACEMARKER) return rhs;
+  if (rhs.kind == TOK_PP_PLACEMARKER) return lhs;
+
+  a = lhs.spelling ? pool_str(pp->c->global, lhs.spelling, &alen) : "";
+  b = rhs.spelling ? pool_str(pp->c->global, rhs.spelling, &blen) : "";
+  if (alen + blen + 2 > sizeof(buf)) {
+    compiler_panic(pp->c, loc, "token paste: spelling too long");
+  }
+  if (alen) memcpy(buf, a, alen);
+  if (blen) memcpy(buf + alen, b, blen);
+  buf[alen + blen] = '\n';
+  buf[alen + blen + 1] = 0;
+
+  /* Both tokens are read up front so the lexer can be closed in one
+   * place before any early exit. */
+  lex = lex_open_mem(pp->c, "<paste>", buf, alen + blen + 1);
+  t1 = lex_next(lex);
+  t2 = lex_next(lex);
+  lex_close(lex);
+
+  if (t1.kind == TOK_EOF || t1.kind == TOK_NEWLINE) {
+    /* Nothing to lex: both spellings were empty. */
+    return lhs;
+  }
+  if (t2.kind != TOK_NEWLINE && t2.kind != TOK_EOF) {
+    compiler_panic(pp->c, loc, "token pasting yields multiple tokens, invalid");
+  }
+
+  /* Inherit positional flags from LHS (it sat in the same slot). */
+  t1.flags = (u16)((t1.flags & ~(TF_AT_BOL | TF_HAS_SPACE)) |
+                   (lhs.flags & (TF_AT_BOL | TF_HAS_SPACE)));
+  t1.loc = loc;
+  return t1;
+}
+
+/* Phase 1 (param substitution). Walks the macro body and appends to `out`:
+ * - for `# param`: one stringized token built from the raw argument,
+ * located at the invocation and carrying the `#` token's position flags;
+ * - for a parameter next to `##`: the raw argument tokens;
+ * - for any other parameter: the pre-expanded argument tokens;
+ * - every other body token unchanged.
+ * An argument whose selected range is empty becomes a single
+ * TOK_PP_PLACEMARKER, which phase 2 collapses. The first token substituted
+ * for a parameter takes the parameter token's TF_AT_BOL / TF_HAS_SPACE
+ * bits. Panics when `#` is not followed by a parameter.
+ *
+ * NOTE(review): argument tokens are copied verbatim, so a `##` that
+ * arrives inside an argument would look like the paste operator to
+ * phase 2 if the lexer gives it kind TOK_PP_PASTE — confirm. */
+static void subst_phase1(Pp* pp, const Macro* m, ArgList* a, const Tok* invoke,
+ TokVec* out) {
+ u32 j;
+ for (j = 0; j < m->body_len; ++j) {
+ const Tok* bt = &m->body[j];
+ if (bt->kind == TOK_PP_HASH) {
+ /* §6.10.3.2: # must be followed by a parameter. */
+ if (j + 1 >= m->body_len || m->body[j + 1].kind != TOK_PP_PARAM) {
+ compiler_panic(pp->c, bt->loc,
+ "'#' is not followed by a macro parameter");
+ }
+ {
+ u32 p = m->body[j + 1].v.punct; /* parameter index stored by do_define */
+ u32 lo = a->raw_start[p];
+ u32 hi = a->raw_start[p + 1];
+ Tok s = make_stringize(pp, a->raw, lo, hi, invoke->loc);
+ s.flags = (u16)((s.flags & ~(TF_AT_BOL | TF_HAS_SPACE)) |
+ (bt->flags & (TF_AT_BOL | TF_HAS_SPACE)));
+ tv_push(pp, out, s);
+ ++j; /* also skip the parameter token that followed '#' */
+ continue;
+ }
+ }
+ if (bt->kind == TOK_PP_PARAM) {
+ u32 p = bt->v.punct;
+ int adj_paste =
+ (j > 0 && m->body[j - 1].kind == TOK_PP_PASTE) ||
+ (j + 1 < m->body_len && m->body[j + 1].kind == TOK_PP_PASTE);
+
+ u32 lo, hi;
+ if (adj_paste) {
+ lo = a->raw_start[p];
+ hi = a->raw_start[p + 1];
+ } else {
+ lo = a->exp_start[p];
+ hi = a->exp_start[p + 1];
+ }
+
+ if (lo == hi) {
+ /* Empty argument → placemarker. */
+ Tok pm;
+ memset(&pm, 0, sizeof(pm));
+ pm.kind = TOK_PP_PLACEMARKER;
+ pm.flags = bt->flags & (TF_AT_BOL | TF_HAS_SPACE);
+ pm.loc = invoke->loc;
+ tv_push(pp, out, pm);
+ } else {
+ u32 k;
+ int first = 1;
+ Tok* src = adj_paste ? a->raw : a->exp;
+ for (k = lo; k < hi; ++k) {
+ Tok t = src[k];
+ if (first) {
+ t.flags = (u16)((t.flags & ~(TF_AT_BOL | TF_HAS_SPACE)) |
+ (bt->flags & (TF_AT_BOL | TF_HAS_SPACE)));
+ first = 0;
+ }
+ tv_push(pp, out, t);
+ }
+ }
+ continue;
+ }
+ tv_push(pp, out, *bt);
+ }
+}
+
+/* Phase 2 (paste). Copies `in[0..nin)` to `out`, replacing every
+ * `lhs ## rhs` by the single token paste_tokens builds from the most
+ * recently emitted token and the next input token. A `##` with nothing
+ * before it in `out` or nothing after it in `in` is a panic at the
+ * invocation site. Afterwards placemarkers are removed; their TF_AT_BOL /
+ * TF_HAS_SPACE bits are OR-ed onto the next surviving token. */
+static void subst_phase2(Pp* pp, const Tok* in, u32 nin, const Tok* invoke,
+                         TokVec* out) {
+  u32 pos = 0;
+
+  while (pos < nin) {
+    const Tok* cur = &in[pos++];
+    if (cur->kind != TOK_PP_PASTE) {
+      tv_push(pp, out, *cur);
+      continue;
+    }
+    /* `pos` now indexes the right-hand operand. */
+    if (out->n == 0 || pos >= nin) {
+      compiler_panic(pp->c, invoke->loc,
+                     "'##' at start or end of replacement list");
+    }
+    {
+      Tok left = out->data[out->n - 1];
+      Tok right = in[pos++];
+      out->n -= 1;
+      tv_push(pp, out, paste_tokens(pp, left, right, invoke->loc));
+    }
+  }
+
+  /* Compact in place, dropping placemarkers. */
+  {
+    u32 src;
+    u32 dst = 0;
+    u16 pending = 0;
+    for (src = 0; src < out->n; ++src) {
+      Tok* tok = &out->data[src];
+      if (tok->kind == TOK_PP_PLACEMARKER) {
+        pending |= tok->flags & (TF_AT_BOL | TF_HAS_SPACE);
+        continue;
+      }
+      if (pending) {
+        tok->flags |= pending;
+        pending = 0;
+      }
+      if (dst != src) out->data[dst] = *tok;
+      ++dst;
+    }
+    out->n = dst;
+  }
+}
+
+/* Run substitution phases 1 and 2 for a function-like macro and finish
+ * the result: the first token takes the invocation token's TF_AT_BOL /
+ * TF_HAS_SPACE bits, every token is relocated to the invocation site, and
+ * `hs_out` receives one marker token per output token whose `spelling`
+ * field carries `result_hs` (a TokVec reused as a hideset vector). */
+static void substitute_body(Pp* pp, const Macro* m, ArgList* a,
+                            const Tok* invoke, HidesetId result_hs, TokVec* out,
+                            TokVec* hs_out) {
+  TokVec substituted = {0};
+  Tok marker;
+  u32 total;
+  u32 k;
+
+  subst_phase1(pp, m, a, invoke, &substituted);
+  subst_phase2(pp, substituted.data, substituted.n, invoke, out);
+
+  if (out->n != 0) {
+    Tok* first = &out->data[0];
+    first->flags = (u16)((first->flags & ~(TF_AT_BOL | TF_HAS_SPACE)) |
+                         (invoke->flags & (TF_AT_BOL | TF_HAS_SPACE)));
+  }
+
+  memset(&marker, 0, sizeof(marker));
+  marker.spelling = (Sym)result_hs;
+
+  total = out->n;
+  for (k = 0; k < total; ++k) {
+    out->data[k].loc = invoke->loc;
+    tv_push(pp, hs_out, marker);
+  }
+}
+
+/* Expand a function-like macro invocation: peek for `(`, collect args,
+ * pre-expand them, substitute the body, push the result. Returns 1 if
+ * the invocation was performed, 0 if there was no `(` (the caller should
+ * emit the identifier as-is). */
+static int try_expand_func_macro(Pp* pp, const Macro* m, const Tok* invoke,
+                                 HidesetId invoke_hs) {
+  ArgList args;
+  TokVec expansion = {0};
+  TokVec hs_marks = {0}; /* parallel to expansion; HidesetId in .spelling */
+  HidesetId painted;
+  HidesetId* hids;
+  int had_ws;
+  u32 k;
+
+  if (!peek_for_invoke_paren(pp, &had_ws)) return 0;
+  (void)had_ws;
+
+  /* The returned close-paren token is not needed yet. */
+  (void)read_invocation_args(pp, m, invoke->loc, &args);
+  preexpand_args(pp, &args);
+
+  /* Hideset of result = invocation hideset ∪ {macro_name}. The standard
+   * intersects with the closing `)`'s hideset for blue-paint purity, but
+   * for the freshly-collected `)` from the lex source that's the empty
+   * set, so the union form suffices here. */
+  painted = hs_add(pp, invoke_hs, m->name);
+  substitute_body(pp, m, &args, invoke, painted, &expansion, &hs_marks);
+
+  /* Unpack the marker vector into a plain HidesetId array (at least one
+   * slot is allocated even for an empty expansion). */
+  hids = arena_array(&pp->arena, HidesetId, expansion.n ? expansion.n : 1);
+  for (k = 0; k < expansion.n; ++k) {
+    hids[k] = (HidesetId)hs_marks.data[k].spelling;
+  }
+  push_buf(pp, expansion.data, hids, expansion.n);
+  return 1;
+}
+
+/* ============================================================
+ * pp_next_raw — mutual recursion entry (called from expand_arg_to_eof)
+ * Defined here; also declared in pp_priv.h so pp.c can call it.
+ * ============================================================ */
+
+/* pp_next_raw: reads from the top source, applies macro expansion when an
+ * identifier names a macro that isn't blue-painted, and consumes
+ * directives in-place. TOK_NEWLINE is preserved for pp_emit_text.
+ *
+ * Returns the next token, or the TOK_EOF reported by src_next_raw.
+ * A '#' starts a directive only when it carries TF_AT_BOL and was read
+ * from a SRC_LEX source. Identifiers flagged TF_NO_EXPAND are returned
+ * untouched. __LINE__, __FILE__, __DATE__ and __TIME__ are rewritten in
+ * place and returned before the macro table is consulted. A successful
+ * macro expansion pushes its result as a new source and loops; a
+ * function-like macro name not followed by '(' is returned as a plain
+ * identifier.
+ *
+ * NOTE(review): the __FILE__ spelling is the file name wrapped in double
+ * quotes with no escaping of '"' or '\' — confirm names can never
+ * contain either character. */
+Tok pp_next_raw(Pp* pp) {
+ Tok t;
+ HidesetId hs;
+ u8 src_kind;
+ for (;;) {
+ t = src_next_raw(pp, &hs, &src_kind);
+ if (t.kind == TOK_EOF) return t;
+ if (t.kind == TOK_PP_HASH && (t.flags & TF_AT_BOL) && src_kind == SRC_LEX) {
+ process_directive(pp, t.loc);
+ /* No synthesized newline: the comparator collapses
+ * whitespace, so blank-line replacement of consumed
+ * directives isn't observable here. Directives that produce
+ * content (e.g. #include, #embed, #pragma) push their own
+ * tokens onto the source stack, which the next loop
+ * iteration picks up. */
+ continue;
+ }
+ if (t.kind == TOK_IDENT && (t.flags & TF_NO_EXPAND) == 0) {
+ Sym id = t.v.ident;
+
+ /* Dynamic predefined macros: __LINE__ / __FILE__ /
+ * __DATE__ / __TIME__. Always expand, ignoring the macro
+ * table. */
+ if (id == pp->sym_line__) {
+ char tmp[16], buf[16]; /* tmp collects digits least-significant first */
+ int k = 0, j = 0;
+ u32 ln = t.loc.line;
+ if (ln == 0)
+ buf[k++] = '0';
+ else {
+ while (ln) {
+ tmp[j++] = (char)('0' + ln % 10);
+ ln /= 10;
+ }
+ while (j > 0) buf[k++] = tmp[--j]; /* reverse into reading order */
+ }
+ t.kind = TOK_NUM;
+ t.spelling = pool_intern(pp->c->global, buf, (size_t)k);
+ return t;
+ }
+ if (id == pp->sym_file__) {
+ TokSrc* ls = current_lex_src(pp);
+ Sym name = 0;
+ size_t nlen = 0;
+ const char* nstr = NULL;
+ char* buf;
+ if (ls && ls->file_override) { /* an override on the lex source wins over the source file's own name */
+ name = ls->file_override;
+ } else if (ls) {
+ const SourceFile* sf =
+ source_file(pp->c->sources, lex_file_id(ls->lex));
+ if (sf) name = sf->name;
+ }
+ if (name) nstr = pool_str(pp->c->global, name, &nlen);
+ buf = (char*)arena_alloc(&pp->arena, nlen + 2, 1); /* name plus two quote characters */
+ buf[0] = '"';
+ if (nlen) memcpy(buf + 1, nstr, nlen);
+ buf[nlen + 1] = '"';
+ t.kind = TOK_STR;
+ t.spelling = pool_intern(pp->c->global, buf, nlen + 2);
+ t.v.str = t.spelling;
+ return t;
+ }
+ if (id == pp->sym_date__) {
+ t.kind = TOK_STR;
+ t.spelling = pp->val_date_str;
+ t.v.str = t.spelling;
+ return t;
+ }
+ if (id == pp->sym_time__) {
+ t.kind = TOK_STR;
+ t.spelling = pp->val_time_str;
+ t.v.str = t.spelling;
+ return t;
+ }
+ if (id == pp->sym__pragma) {
+ if (try_expand_pragma_op(pp, &t)) continue;
+ /* No '(' — fall through and emit as plain ident. */
+ }
+
+ {
+ Macro* m = mt_get(pp, id);
+ if (m && !hs_contains(pp, hs, m->name)) { /* skip names already in this token's hideset */
+ if (!m->is_func) {
+ expand_object_macro(pp, m, &t, hs);
+ continue;
+ }
+ if (try_expand_func_macro(pp, m, &t, hs)) {
+ continue;
+ }
+ /* No '(' followed; emit as plain identifier. */
+ }
+ }
+ }
+ return t;
+ }
+}
diff --git a/src/pp/pp_priv.h b/src/pp/pp_priv.h
@@ -0,0 +1,278 @@
+/* pp_priv.h — shared types, helpers, and cross-module forward declarations
+ * for the preprocessor split (pp.c / pp_expand.c / pp_directive.c).
+ * NOT part of the public API; included only within src/pp/. */
+
+#ifndef CFREE_PP_PRIV_H
+#define CFREE_PP_PRIV_H
+
+#include "pp/pp.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "core/arena.h"
+#include "core/diag.h"
+#include "core/heap.h"
+#include "core/pool.h"
+
+/* ============================================================
+ * Internal token kinds
+ * ============================================================ */
+
+/* Preprocessor-private token kinds. Outside the range used by the lexer (TOK_KW_LAST = 0x1000). */
+#define TOK_PP_PARAM ((u16)0x1100) /* parameter reference inside a macro body; v.punct carries the index */
+#define TOK_PP_PLACEMARKER ((u16)0x1101) /* empty-arg substitution marker */
+
+/* ============================================================
+ * Types
+ * ============================================================ */
+
+typedef struct Macro { /* One macro definition; MacroMap maps a Sym to a Macro*. */
+ Sym name; /* the macro's own name; checked against the hideset before expanding */
+ SrcLoc def_loc;
+ u8 is_func; /* nonzero: function-like, expanded only when an invocation follows */
+ u8 is_variadic;
+ u8 pad[2];
+ u32 n_params; /* presumably the number of entries in params -- confirm */
+ Sym* params; /* parameter names */
+ Tok* body; /* body tokens; TOK_PP_PARAM kind + v.punct=idx */
+ u32 body_len; /* presumably the number of tokens in body -- confirm */
+} Macro;
+
+typedef u32 HidesetId; /* names one hideset; presumably an index into Pp.hsets -- confirm */
+#define HS_EMPTY 0u /* id of the empty hideset (element 0 of the table is reserved for it) */
+
+typedef struct Hideset { /* Set of macro names; hs_contains is consulted before a macro is expanded. */
+ u32 n; /* presumably the number of valid entries in names -- confirm in hs_add */
+ Sym names[1]; /* flexible; allocated with extra trailing slots (sizeof still counts one slot) */
+} Hideset;
+
+typedef enum { SRC_LEX = 1, SRC_BUF = 2 } SrcKind; /* TokSrc flavours: lexer-backed vs. token-buffer-backed */
+
+typedef struct TokSrc { /* One entry of the Pp source stack: either a lexer or a token buffer. */
+ u8 kind; /* SRC_LEX or SRC_BUF; selects which field group below is meaningful */
+ /* When set on a SRC_BUF: src_next_raw returns TOK_EOF when this is
+ * the top source and it's exhausted, instead of popping. The caller
+ * (e.g. argument pre-expansion) explicitly pops the scope when done.
+ * This bounds expansion to a single argument's token stream. */
+ u8 scope_top;
+ u8 pad[2];
+ /* SRC_LEX */
+ Lexer* lex;
+ /* SRC_BUF */
+ Tok* toks;
+ HidesetId* hs; /* presumably one hideset id per entry of toks -- confirm in push_buf */
+ u32 i; /* presumably the read cursor into toks -- confirm */
+ u32 n; /* presumably the number of tokens in toks -- confirm */
+ /* #line state (SRC_LEX only). line_delta is added to every emitted
+ * token's loc.line on its way out so __LINE__ and the output cursor
+ * see user-visible numbering. file_override is the Sym (without
+ * surrounding quotes) used by __FILE__ when set. */
+ i32 line_delta;
+ Sym file_override; /* 0 means "not set": __FILE__ then falls back to the source file's name */
+} TokSrc;
+
+typedef enum IfState { /* State of one conditional-inclusion group; presumably stored in IfFrame.state -- confirm. */
+ IF_INCLUDE = 1, /* group active, emit code */
+ IF_SEEK_TRUE = 2, /* skip, looking for the first true elif/else */
+ IF_DONE = 3, /* skip, already had a true branch */
+} IfState;
+
+typedef struct IfFrame { /* Element type of Pp.ifstk, the conditional-inclusion stack. */
+ u8 state; /* presumably an IfState value -- confirm */
+ u8 has_else; /* presumably set once this group's #else has been seen -- confirm */
+ u8 pad[2];
+ SrcLoc loc;
+} IfFrame;
+
+/* MacroMap = Sym -> Macro*. Generated open-addressed hashmap with
+ * deletion (#undef). See core/hashmap.h for what HASHMAP_DEFINE emits. */
+#include "core/hashmap.h"
+static inline u32 macro_hash_(Sym s) { return hash_u32((u32)s); } /* key hash passed to HASHMAP_DEFINE below */
+HASHMAP_DEFINE(MacroMap, Sym, Macro*, macro_hash_);
+
+/* ============================================================
+ * Pp struct (definition shared across all three TUs)
+ * ============================================================ */
+
+struct Pp { /* Complete preprocessor state; one definition shared by pp.c, pp_expand.c and pp_directive.c. */
+ Compiler* c; /* compiler context; the heap is reached through c->env->heap (see pp_heap) */
+
+ /* Source stack — top of stack is sources[nsources-1]. */
+ TokSrc* sources;
+ u32 nsources;
+ u32 sources_cap;
+
+ /* Macro table (open-addressed; key = Sym, value = Macro*). */
+ MacroMap mtab;
+
+ /* Conditional inclusion stack (#if / #ifdef / #ifndef → #endif). */
+ IfFrame* ifstk;
+ u32 ifstk_n;
+ u32 ifstk_cap;
+
+ /* Hideset table. Element 0 reserved as HS_EMPTY. */
+ Hideset** hsets;
+ u32 hsets_n;
+ u32 hsets_cap;
+
+ /* Include directories (stage 9). */
+ struct {
+ const char* path;
+ u8 system; /* presumably nonzero for system include directories -- confirm */
+ }* inc_dirs;
+ u32 ninc_dirs;
+ u32 inc_dirs_cap;
+
+ /* Internal arena: macro bodies, hidesets, expansion buffers, file
+ * data for #include. Lives until pp_free. TokVec and CharBuf storage
+ * (tv_grow / cb_append) is also taken from it. */
+ Arena arena;
+
+ /* Cached interned identifiers used for directive recognition. */
+ Sym sym_define;
+ Sym sym_undef;
+ Sym sym_include;
+ Sym sym_if;
+ Sym sym_ifdef;
+ Sym sym_ifndef;
+ Sym sym_elif;
+ Sym sym_else;
+ Sym sym_endif;
+ Sym sym_line;
+ Sym sym_pragma;
+ Sym sym_error;
+ Sym sym_embed;
+ Sym sym_defined;
+ Sym sym_va_args;
+ Sym sym_line__; /* __LINE__ */
+ Sym sym_file__; /* __FILE__ */
+ Sym sym_date__; /* __DATE__ */
+ Sym sym_time__; /* __TIME__ */
+ Sym sym_stdc__; /* __STDC__ */
+ Sym sym_stdc_hosted__;
+ Sym sym_stdc_version__;
+ Sym sym__pragma; /* _Pragma operator */
+ Sym sym_pragma_kw; /* "pragma" — for synthesized #pragma */
+
+ /* Pre-formatted "Mmm dd yyyy" / "hh:mm:ss" string spellings for
+ * __DATE__ and __TIME__, derived from SOURCE_DATE_EPOCH (or
+ * time(NULL) if unset). */
+ Sym val_date_str;
+ Sym val_time_str;
+};
+
+/* ============================================================
+ * Allocation helpers (static inline in this header, shared by every pp TU)
+ * ============================================================ */
+
+static inline Heap* pp_heap(Pp* pp) { return (Heap*)pp->c->env->heap; } /* the compiler environment's heap, cast to Heap* */
+
+static inline void* pp_xrealloc(Pp* pp, void* p, size_t old_n, size_t new_n, size_t align) {
+  /* Forward to Heap.realloc on the compiler heap; a NULL result is reported through compiler_panic. */
+  Heap* heap = pp_heap(pp);
+  void* grown = heap->realloc(heap, p, old_n, new_n, align);
+  if (grown == NULL) compiler_panic(pp->c, (SrcLoc){0, 0, 0}, "pp: out of memory");
+  return grown;
+}
+
+static inline void pp_xfree(Pp* pp, void* p, size_t n) { /* Hand p back to the compiler heap; n is forwarded to Heap.free unchanged. */
+ if (p) pp_heap(pp)->free(pp_heap(pp), p, n); /* NULL p: the heap is not called at all */
+}
+
+/* ============================================================
+ * Token-vector helpers
+ * ============================================================ */
+
+typedef struct TokVec { /* Growable token array whose storage comes from pp->arena (see tv_grow / tv_push). */
+ Tok* data; /* storage; tv_grow replaces it with a fresh arena array when it must grow */
+ u32 n; /* tokens in use */
+ u32 cap; /* slots allocated in data */
+} TokVec;
+
+static inline void tv_grow(Pp* pp, TokVec* v, u32 want) {
+  /* Ensure v->cap >= want. Capacity doubles (minimum 8) into a fresh pp->arena array; live tokens are copied over. */
+  u32 nc = v->cap ? v->cap : 8;
+  Tok* nb;
+  if (v->cap >= want) return;
+  while (nc < want && nc <= 0x7fffffffu) nc *= 2; /* stop before a doubling would wrap u32 (the loop could then never end) */
+  if (nc < want) nc = want; /* doubling is exhausted above 2^31: fall back to the exact size */
+  nb = arena_array(&pp->arena, Tok, nc);
+  if (v->n) memcpy(nb, v->data, sizeof(Tok) * v->n);
+  v->data = nb;
+  v->cap = nc;
+}
+
+static inline void tv_push(Pp* pp, TokVec* v, Tok t) { /* Append t to v, growing the array first when needed. */
+ tv_grow(pp, v, v->n + 1); /* make sure slot v->n exists */
+ v->data[v->n++] = t;
+}
+
+/* Growable char buffer (arena-backed); filled through cb_append / cb_putc. */
+typedef struct CharBuf {
+ char* data; /* storage; cb_append swaps in a larger arena block when the bytes do not fit */
+ u32 len; /* bytes in use; cb_append adds no terminating NUL of its own */
+ u32 cap; /* bytes allocated in data */
+} CharBuf;
+
+static inline void cb_append(Pp* pp, CharBuf* b, const char* s, u32 n) { /* Append the n bytes at s to b; panics past 2 GiB, where the u32 sizes below would wrap. */
+  if (b->len > 0x80000000u || n > 0x80000000u - b->len) compiler_panic(pp->c, (SrcLoc){0, 0, 0}, "pp: char buffer too large");
+  if (b->len + n > b->cap) { /* cannot wrap: the guard above caps the sum at 2^31 */
+    u32 nc = b->cap ? b->cap * 2 : 64; /* double the capacity, 64 bytes minimum */
+    char* nb;
+    while (nc < b->len + n) nc *= 2; /* terminates: the target is <= 2^31, so nc never wraps to 0 */
+    nb = (char*)arena_alloc(&pp->arena, nc, 1);
+    if (b->len) memcpy(nb, b->data, b->len); /* carry the existing bytes into the new block */
+    b->data = nb;
+    b->cap = nc;
+  }
+  if (n) memcpy(b->data + b->len, s, n); /* for n == 0 neither s nor b->data is touched */
+  b->len += n;
+}
+
+static inline void cb_putc(Pp* pp, CharBuf* b, char c) { /* Append the single byte c to b. */
+ cb_append(pp, b, &c, 1);
+}
+
+/* ============================================================
+ * Cross-module forward declarations
+ * ============================================================ */
+
+/* --- pp.c (source stack) → pp_expand.c, pp_directive.c --- */
+Tok src_next_raw(Pp* pp, HidesetId* hs_out, u8* src_kind_out);
+void src_push(Pp* pp, TokSrc s);
+void src_pop(Pp* pp);
+void push_buf(Pp* pp, Tok* toks, HidesetId* hs, u32 n);
+
+/* pp_next_raw is the mutual-recursion entry: expand_arg_to_eof calls it,
+ * and pp_next_raw drives directives and expansion. Declared non-static so
+ * pp_expand.c can call it without a forward decl each time. */
+Tok pp_next_raw(Pp* pp);
+
+/* --- pp_expand.c → pp.c, pp_directive.c --- */
+HidesetId hs_add(Pp* pp, HidesetId id, Sym s);
+int hs_contains(Pp* pp, HidesetId id, Sym s); /* callers skip expansion of a macro when this is nonzero */
+Macro* mt_get(Pp* pp, Sym name); /* callers NULL-check the result before using it */
+void mt_put(Pp* pp, Sym name, Macro* m);
+void mt_del(Pp* pp, Sym name);
+void expand_arg_to_eof(Pp* pp, Tok* in, u32 nin, TokVec* out);
+
+/* --- pp_directive.c → pp_expand.c --- */
+i64 eval_if_expr(Pp* pp, const Tok* line, u32 n, SrcLoc loc);
+void process_directive(Pp* pp, SrcLoc hash_loc);
+
+/* --- pp_directive.c internal helpers called from pp_expand.c --- */
+void emit_pragma_line(Pp* pp, const Tok* line, u32 n, SrcLoc loc);
+int peek_for_invoke_paren(Pp* pp, int* ws_has_space_out);
+int try_expand_pragma_op(Pp* pp, const Tok* invoke); /* nonzero: handled, caller fetches the next token; 0: no '(' followed */
+
+/* --- pp_directive.c: read_directive_line (used by pp.c/pp_define) --- */
+void read_directive_line(Pp* pp, Tok** out_toks, u32* out_n);
+
+/* --- pp_expand.c: do_define / do_undef (used by pp.c/pp_define) --- */
+void do_define(Pp* pp, const Tok* line, u32 n);
+void do_undef(Pp* pp, const Tok* line, u32 n);
+
+/* --- pp_directive.c helpers needed by pp_expand.c (_Pragma) --- */
+TokSrc* current_lex_src(Pp* pp); /* may return NULL; the __FILE__ expansion checks before dereferencing */
+
+#endif /* CFREE_PP_PRIV_H */